author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit    e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree      64f88b554b444a49f656b6c656111a145cbbaa28 /src/isa-l
parent    Initial commit. (diff)
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/isa-l')
-rw-r--r--  src/isa-l/.gitignore | 28
-rw-r--r--  src/isa-l/.travis.yml | 100
-rw-r--r--  src/isa-l/CONTRIBUTING.md | 39
-rw-r--r--  src/isa-l/Doxyfile | 33
-rw-r--r--  src/isa-l/LICENSE | 26
-rw-r--r--  src/isa-l/Makefile.am | 182
-rw-r--r--  src/isa-l/Makefile.nmake | 318
-rw-r--r--  src/isa-l/Makefile.unx | 56
-rw-r--r--  src/isa-l/README.md | 74
-rw-r--r--  src/isa-l/Release_notes.txt | 308
-rwxr-xr-x  src/isa-l/autogen.sh | 17
-rw-r--r--  src/isa-l/configure.ac | 313
-rw-r--r--  src/isa-l/crc/Makefile.am | 89
-rw-r--r--  src/isa-l/crc/aarch64/Makefile.am | 57
-rw-r--r--  src/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S | 423
-rw-r--r--  src/isa-l/crc/aarch64/crc16_t10dif_pmull.S | 404
-rw-r--r--  src/isa-l/crc/aarch64/crc32_aarch64_common.h | 321
-rw-r--r--  src/isa-l/crc/aarch64/crc32_common_crc_ext_cortex_a72.S | 135
-rw-r--r--  src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S | 432
-rw-r--r--  src/isa-l/crc/aarch64/crc32_gzip_refl_3crc_fold.S | 95
-rw-r--r--  src/isa-l/crc/aarch64/crc32_gzip_refl_crc_ext.S | 66
-rw-r--r--  src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h | 87
-rw-r--r--  src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h | 87
-rw-r--r--  src/isa-l/crc/aarch64/crc32_iscsi_3crc_fold.S | 97
-rw-r--r--  src/isa-l/crc/aarch64/crc32_iscsi_crc_ext.S | 65
-rw-r--r--  src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S | 53
-rw-r--r--  src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h | 87
-rw-r--r--  src/isa-l/crc/aarch64/crc32_mix_default.S | 107
-rw-r--r--  src/isa-l/crc/aarch64/crc32_mix_default_common.S | 563
-rw-r--r--  src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S | 70
-rw-r--r--  src/isa-l/crc/aarch64/crc32_norm_common_pmull.h | 135
-rw-r--r--  src/isa-l/crc/aarch64/crc32_refl_common_pmull.h | 126
-rw-r--r--  src/isa-l/crc/aarch64/crc32c_mix_default.S | 109
-rw-r--r--  src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S | 68
-rw-r--r--  src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h | 200
-rw-r--r--  src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h | 196
-rw-r--r--  src/isa-l/crc/aarch64/crc64_iso_norm_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc64_iso_norm_pmull.h | 201
-rw-r--r--  src/isa-l/crc/aarch64/crc64_iso_refl_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc64_iso_refl_pmull.h | 197
-rw-r--r--  src/isa-l/crc/aarch64/crc64_jones_norm_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc64_jones_norm_pmull.h | 200
-rw-r--r--  src/isa-l/crc/aarch64/crc64_jones_refl_pmull.S | 33
-rw-r--r--  src/isa-l/crc/aarch64/crc64_jones_refl_pmull.h | 196
-rw-r--r--  src/isa-l/crc/aarch64/crc64_norm_common_pmull.h | 129
-rw-r--r--  src/isa-l/crc/aarch64/crc64_refl_common_pmull.h | 126
-rw-r--r--  src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c | 166
-rw-r--r--  src/isa-l/crc/aarch64/crc_common_pmull.h | 302
-rw-r--r--  src/isa-l/crc/aarch64/crc_multibinary_arm.S | 42
-rw-r--r--  src/isa-l/crc/crc16_t10dif_01.asm | 666
-rw-r--r--  src/isa-l/crc/crc16_t10dif_02.asm | 654
-rw-r--r--  src/isa-l/crc/crc16_t10dif_by16_10.asm | 591
-rw-r--r--  src/isa-l/crc/crc16_t10dif_by4.asm | 563
-rw-r--r--  src/isa-l/crc/crc16_t10dif_copy_by4.asm | 599
-rw-r--r--  src/isa-l/crc/crc16_t10dif_copy_by4_02.asm | 596
-rw-r--r--  src/isa-l/crc/crc16_t10dif_copy_perf.c | 84
-rw-r--r--  src/isa-l/crc/crc16_t10dif_copy_test.c | 175
-rw-r--r--  src/isa-l/crc/crc16_t10dif_op_perf.c | 116
-rw-r--r--  src/isa-l/crc/crc16_t10dif_perf.c | 79
-rw-r--r--  src/isa-l/crc/crc16_t10dif_test.c | 179
-rw-r--r--  src/isa-l/crc/crc32_funcs_test.c | 324
-rw-r--r--  src/isa-l/crc/crc32_gzip_refl_by16_10.asm | 569
-rw-r--r--  src/isa-l/crc/crc32_gzip_refl_by8.asm | 625
-rw-r--r--  src/isa-l/crc/crc32_gzip_refl_by8_02.asm | 556
-rw-r--r--  src/isa-l/crc/crc32_gzip_refl_perf.c | 91
-rw-r--r--  src/isa-l/crc/crc32_ieee_01.asm | 656
-rw-r--r--  src/isa-l/crc/crc32_ieee_02.asm | 652
-rw-r--r--  src/isa-l/crc/crc32_ieee_by16_10.asm | 585
-rw-r--r--  src/isa-l/crc/crc32_ieee_by4.asm | 566
-rw-r--r--  src/isa-l/crc/crc32_ieee_perf.c | 79
-rw-r--r--  src/isa-l/crc/crc32_iscsi_00.asm | 672
-rw-r--r--  src/isa-l/crc/crc32_iscsi_01.asm | 592
-rw-r--r--  src/isa-l/crc/crc32_iscsi_by16_10.asm | 556
-rw-r--r--  src/isa-l/crc/crc32_iscsi_perf.c | 79
-rw-r--r--  src/isa-l/crc/crc64_base.c | 912
-rw-r--r--  src/isa-l/crc/crc64_ecma_norm_by16_10.asm | 61
-rw-r--r--  src/isa-l/crc/crc64_ecma_norm_by8.asm | 584
-rw-r--r--  src/isa-l/crc/crc64_ecma_refl_by16_10.asm | 61
-rw-r--r--  src/isa-l/crc/crc64_ecma_refl_by8.asm | 549
-rw-r--r--  src/isa-l/crc/crc64_example.c | 68
-rw-r--r--  src/isa-l/crc/crc64_funcs_perf.c | 103
-rw-r--r--  src/isa-l/crc/crc64_funcs_test.c | 315
-rw-r--r--  src/isa-l/crc/crc64_iso_norm_by16_10.asm | 525
-rw-r--r--  src/isa-l/crc/crc64_iso_norm_by8.asm | 582
-rw-r--r--  src/isa-l/crc/crc64_iso_refl_by16_10.asm | 495
-rw-r--r--  src/isa-l/crc/crc64_iso_refl_by8.asm | 545
-rw-r--r--  src/isa-l/crc/crc64_jones_norm_by16_10.asm | 61
-rw-r--r--  src/isa-l/crc/crc64_jones_norm_by8.asm | 582
-rw-r--r--  src/isa-l/crc/crc64_jones_refl_by16_10.asm | 61
-rw-r--r--  src/isa-l/crc/crc64_jones_refl_by8.asm | 545
-rw-r--r--  src/isa-l/crc/crc64_multibinary.asm | 92
-rw-r--r--  src/isa-l/crc/crc64_ref.h | 148
-rw-r--r--  src/isa-l/crc/crc_base.c | 351
-rw-r--r--  src/isa-l/crc/crc_base_aliases.c | 87
-rw-r--r--  src/isa-l/crc/crc_multibinary.asm | 328
-rw-r--r--  src/isa-l/crc/crc_ref.h | 140
-rw-r--r--  src/isa-l/crc/crc_simple_test.c | 64
-rw-r--r--  src/isa-l/doc/build.md | 46
-rw-r--r--  src/isa-l/doc/test.md | 49
-rw-r--r--  src/isa-l/erasure_code/Makefile.am | 153
-rw-r--r--  src/isa-l/erasure_code/aarch64/Makefile.am | 45
-rw-r--r--  src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c | 69
-rw-r--r--  src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c | 127
-rw-r--r--  src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S | 36
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S | 399
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S | 402
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S | 358
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S | 382
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S | 421
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S | 456
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S | 481
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S | 535
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S | 610
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S | 298
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S | 315
-rw-r--r--  src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S | 235
-rw-r--r--  src/isa-l/erasure_code/ec_base.c | 371
-rw-r--r--  src/isa-l/erasure_code/ec_base.h | 6680
-rw-r--r--  src/isa-l/erasure_code/ec_base_aliases.c | 61
-rw-r--r--  src/isa-l/erasure_code/ec_highlevel_func.c | 374
-rw-r--r--  src/isa-l/erasure_code/ec_multibinary.asm | 95
-rw-r--r--  src/isa-l/erasure_code/erasure_code_base_perf.c | 176
-rw-r--r--  src/isa-l/erasure_code/erasure_code_base_test.c | 764
-rw-r--r--  src/isa-l/erasure_code/erasure_code_perf.c | 177
-rw-r--r--  src/isa-l/erasure_code/erasure_code_test.c | 764
-rw-r--r--  src/isa-l/erasure_code/erasure_code_update_perf.c | 281
-rw-r--r--  src/isa-l/erasure_code/erasure_code_update_test.c | 959
-rw-r--r--  src/isa-l/erasure_code/gen_rs_matrix_limits.c | 115
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm | 337
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm | 356
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm | 245
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm | 339
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_mad_avx.asm | 236
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_mad_avx2.asm | 247
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_mad_avx512.asm | 230
-rw-r--r--  src/isa-l/erasure_code/gf_2vect_mad_sse.asm | 239
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm | 377
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm | 397
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm | 270
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm | 378
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_mad_avx.asm | 288
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_mad_avx2.asm | 317
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_mad_avx512.asm | 247
-rw-r--r--  src/isa-l/erasure_code/gf_3vect_mad_sse.asm | 298
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm | 441
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm | 460
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm | 301
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm | 443
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_mad_avx.asm | 336
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_mad_avx2.asm | 342
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_mad_avx512.asm | 267
-rw-r--r--  src/isa-l/erasure_code/gf_4vect_mad_sse.asm | 342
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm | 303
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm | 315
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm | 335
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm | 304
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_mad_avx.asm | 365
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_mad_avx2.asm | 363
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_mad_avx512.asm | 287
-rw-r--r--  src/isa-l/erasure_code/gf_5vect_mad_sse.asm | 373
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm | 315
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm | 326
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm | 354
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm | 315
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_mad_avx.asm | 394
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_mad_avx2.asm | 400
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_mad_avx512.asm | 321
-rw-r--r--  src/isa-l/erasure_code/gf_6vect_mad_sse.asm | 406
-rw-r--r--  src/isa-l/erasure_code/gf_inverse_test.c | 225
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c | 152
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm | 271
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm | 280
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm | 240
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c | 290
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_perf.c | 174
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm | 271
-rw-r--r--  src/isa-l/erasure_code/gf_vect_dot_prod_test.c | 525
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mad_avx.asm | 196
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mad_avx2.asm | 203
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mad_avx512.asm | 193
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mad_sse.asm | 197
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mad_test.c | 519
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mul_avx.asm | 164
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mul_base_test.c | 129
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mul_perf.c | 90
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mul_sse.asm | 170
-rw-r--r--  src/isa-l/erasure_code/gf_vect_mul_test.c | 158
-rw-r--r--  src/isa-l/erasure_code/ppc64le/Makefile.am | 15
-rw-r--r--  src/isa-l/erasure_code/ppc64le/ec_base_vsx.c | 97
-rw-r--r--  src/isa-l/erasure_code/ppc64le/ec_base_vsx.h | 338
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c | 83
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c | 65
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c | 104
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c | 84
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c | 124
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c | 103
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c | 145
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c | 122
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c | 166
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c | 142
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c | 85
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c | 48
-rw-r--r--  src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c | 61
-rw-r--r--  src/isa-l/examples/ec/Makefile.am | 33
-rw-r--r--  src/isa-l/examples/ec/Makefile.unx | 8
-rw-r--r--  src/isa-l/examples/ec/ec_piggyback_example.c | 506
-rw-r--r--  src/isa-l/examples/ec/ec_simple_example.c | 277
-rw-r--r--  src/isa-l/igzip/Makefile.am | 144
-rw-r--r--  src/isa-l/igzip/aarch64/bitbuf2_aarch64.h | 57
-rw-r--r--  src/isa-l/igzip/aarch64/data_struct_aarch64.h | 215
-rw-r--r--  src/isa-l/igzip/aarch64/encode_df.S | 159
-rw-r--r--  src/isa-l/igzip/aarch64/gen_icf_map.S | 266
-rw-r--r--  src/isa-l/igzip/aarch64/huffman_aarch64.h | 173
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S | 689
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S | 261
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S | 264
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S | 95
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S | 32
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S | 178
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c | 188
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S | 50
-rw-r--r--  src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S | 194
-rw-r--r--  src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S | 364
-rw-r--r--  src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S | 397
-rw-r--r--  src/isa-l/igzip/aarch64/isal_update_histogram.S | 311
-rw-r--r--  src/isa-l/igzip/aarch64/lz0a_const_aarch64.h | 72
-rw-r--r--  src/isa-l/igzip/aarch64/options_aarch64.h | 71
-rw-r--r--  src/isa-l/igzip/aarch64/stdmac_aarch64.h | 57
-rw-r--r--  src/isa-l/igzip/adler32_avx2_4.asm | 295
-rw-r--r--  src/isa-l/igzip/adler32_base.c | 63
-rw-r--r--  src/isa-l/igzip/adler32_perf.c | 72
-rw-r--r--  src/isa-l/igzip/adler32_sse.asm | 253
-rw-r--r--  src/isa-l/igzip/bitbuf2.asm | 64
-rw-r--r--  src/isa-l/igzip/bitbuf2.h | 130
-rw-r--r--  src/isa-l/igzip/checksum32_funcs_test.c | 308
-rw-r--r--  src/isa-l/igzip/checksum_test_ref.h | 102
-rw-r--r--  src/isa-l/igzip/data_struct2.asm | 275
-rw-r--r--  src/isa-l/igzip/encode_df.c | 38
-rw-r--r--  src/isa-l/igzip/encode_df.h | 30
-rw-r--r--  src/isa-l/igzip/encode_df_04.asm | 580
-rw-r--r--  src/isa-l/igzip/encode_df_06.asm | 624
-rw-r--r--  src/isa-l/igzip/flatten_ll.c | 41
-rw-r--r--  src/isa-l/igzip/flatten_ll.h | 3
-rw-r--r--  src/isa-l/igzip/generate_custom_hufftables.c | 480
-rw-r--r--  src/isa-l/igzip/generate_static_inflate.c | 205
-rw-r--r--  src/isa-l/igzip/heap_macros.asm | 98
-rw-r--r--  src/isa-l/igzip/huff_codes.c | 1694
-rw-r--r--  src/isa-l/igzip/huff_codes.h | 170
-rw-r--r--  src/isa-l/igzip/huffman.asm | 249
-rw-r--r--  src/isa-l/igzip/huffman.h | 359
-rw-r--r--  src/isa-l/igzip/hufftables_c.c | 6742
-rw-r--r--  src/isa-l/igzip/igzip.c | 2022
-rw-r--r--  src/isa-l/igzip/igzip_base.c | 236
-rw-r--r--  src/isa-l/igzip/igzip_base_aliases.c | 153
-rw-r--r--  src/isa-l/igzip/igzip_body.asm | 792
-rw-r--r--  src/isa-l/igzip/igzip_build_hash_table_perf.c | 38
-rw-r--r--  src/isa-l/igzip/igzip_checksums.h | 12
-rw-r--r--  src/isa-l/igzip/igzip_compare_types.asm | 452
-rw-r--r--  src/isa-l/igzip/igzip_decode_block_stateless.asm | 800
-rw-r--r--  src/isa-l/igzip/igzip_decode_block_stateless_01.asm | 3
-rw-r--r--  src/isa-l/igzip/igzip_decode_block_stateless_04.asm | 4
-rw-r--r--  src/isa-l/igzip/igzip_deflate_hash.asm | 170
-rw-r--r--  src/isa-l/igzip/igzip_example.c | 101
-rw-r--r--  src/isa-l/igzip/igzip_file_perf.c | 348
-rw-r--r--  src/isa-l/igzip/igzip_finish.asm | 330
-rw-r--r--  src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm | 746
-rw-r--r--  src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm | 581
-rw-r--r--  src/isa-l/igzip/igzip_hist_perf.c | 129
-rw-r--r--  src/isa-l/igzip/igzip_icf_base.c | 370
-rw-r--r--  src/isa-l/igzip/igzip_icf_body.c | 326
-rw-r--r--  src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm | 906
-rw-r--r--  src/isa-l/igzip/igzip_icf_finish.asm | 327
-rw-r--r--  src/isa-l/igzip/igzip_inflate.c | 2520
-rw-r--r--  src/isa-l/igzip/igzip_inflate_multibinary.asm | 45
-rw-r--r--  src/isa-l/igzip/igzip_inflate_test.c | 311
-rw-r--r--  src/isa-l/igzip/igzip_level_buf_structs.h | 48
-rw-r--r--  src/isa-l/igzip/igzip_multibinary.asm | 134
-rw-r--r--  src/isa-l/igzip/igzip_perf.c | 832
-rw-r--r--  src/isa-l/igzip/igzip_rand_test.c | 3101
-rw-r--r--  src/isa-l/igzip/igzip_semi_dyn_file_perf.c | 334
-rw-r--r--  src/isa-l/igzip/igzip_set_long_icf_fg_04.asm | 300
-rw-r--r--  src/isa-l/igzip/igzip_set_long_icf_fg_06.asm | 372
-rw-r--r--  src/isa-l/igzip/igzip_sync_flush_example.c | 86
-rw-r--r--  src/isa-l/igzip/igzip_update_histogram.asm | 579
-rw-r--r--  src/isa-l/igzip/igzip_update_histogram_01.asm | 7
-rw-r--r--  src/isa-l/igzip/igzip_update_histogram_04.asm | 8
-rw-r--r--  src/isa-l/igzip/igzip_wrapper.h | 52
-rw-r--r--  src/isa-l/igzip/igzip_wrapper_hdr_test.c | 890
-rw-r--r--  src/isa-l/igzip/inflate_data_structs.asm | 146
-rw-r--r--  src/isa-l/igzip/inflate_std_vects.h | 1554
-rw-r--r--  src/isa-l/igzip/lz0a_const.asm | 65
-rw-r--r--  src/isa-l/igzip/options.asm | 77
-rw-r--r--  src/isa-l/igzip/proc_heap.asm | 132
-rw-r--r--  src/isa-l/igzip/proc_heap_base.c | 85
-rw-r--r--  src/isa-l/igzip/repeated_char_result.h | 68
-rw-r--r--  src/isa-l/igzip/rfc1951_lookup.asm | 118
-rw-r--r--  src/isa-l/igzip/static_inflate.h | 2678
-rw-r--r--  src/isa-l/igzip/stdmac.asm | 469
-rw-r--r--  src/isa-l/include/aarch64_multibinary.h | 311
-rw-r--r--  src/isa-l/include/crc.h | 212
-rw-r--r--  src/isa-l/include/crc64.h | 277
-rw-r--r--  src/isa-l/include/erasure_code.h | 947
-rw-r--r--  src/isa-l/include/gf_vect_mul.h | 152
-rw-r--r--  src/isa-l/include/igzip_lib.h | 990
-rw-r--r--  src/isa-l/include/mem_routines.h | 64
-rw-r--r--  src/isa-l/include/multibinary.asm | 440
-rw-r--r--  src/isa-l/include/raid.h | 305
-rw-r--r--  src/isa-l/include/reg_sizes.asm | 291
-rw-r--r--  src/isa-l/include/test.h | 285
-rw-r--r--  src/isa-l/include/types.h | 77
-rw-r--r--  src/isa-l/include/unaligned.h | 76
-rw-r--r--  src/isa-l/isa-l.def | 117
-rw-r--r--  src/isa-l/libisal.pc.in | 11
-rw-r--r--  src/isa-l/make.inc | 380
-rw-r--r--  src/isa-l/mem/Makefile.am | 48
-rw-r--r--  src/isa-l/mem/aarch64/Makefile.am | 33
-rw-r--r--  src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c | 39
-rw-r--r--  src/isa-l/mem/aarch64/mem_multibinary_arm.S | 33
-rw-r--r--  src/isa-l/mem/aarch64/mem_zero_detect_neon.S | 243
-rw-r--r--  src/isa-l/mem/mem_multibinary.asm | 42
-rw-r--r--  src/isa-l/mem/mem_zero_detect_avx.asm | 189
-rw-r--r--  src/isa-l/mem/mem_zero_detect_base.c | 69
-rw-r--r--  src/isa-l/mem/mem_zero_detect_base_aliases.c | 38
-rw-r--r--  src/isa-l/mem/mem_zero_detect_perf.c | 60
-rw-r--r--  src/isa-l/mem/mem_zero_detect_sse.asm | 176
-rw-r--r--  src/isa-l/mem/mem_zero_detect_test.c | 226
-rw-r--r--  src/isa-l/programs/Makefile.am | 38
-rw-r--r--  src/isa-l/programs/igzip.1 | 87
-rw-r--r--  src/isa-l/programs/igzip.1.h2m | 31
-rw-r--r--  src/isa-l/programs/igzip_cli.c | 1206
-rwxr-xr-x  src/isa-l/programs/igzip_cli_check.sh | 261
-rw-r--r--  src/isa-l/raid/Makefile.am | 67
-rw-r--r--  src/isa-l/raid/aarch64/Makefile.am | 36
-rw-r--r--  src/isa-l/raid/aarch64/pq_check_neon.S | 341
-rw-r--r--  src/isa-l/raid/aarch64/pq_gen_neon.S | 282
-rw-r--r--  src/isa-l/raid/aarch64/raid_aarch64_dispatcher.c | 61
-rw-r--r--  src/isa-l/raid/aarch64/raid_multibinary_arm.S | 36
-rw-r--r--  src/isa-l/raid/aarch64/xor_check_neon.S | 271
-rw-r--r--  src/isa-l/raid/aarch64/xor_gen_neon.S | 264
-rw-r--r--  src/isa-l/raid/pq_check_sse.asm | 277
-rw-r--r--  src/isa-l/raid/pq_check_sse_i32.asm | 282
-rw-r--r--  src/isa-l/raid/pq_check_test.c | 304
-rw-r--r--  src/isa-l/raid/pq_gen_avx.asm | 254
-rw-r--r--  src/isa-l/raid/pq_gen_avx2.asm | 256
-rw-r--r--  src/isa-l/raid/pq_gen_avx512.asm | 235
-rw-r--r--  src/isa-l/raid/pq_gen_perf.c | 88
-rw-r--r--  src/isa-l/raid/pq_gen_sse.asm | 258
-rw-r--r--  src/isa-l/raid/pq_gen_sse_i32.asm | 264
-rw-r--r--  src/isa-l/raid/pq_gen_test.c | 194
-rw-r--r--  src/isa-l/raid/raid_base.c | 147
-rw-r--r--  src/isa-l/raid/raid_base_aliases.c | 50
-rw-r--r--  src/isa-l/raid/raid_multibinary.asm | 143
-rw-r--r--  src/isa-l/raid/raid_multibinary_i32.asm | 52
-rw-r--r--  src/isa-l/raid/xor_check_sse.asm | 285
-rw-r--r--  src/isa-l/raid/xor_check_test.c | 280
-rw-r--r--  src/isa-l/raid/xor_example.c | 70
-rw-r--r--  src/isa-l/raid/xor_gen_avx.asm | 228
-rw-r--r--  src/isa-l/raid/xor_gen_avx512.asm | 217
-rw-r--r--  src/isa-l/raid/xor_gen_perf.c | 90
-rw-r--r--  src/isa-l/raid/xor_gen_sse.asm | 284
-rw-r--r--  src/isa-l/raid/xor_gen_test.c | 165
-rw-r--r--  src/isa-l/tests/fuzz/Makefile.am | 52
-rw-r--r--  src/isa-l/tests/fuzz/Makefile.unx | 12
-rw-r--r--  src/isa-l/tests/fuzz/igzip_checked_inflate_fuzz_test.c | 72
-rw-r--r--  src/isa-l/tests/fuzz/igzip_dump_inflate_corpus.c | 40
-rw-r--r--  src/isa-l/tests/fuzz/igzip_fuzz_inflate.c | 41
-rw-r--r--  src/isa-l/tests/fuzz/igzip_simple_inflate_fuzz_test.c | 22
-rw-r--r--  src/isa-l/tests/fuzz/igzip_simple_round_trip_fuzz_test.c | 130
-rwxr-xr-x  src/isa-l/tools/check_format.sh | 87
-rw-r--r--  src/isa-l/tools/gen_nmake.mk | 123
-rwxr-xr-x  src/isa-l/tools/iindent | 2
-rwxr-xr-x  src/isa-l/tools/nasm-cet-filter.sh | 56
-rwxr-xr-x  src/isa-l/tools/nasm-filter.sh | 47
-rwxr-xr-x  src/isa-l/tools/remove_trailing_whitespace.sh | 2
-rwxr-xr-x  src/isa-l/tools/test_autorun.sh | 63
-rwxr-xr-x  src/isa-l/tools/test_checks.sh | 115
-rwxr-xr-x  src/isa-l/tools/test_extended.sh | 211
-rwxr-xr-x  src/isa-l/tools/test_fuzz.sh | 171
-rwxr-xr-x  src/isa-l/tools/test_tools.sh | 11
-rwxr-xr-x  src/isa-l/tools/yasm-cet-filter.sh | 47
-rwxr-xr-x  src/isa-l/tools/yasm-filter.sh | 38
385 files changed, 117894 insertions(+), 0 deletions(-)
diff --git a/src/isa-l/.gitignore b/src/isa-l/.gitignore
new file mode 100644
index 000000000..e85c4176b
--- /dev/null
+++ b/src/isa-l/.gitignore
@@ -0,0 +1,28 @@
+# Objects
+*~
+*.o
+*.lo
+*.so
+*.dll
+*.exp
+*.lib
+bin
+
+# Autobuild
+Makefile
+Makefile.in
+aclocal.m4
+autom4te.cache
+build-aux
+config.*
+configure
+.deps
+.dirstamp
+.libs
+libtool
+
+# Generated files
+isa-l.h
+/libisal.la
+libisal.pc
+programs/igzip
diff --git a/src/isa-l/.travis.yml b/src/isa-l/.travis.yml
new file mode 100644
index 000000000..9bf236c08
--- /dev/null
+++ b/src/isa-l/.travis.yml
@@ -0,0 +1,100 @@
+language: c
+sudo: required
+matrix:
+ include:
+ ### OS X
+ - os: osx
+ env: C_COMPILER=clang
+
+ ### linux gcc and format check
+ - dist: xenial
+ addons:
+ apt:
+ packages:
+ - nasm
+ install:
+ # Install newer indent to check formatting
+ - sudo apt-get install texinfo
+ - wget http://archive.ubuntu.com/ubuntu/pool/main/i/indent/indent_2.2.12.orig.tar.xz -O /tmp/indent.tar.xz
+ - tar -xJf /tmp/indent.tar.xz -C /tmp/
+ - pushd /tmp/indent-2.2.12 && ./configure --prefix=/usr && make && sudo make install && popd
+ env: C_COMPILER=gcc
+
+ ### linux clang
+ - dist: xenial
+ addons:
+ apt:
+ packages:
+ - nasm
+ env: C_COMPILER=clang
+
+ ### linux newer clang
+ - dist: trusty
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ - llvm-toolchain-trusty-4.0
+ packages:
+ - clang-4.0
+ env: C_COMPILER=clang-4.0
+
+ ### linux older gcc
+ - dist: trusty
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ packages:
+ - g++-4.7
+ env: C_COMPILER=gcc-4.7
+
+ ### linux newer gcc
+ - dist: trusty
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ packages:
+ - g++-6
+ env: C_COMPILER=gcc-6
+
+ ### arm64: gcc-5.4
+ - os: linux
+ dist: xenial
+ arch: arm64
+ env: C_COMPILER=gcc
+
+ ### arm64: gcc-5.4 extended tests
+ - os: linux
+ dist: xenial
+ arch: arm64
+ env: TEST_TYPE=ext
+
+ ### linux extended tests
+ - dist: xenial
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ packages:
+ - binutils-mingw-w64-x86-64
+ - gcc-mingw-w64-x86-64
+ - wine
+ - nasm
+ env: TEST_TYPE=ext
+
+before_install:
+ - if [ -n "${C_COMPILER}" ]; then export CC="${C_COMPILER}"; fi
+ - if [ -n "${AS_ASSEMBL}" ]; then export AS="${AS_ASSEMBL}"; fi
+
+before_script:
+ - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get -q update; fi
+ - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get install yasm; fi
+ - if [ $TRAVIS_OS_NAME = osx ]; then brew update; fi
+ - if [ $TRAVIS_OS_NAME = osx ]; then brew install yasm; fi
+
+script:
+ - if [ -n "${CC}" ]; then $CC --version; fi
+ - if [ -n "${AS}" ]; then $AS --version || echo No version; fi
+ - ./tools/test_autorun.sh "${TEST_TYPE}"
diff --git a/src/isa-l/CONTRIBUTING.md b/src/isa-l/CONTRIBUTING.md
new file mode 100644
index 000000000..fb3dfbf21
--- /dev/null
+++ b/src/isa-l/CONTRIBUTING.md
@@ -0,0 +1,39 @@
+# Contributing to ISA-L
+
+Everyone is welcome to contribute. Patches may be submitted using GitHub pull
+requests (PRs). All commits must be signed off by the developer (--signoff)
+which indicates that you agree to the Developer Certificate of Origin. Patch
+discussion will happen directly on the GitHub PR. Design pre-work and general
+discussion occurs on the [mailing list]. Anyone can provide feedback in either
+location and all discussion is welcome. Decisions on whether to merge patches
+will be handled by the maintainer.
+
+## License
+
+ISA-L is licensed using a BSD 3-clause [license]. All code submitted to
+the project is required to carry that license.
+
+## Certificate of Origin
+
+In order to get a clear contribution chain of trust we use the
+[signed-off-by language] used by the Linux kernel project.
+
+## Mailing List
+
+Contributors and users are welcome to submit new requests for our roadmap, submit
+patches, file issues, and ask questions on our [mailing list].
+
+## Coding Style
+
+The coding style for ISA-L C code roughly follows the Linux kernel guidelines. Use
+the included indent script to format C code.
+
+ ./tools/iindent your_files.c
+
+And run the check format script before submitting.
+
+ ./tools/check_format.sh
+
+[mailing list]:https://lists.01.org/hyperkitty/list/isal@lists.01.org/
+[license]:LICENSE
+[signed-off-by language]:https://01.org/community/signed-process
diff --git a/src/isa-l/Doxyfile b/src/isa-l/Doxyfile
new file mode 100644
index 000000000..d49b079ae
--- /dev/null
+++ b/src/isa-l/Doxyfile
@@ -0,0 +1,33 @@
+PROJECT_NAME = "Intel Intelligent Storage Acceleration Library"
+PROJECT_BRIEF = "ISA-L API reference doc"
+
+OUTPUT_DIRECTORY = generated_doc
+FULL_PATH_NAMES = NO
+TAB_SIZE = 8
+ALIASES = "requires=\xrefitem requires \"Requires\" \"Instruction Set Requirements for arch-specific functions (non-multibinary)\""
+OPTIMIZE_OUTPUT_FOR_C = YES
+HIDE_UNDOC_MEMBERS = YES
+USE_MDFILE_AS_MAINPAGE = README.md
+
+INPUT = isa-l.h \
+ include \
+ README.md \
+ CONTRIBUTING.md \
+ Release_notes.txt \
+ doc/test.md \
+ doc/build.md
+
+EXCLUDE = include/test.h include/types.h include/unaligned.h
+EXCLUDE_PATTERNS = */include/*_multibinary.h
+EXAMPLE_PATH = . crc raid erasure_code igzip
+PAPER_TYPE = letter
+LATEX_SOURCE_CODE = YES
+GENERATE_TREEVIEW = YES
+MACRO_EXPANSION = YES
+EXPAND_ONLY_PREDEF = YES
+PREDEFINED = "DECLARE_ALIGNED(n, a)=ALIGN n" \
+ __declspec(x)='x' \
+ align(x)='ALIGN \
+ x'
+EXPAND_AS_DEFINED = DECLARE_ALIGNED
+EXTENSION_MAPPING = "txt=md"
diff --git a/src/isa-l/LICENSE b/src/isa-l/LICENSE
new file mode 100644
index 000000000..ecebef110
--- /dev/null
+++ b/src/isa-l/LICENSE
@@ -0,0 +1,26 @@
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/isa-l/Makefile.am b/src/isa-l/Makefile.am
new file mode 100644
index 000000000..646b85544
--- /dev/null
+++ b/src/isa-l/Makefile.am
@@ -0,0 +1,182 @@
+EXTRA_DIST = autogen.sh Makefile.unx make.inc Makefile.nmake isa-l.def LICENSE README.md Doxyfile CONTRIBUTING.md
+CLEANFILES =
+LDADD =
+AM_MAKEFLAGS = --no-print-directory
+noinst_HEADERS =
+pkginclude_HEADERS = include/test.h include/types.h
+noinst_LTLIBRARIES =
+bin_PROGRAMS =
+INCLUDE = -I $(srcdir)/include/
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libisal.pc
+EXTRA_DIST += libisal.pc.in
+CLEANFILES += libisal.pc
+
+lsrc=
+src_include=
+extern_hdrs=
+other_src=
+check_tests=
+unit_tests=
+perf_tests=
+unit_tests_extra=
+perf_tests_extra=
+examples=
+other_tests=
+other_tests_x86_64=
+other_tests_x86_32=
+other_tests_aarch64=
+other_tests_ppc64le=
+lsrc_x86_64=
+lsrc_x86_32=
+lsrc_aarch64=
+lsrc_ppc64le=
+lsrc_base_aliases=
+lsrc32=
+unit_tests32=
+perf_tests32=
+progs=
+
+# Include units
+
+include erasure_code/Makefile.am
+include raid/Makefile.am
+include crc/Makefile.am
+include igzip/Makefile.am
+include tests/fuzz/Makefile.am
+include examples/ec/Makefile.am
+include programs/Makefile.am
+include mem/Makefile.am
+
+# LIB version info not necessarily the same as package version
+LIBISAL_CURRENT=2
+LIBISAL_REVISION=30
+LIBISAL_AGE=0
+
+lib_LTLIBRARIES = libisal.la
+pkginclude_HEADERS += $(sort ${extern_hdrs})
+libisal_la_SOURCES = ${lsrc}
+
+if CPU_X86_64
+libisal_la_SOURCES += ${lsrc_x86_64}
+other_tests += ${other_tests_x86_64}
+endif
+
+if CPU_X86_32
+libisal_la_SOURCES += ${lsrc_x86_32}
+other_tests += ${other_tests_x86_32}
+endif
+
+if CPU_AARCH64
+libisal_la_SOURCES += ${lsrc_aarch64}
+other_tests += ${other_tests_aarch64}
+endif
+
+if CPU_PPC64LE
+libisal_la_SOURCES += ${lsrc_ppc64le}
+other_tests += ${other_tests_ppc64le}
+endif
+
+if CPU_UNDEFINED
+libisal_la_SOURCES += ${lsrc_base_aliases}
+endif
+
+nobase_include_HEADERS = isa-l.h
+libisal_la_LDFLAGS = $(AM_LDFLAGS) \
+ -version-info $(LIBISAL_CURRENT):$(LIBISAL_REVISION):$(LIBISAL_AGE)
+libisal_la_LIBADD = ${noinst_LTLIBRARIES}
+
+EXTRA_DIST += ${other_src}
+EXTRA_DIST += Release_notes.txt
+
+# For tests
+LDADD += libisal.la
+check_PROGRAMS = ${check_tests}
+TESTS = ${check_tests}
+
+# For additional tests
+EXTRA_PROGRAMS = ${unit_tests}
+EXTRA_PROGRAMS += ${perf_tests}
+EXTRA_PROGRAMS += ${other_tests}
+EXTRA_PROGRAMS += ${examples}
+CLEANFILES += ${EXTRA_PROGRAMS}
+
+programs:${progs}
+perfs: ${perf_tests}
+tests: ${unit_tests}
+checks: ${check_tests}
+other: ${other_tests}
+perf: $(addsuffix .run,$(perf_tests))
+ex: ${examples}
+test: $(addsuffix .run,$(unit_tests))
+
+# Build rule to run tests
+%.run: %
+ $<
+ @echo Completed run: $<
+
+# Support for yasm/nasm/gas
+if INTEL_CET_ENABLED
+export CET_LD=$(LD)
+endif
+if USE_YASM
+if INTEL_CET_ENABLED
+ as_filter = ${srcdir}/tools/yasm-cet-filter.sh
+else
+ as_filter = ${srcdir}/tools/yasm-filter.sh
+endif
+endif
+if USE_NASM
+if INTEL_CET_ENABLED
+ as_filter = ${srcdir}/tools/nasm-cet-filter.sh
+else
+ as_filter = ${srcdir}/tools/nasm-filter.sh
+endif
+endif
+if CPU_AARCH64
+ as_filter = $(CC) -D__ASSEMBLY__
+endif
+
+CCAS = $(as_filter)
+EXTRA_DIST += tools/yasm-filter.sh tools/nasm-filter.sh
+EXTRA_DIST += tools/yasm-cet-filter.sh tools/nasm-cet-filter.sh
+
+AM_CFLAGS = ${my_CFLAGS} ${INCLUDE} $(src_include) ${D}
+if CPU_AARCH64
+AM_CCASFLAGS = ${AM_CFLAGS}
+else
+AM_CCASFLAGS = ${yasm_args} ${INCLUDE} ${src_include} ${DEFS} ${D}
+endif
+
+.asm.s:
+ @echo " MKTMP " $@;
+ @cp $< $@
+
+# Generate isa-l.h
+BUILT_SOURCES = isa-l.h
+CLEANFILES += isa-l.h
+isa-l.h:
+ @echo 'Building $@'
+ @echo '' >> $@
+ @echo '/**' >> $@
+ @echo ' * @file isa-l.h' >> $@
+ @echo ' * @brief Include for ISA-L library' >> $@
+ @echo ' */' >> $@
+ @echo '' >> $@
+ @echo '#ifndef _ISAL_H_' >> $@
+ @echo '#define _ISAL_H_' >> $@
+ @echo '' >> $@
+ @echo '#define.ISAL_MAJOR_VERSION.${VERSION}' | ${AWK} -F . '{print $$1, $$2, $$3}' >> $@
+ @echo '#define.ISAL_MINOR_VERSION.${VERSION}' | ${AWK} -F . '{print $$1, $$2, $$4}' >> $@
+ @echo '#define.ISAL_PATCH_VERSION.${VERSION}' | ${AWK} -F . '{print $$1, $$2, $$5}' >> $@
+ @echo '#define ISAL_MAKE_VERSION(maj, min, patch) ((maj) * 0x10000 + (min) * 0x100 + (patch))' >> $@
+ @echo '#define ISAL_VERSION ISAL_MAKE_VERSION(ISAL_MAJOR_VERSION, ISAL_MINOR_VERSION, ISAL_PATCH_VERSION)' >> $@
+ @echo '' >> $@
+ @for unit in $(sort $(extern_hdrs)); do echo "#include <isa-l/$$unit>" | sed -e 's;include/;;' >> $@; done
+ @echo '#endif //_ISAL_H_' >> $@
+
+doc: isa-l.h
+ (cat Doxyfile; echo 'PROJECT_NUMBER=${VERSION}') | doxygen -
+ $(MAKE) -C generated_doc/latex &> generated_doc/latex_build_api.log
+ cp generated_doc/latex/refman.pdf isa-l_api_${VERSION}.pdf
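A note on the isa-l.h recipe above: each dotted @echo line uses '.' as the awk field
separator, so '#define.ISAL_MAJOR_VERSION.${VERSION}' splits into words and fields 3-5
carry the major, minor, and patch components of ${VERSION}. With VERSION=2.30.0 (the
version declared in configure.ac below), the generated version block should come out
roughly as the following C; the packed value in the last comment is arithmetic added
here for illustration, not part of the generated file:

    /* Sketch of the version block the isa-l.h recipe generates for
     * VERSION=2.30.0. Fields 3-5 of the awk-split echo lines supply
     * the 2, 30 and 0. */
    #define ISAL_MAJOR_VERSION 2
    #define ISAL_MINOR_VERSION 30
    #define ISAL_PATCH_VERSION 0
    #define ISAL_MAKE_VERSION(maj, min, patch) ((maj) * 0x10000 + (min) * 0x100 + (patch))
    #define ISAL_VERSION ISAL_MAKE_VERSION(ISAL_MAJOR_VERSION, ISAL_MINOR_VERSION, ISAL_PATCH_VERSION)
    /* ISAL_VERSION == 0x021e00: 2 * 0x10000 + 30 * 0x100 + 0 */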
diff --git a/src/isa-l/Makefile.nmake b/src/isa-l/Makefile.nmake
new file mode 100644
index 000000000..6360d1f77
--- /dev/null
+++ b/src/isa-l/Makefile.nmake
@@ -0,0 +1,318 @@
+########################################################################
+# Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+# This file can be auto-regenerated with $make -f Makefile.unx Makefile.nmake
+
+objs = \
+ bin\ec_base.obj \
+ bin\raid_base.obj \
+ bin\crc_base.obj \
+ bin\crc64_base.obj \
+ bin\igzip.obj \
+ bin\hufftables_c.obj \
+ bin\igzip_base.obj \
+ bin\igzip_icf_base.obj \
+ bin\adler32_base.obj \
+ bin\flatten_ll.obj \
+ bin\encode_df.obj \
+ bin\igzip_icf_body.obj \
+ bin\huff_codes.obj \
+ bin\igzip_inflate.obj \
+ bin\mem_zero_detect_base.obj \
+ bin\ec_highlevel_func.obj \
+ bin\gf_vect_mul_sse.obj \
+ bin\gf_vect_mul_avx.obj \
+ bin\gf_vect_dot_prod_sse.obj \
+ bin\gf_vect_dot_prod_avx.obj \
+ bin\gf_vect_dot_prod_avx2.obj \
+ bin\gf_2vect_dot_prod_sse.obj \
+ bin\gf_3vect_dot_prod_sse.obj \
+ bin\gf_4vect_dot_prod_sse.obj \
+ bin\gf_5vect_dot_prod_sse.obj \
+ bin\gf_6vect_dot_prod_sse.obj \
+ bin\gf_2vect_dot_prod_avx.obj \
+ bin\gf_3vect_dot_prod_avx.obj \
+ bin\gf_4vect_dot_prod_avx.obj \
+ bin\gf_5vect_dot_prod_avx.obj \
+ bin\gf_6vect_dot_prod_avx.obj \
+ bin\gf_2vect_dot_prod_avx2.obj \
+ bin\gf_3vect_dot_prod_avx2.obj \
+ bin\gf_4vect_dot_prod_avx2.obj \
+ bin\gf_5vect_dot_prod_avx2.obj \
+ bin\gf_6vect_dot_prod_avx2.obj \
+ bin\gf_vect_mad_sse.obj \
+ bin\gf_2vect_mad_sse.obj \
+ bin\gf_3vect_mad_sse.obj \
+ bin\gf_4vect_mad_sse.obj \
+ bin\gf_5vect_mad_sse.obj \
+ bin\gf_6vect_mad_sse.obj \
+ bin\gf_vect_mad_avx.obj \
+ bin\gf_2vect_mad_avx.obj \
+ bin\gf_3vect_mad_avx.obj \
+ bin\gf_4vect_mad_avx.obj \
+ bin\gf_5vect_mad_avx.obj \
+ bin\gf_6vect_mad_avx.obj \
+ bin\gf_vect_mad_avx2.obj \
+ bin\gf_2vect_mad_avx2.obj \
+ bin\gf_3vect_mad_avx2.obj \
+ bin\gf_4vect_mad_avx2.obj \
+ bin\gf_5vect_mad_avx2.obj \
+ bin\gf_6vect_mad_avx2.obj \
+ bin\ec_multibinary.obj \
+ bin\gf_vect_dot_prod_avx512.obj \
+ bin\gf_2vect_dot_prod_avx512.obj \
+ bin\gf_3vect_dot_prod_avx512.obj \
+ bin\gf_4vect_dot_prod_avx512.obj \
+ bin\gf_5vect_dot_prod_avx512.obj \
+ bin\gf_6vect_dot_prod_avx512.obj \
+ bin\gf_vect_mad_avx512.obj \
+ bin\gf_2vect_mad_avx512.obj \
+ bin\gf_3vect_mad_avx512.obj \
+ bin\gf_4vect_mad_avx512.obj \
+ bin\gf_5vect_mad_avx512.obj \
+ bin\gf_6vect_mad_avx512.obj \
+ bin\xor_gen_sse.obj \
+ bin\pq_gen_sse.obj \
+ bin\xor_check_sse.obj \
+ bin\pq_check_sse.obj \
+ bin\pq_gen_avx.obj \
+ bin\xor_gen_avx.obj \
+ bin\pq_gen_avx2.obj \
+ bin\xor_gen_avx512.obj \
+ bin\pq_gen_avx512.obj \
+ bin\raid_multibinary.obj \
+ bin\crc16_t10dif_01.obj \
+ bin\crc16_t10dif_by4.obj \
+ bin\crc16_t10dif_02.obj \
+ bin\crc16_t10dif_by16_10.obj \
+ bin\crc16_t10dif_copy_by4.obj \
+ bin\crc16_t10dif_copy_by4_02.obj \
+ bin\crc32_ieee_01.obj \
+ bin\crc32_ieee_02.obj \
+ bin\crc32_ieee_by4.obj \
+ bin\crc32_ieee_by16_10.obj \
+ bin\crc32_iscsi_01.obj \
+ bin\crc32_iscsi_00.obj \
+ bin\crc32_iscsi_by16_10.obj \
+ bin\crc_multibinary.obj \
+ bin\crc64_multibinary.obj \
+ bin\crc64_ecma_refl_by8.obj \
+ bin\crc64_ecma_refl_by16_10.obj \
+ bin\crc64_ecma_norm_by8.obj \
+ bin\crc64_ecma_norm_by16_10.obj \
+ bin\crc64_iso_refl_by8.obj \
+ bin\crc64_iso_refl_by16_10.obj \
+ bin\crc64_iso_norm_by8.obj \
+ bin\crc64_iso_norm_by16_10.obj \
+ bin\crc64_jones_refl_by8.obj \
+ bin\crc64_jones_refl_by16_10.obj \
+ bin\crc64_jones_norm_by8.obj \
+ bin\crc64_jones_norm_by16_10.obj \
+ bin\crc32_gzip_refl_by8.obj \
+ bin\crc32_gzip_refl_by8_02.obj \
+ bin\crc32_gzip_refl_by16_10.obj \
+ bin\igzip_body.obj \
+ bin\igzip_finish.obj \
+ bin\igzip_icf_body_h1_gr_bt.obj \
+ bin\igzip_icf_finish.obj \
+ bin\rfc1951_lookup.obj \
+ bin\adler32_sse.obj \
+ bin\adler32_avx2_4.obj \
+ bin\igzip_multibinary.obj \
+ bin\igzip_update_histogram_01.obj \
+ bin\igzip_update_histogram_04.obj \
+ bin\igzip_decode_block_stateless_01.obj \
+ bin\igzip_decode_block_stateless_04.obj \
+ bin\igzip_inflate_multibinary.obj \
+ bin\encode_df_04.obj \
+ bin\encode_df_06.obj \
+ bin\proc_heap.obj \
+ bin\igzip_deflate_hash.obj \
+ bin\igzip_gen_icf_map_lh1_06.obj \
+ bin\igzip_gen_icf_map_lh1_04.obj \
+ bin\igzip_set_long_icf_fg_04.obj \
+ bin\igzip_set_long_icf_fg_06.obj \
+ bin\mem_zero_detect_avx.obj \
+ bin\mem_zero_detect_sse.obj \
+ bin\mem_multibinary.obj
+
+INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iprograms/ -Imem/ -Iinclude/ -Itests/fuzz/ -Iexamples/ec/
+# Modern asm feature level, consider upgrading nasm/yasm before decreasing feature_level
+FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10
+CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy
+CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd
+LINKFLAGS = -nologo -incremental:no -debug
+CFLAGS = $(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $(FEAT_FLAGS) $(INCLUDES) $(D)
+AFLAGS = -f win64 $(FEAT_FLAGS) $(INCLUDES) $(D)
+CC = cl
+# or CC = icl -Qstd=c99
+AS = nasm
+
+lib: bin static dll
+static: bin isa-l_static.lib
+dll: bin isa-l.dll
+
+bin: ; -mkdir $@
+
+isa-l_static.lib: $(objs)
+ lib -out:$@ @<<
+$?
+<<
+
+isa-l.dll: $(objs)
+ link -out:$@ -dll -def:isa-l.def $(LINKFLAGS) @<<
+$?
+<<
+
+{erasure_code}.c.obj:
+ $(CC) $(CFLAGS) /c -Fo$@ $?
+{erasure_code}.asm.obj:
+ $(AS) $(AFLAGS) -o $@ $?
+
+{raid}.c.obj:
+ $(CC) $(CFLAGS) /c -Fo$@ $?
+{raid}.asm.obj:
+ $(AS) $(AFLAGS) -o $@ $?
+
+{crc}.c.obj:
+ $(CC) $(CFLAGS) /c -Fo$@ $?
+{crc}.asm.obj:
+ $(AS) $(AFLAGS) -o $@ $?
+
+{igzip}.c.obj:
+ $(CC) $(CFLAGS) /c -Fo$@ $?
+{igzip}.asm.obj:
+ $(AS) $(AFLAGS) -o $@ $?
+
+{programs}.c.obj:
+ $(CC) $(CFLAGS) /c -Fo$@ $?
+{programs}.asm.obj:
+ $(AS) $(AFLAGS) -o $@ $?
+
+{mem}.c.obj:
+ $(CC) $(CFLAGS) /c -Fo$@ $?
+{mem}.asm.obj:
+ $(AS) $(AFLAGS) -o $@ $?
+
+
+# Examples
+ex = \
+ xor_example.exe \
+ crc_simple_test.exe \
+ crc64_example.exe \
+ igzip_example.exe \
+ igzip_sync_flush_example.exe \
+ ec_simple_example.exe \
+ ec_piggyback_example.exe
+
+ex: lib $(ex)
+
+$(ex): $(@B).obj
+
+.obj.exe:
+ link /out:$@ $(LINKFLAGS) isa-l.lib $?
+
+# Check tests
+checks = \
+ gf_vect_mul_test.exe \
+ erasure_code_test.exe \
+ gf_inverse_test.exe \
+ erasure_code_update_test.exe \
+ xor_gen_test.exe \
+ pq_gen_test.exe \
+ xor_check_test.exe \
+ pq_check_test.exe \
+ crc16_t10dif_test.exe \
+ crc16_t10dif_copy_test.exe \
+ crc64_funcs_test.exe \
+ crc32_funcs_test.exe \
+ igzip_rand_test.exe \
+ igzip_wrapper_hdr_test.exe \
+ checksum32_funcs_test.exe \
+ mem_zero_detect_test.exe
+
+checks: lib $(checks)
+$(checks): $(@B).obj
+check: $(checks)
+ !$?
+
+# Unit tests
+tests = \
+ gf_vect_mul_base_test.exe \
+ gf_vect_dot_prod_base_test.exe \
+ gf_vect_dot_prod_test.exe \
+ gf_vect_mad_test.exe \
+ erasure_code_base_test.exe
+
+tests: lib $(tests)
+$(tests): $(@B).obj
+
+# Performance tests
+perfs = \
+ gf_vect_mul_perf.exe \
+ gf_vect_dot_prod_perf.exe \
+ gf_vect_dot_prod_1tbl.exe \
+ erasure_code_perf.exe \
+ erasure_code_base_perf.exe \
+ erasure_code_update_perf.exe \
+ xor_gen_perf.exe \
+ pq_gen_perf.exe \
+ crc16_t10dif_perf.exe \
+ crc16_t10dif_copy_perf.exe \
+ crc16_t10dif_op_perf.exe \
+ crc32_ieee_perf.exe \
+ crc32_iscsi_perf.exe \
+ crc64_funcs_perf.exe \
+ crc32_gzip_refl_perf.exe \
+ adler32_perf.exe \
+ mem_zero_detect_perf.exe
+
+perfs: lib $(perfs)
+$(perfs): $(@B).obj
+
+progs = \
+ igzip.exe
+
+progs: lib $(progs)
+igzip.exe: programs\igzip_cli.obj
+ link /out:$@ $(LINKFLAGS) isa-l.lib $?
+
+clean:
+ -if exist *.obj del *.obj
+ -if exist bin\*.obj del bin\*.obj
+ -if exist isa-l_static.lib del isa-l_static.lib
+ -if exist *.exe del *.exe
+ -if exist *.pdb del *.pdb
+ -if exist isa-l.lib del isa-l.lib
+ -if exist isa-l.dll del isa-l.dll
+ -if exist isa-l.exp del isa-l.exp
+
+zlib.lib:
+igzip_perf.exe: zlib.lib
+igzip_inflate_test.exe: zlib.lib
diff --git a/src/isa-l/Makefile.unx b/src/isa-l/Makefile.unx
new file mode 100644
index 000000000..ada6eb173
--- /dev/null
+++ b/src/isa-l/Makefile.unx
@@ -0,0 +1,56 @@
+########################################################################
+# Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+units = erasure_code raid crc igzip programs mem
+
+default: lib
+
+ifeq (,$(findstring crc,$(units)))
+ ifneq (,$(findstring igzip,$(units)))
+ override units += crc
+ endif
+endif
+
+include $(foreach unit,$(units), $(unit)/Makefile.am)
+
+ifneq (,$(findstring igzip,$(units)))
+ include tests/fuzz/Makefile.am
+endif
+
+ifneq (,$(findstring erasure_code,$(units)))
+ include examples/ec/Makefile.am
+endif
+
+# Override individual lib names to make one inclusive library.
+lib_name := bin/isa-l.a
+
+include make.inc
+include tools/gen_nmake.mk
+
+VPATH = . $(units) include tests/fuzz examples/ec
diff --git a/src/isa-l/README.md b/src/isa-l/README.md
new file mode 100644
index 000000000..091fee2bd
--- /dev/null
+++ b/src/isa-l/README.md
@@ -0,0 +1,74 @@
+Intel(R) Intelligent Storage Acceleration Library
+=================================================
+
+[![Build Status](https://travis-ci.org/intel/isa-l.svg?branch=master)](https://travis-ci.org/intel/isa-l)
+[![Package on conda-forge](https://img.shields.io/conda/v/conda-forge/isa-l.svg)](https://anaconda.org/conda-forge/isa-l)
+
+ISA-L is a collection of optimized low-level functions targeting storage
+applications. ISA-L includes:
+* Erasure codes - Fast block Reed-Solomon type erasure codes for any
+ encode/decode matrix in GF(2^8).
+* CRC - Fast implementations of cyclic redundancy check. Six different
+ polynomials supported.
+ - iscsi32, ieee32, t10dif, ecma64, iso64, jones64.
+* Raid - calculate and operate on XOR and P+Q parity found in common RAID
+ implementations.
+* Compression - Fast deflate-compatible data compression.
+* De-compression - Fast inflate-compatible data decompression.
+
+Also see:
+* [ISA-L for updates](https://github.com/intel/isa-l).
+* For crypto functions see [isa-l_crypto on github](https://github.com/intel/isa-l_crypto).
+* The [github wiki](https://github.com/intel/isa-l/wiki) including a list of
+ [distros/ports](https://github.com/intel/isa-l/wiki/Ports--Repos) offering binary packages.
+* ISA-L [mailing list](https://lists.01.org/hyperkitty/list/isal@lists.01.org/).
+* [Contributing](CONTRIBUTING.md).
+
+Building ISA-L
+--------------
+
+### Prerequisites
+
+* Make: GNU 'make' or 'nmake' (Windows).
+* Optional: Building with autotools requires autoconf/automake packages.
+
+x86_64:
+* Assembler: nasm v2.11.01 or later (nasm v2.13 or better suggested for building in AVX512 support)
+ or yasm version 1.2.0 or later.
+* Compiler: gcc, clang, icc or VC compiler.
+
+aarch64:
+* Assembler: gas v2.24 or later.
+* Compiler: gcc v4.7 or later.
+
+other:
+* Compiler: Portable base functions are available that build with most C compilers.
+
+### Autotools
+To build and install the library with autotools it is usually sufficient to run:
+
+ ./autogen.sh
+ ./configure
+ make
+ sudo make install
+
+### Makefile
+To use a standard makefile run:
+
+ make -f Makefile.unx
+
+### Windows
+On Windows use nmake to build dll and static lib:
+
+ nmake -f Makefile.nmake
+
+or see [details on setting up environment here](doc/build.md).
+
+### Other make targets
+Other targets include:
+* `make check` : create and run tests
+* `make tests` : create additional unit tests
+* `make perfs` : create included performance tests
+* `make ex` : build examples
+* `make other` : build other utilities such as compression file tests
+* `make doc` : build API manual
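The README above lists six CRC polynomials exposed through include/crc.h and
include/crc64.h. A minimal usage sketch, assuming the installed <isa-l.h> umbrella
header and the crcXX_name(init_crc, buf, len) prototype shape shown in those headers:

    /* Minimal CRC sketch; build with: cc crc_demo.c -lisal
     * Assumes the prototypes from include/crc.h and include/crc64.h. */
    #include <inttypes.h>
    #include <stdio.h>
    #include <string.h>
    #include <isa-l.h>

    int main(void)
    {
            unsigned char buf[] = "hello isa-l";
            uint64_t len = strlen((char *)buf);

            /* Seed with 0; the multibinary dispatcher picks the fastest
             * implementation for the running CPU. */
            uint32_t c32 = crc32_ieee(0, buf, len);
            uint64_t c64 = crc64_ecma_refl(0, buf, len);

            printf("crc32_ieee      = 0x%08" PRIx32 "\n", c32);
            printf("crc64_ecma_refl = 0x%016" PRIx64 "\n", c64);
            return 0;
    }

One caveat: crc32_iscsi() takes its arguments in (buffer, length, seed) order in
include/crc.h, so check the header before swapping polynomials.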
diff --git a/src/isa-l/Release_notes.txt b/src/isa-l/Release_notes.txt
new file mode 100644
index 000000000..e0211f72f
--- /dev/null
+++ b/src/isa-l/Release_notes.txt
@@ -0,0 +1,308 @@
+v2.30 Intel Intelligent Storage Acceleration Library Release Notes
+==================================================================
+
+RELEASE NOTE CONTENTS
+1. KNOWN ISSUES
+2. FIXED ISSUES
+3. CHANGE LOG & FEATURES ADDED
+
+1. KNOWN ISSUES
+----------------
+
+* Perf tests do not run in Windows environment.
+
+* 32-bit lib is not supported in Windows.
+
+2. FIXED ISSUES
+---------------
+v2.30
+
+* Intel CET support.
+* Windows nasm support fix.
+
+v2.28
+
+* Fix documentation on gf_vect_mad(). Min length listed as 32 instead of
+ required min 64 bytes.
+
+v2.27
+
+* Fix lack of install for pkg-config files
+
+v2.26
+
+* Fixes for sanitizer warnings.
+
+v2.25
+
+* Fix for nasm on Mac OS X/darwin.
+
+v2.24
+
+* Fix for crc32_iscsi(). Potential over-read for small buffers. For an input
+ buffer shorter than 8 bytes and aligned to an 8-byte boundary, the function
+ could read past the given length. Previously it could cause a segfault only
+ when a length of 0 and an invalid buffer were passed. The calculated CRC is
+ unchanged.
+
+* Fix for compression/decompression of > 4GB files. For streaming compression
+ of extremely large files, the total_out parameter would wrap and could
+ potentially flag an otherwise valid lookback distance as being invalid.
+ Total_out is still 32bit for zlib compatibility. No inconsistent compressed
+ buffers were generated by the issue.
+
+v2.23
+
+* Fix for histogram generation base function.
+* Fix library build warnings on macOS.
+* Fix igzip to use bsf instruction when tzcnt is not available.
+
+v2.22
+
+* Fix ISA-L builds for other architectures. Base function and examples
+ sanitized for non-IA builds.
+
+* Fix fuzz test script to work with llvm 6.0 builtin libFuzz.
+
+v2.20
+
+* Inflate total_out behavior corrected for in-progress decompression.
+ Previously total_out represented the total bytes decompressed into the output
+ buffer or temp internal buffer. This is changed to be only the bytes put into
+ the output buffer.
+
+* Fixed issue with isal_create_hufftables_subset. Affects semi-dynamic
+ compression use case when explicitly creating hufftables from histogram. The
+ _hufftables_subset function could fail to generate length symbols for
+ lengths that were never seen.
+
+v2.19
+
+* Fix erasure code test that violates rs matrix bounds.
+
+* Fix 0 length file and looping errors in igzip_inflate_test.
+
+v2.18
+
+* Mac OS X/darwin systems no longer require the --target=darwin config option.
+ The autoconf canonical build should detect this automatically.
+
+v2.17
+
+* Fix igzip using 32K window and a shared object
+
+* Fix igzip undefined instruction error on Nehalem.
+
+* Fixed issue in crc performance tests where OS optimizations turned cold cache
+ tests into warm tests.
+
+v2.15
+
+* Fix for windows register save in gf_6vect_mad_avx2.asm. Only affects windows
+ versions of ec_encode_data_update() running with AVX2. A GP register was not
+ properly restored resulting in corruption on return.
+
+v2.14
+
+* Building in unit directories is no longer supported removing the issue of
+ leftover object files causing the top-level make build to fail.
+
+v2.10
+
+* Fix for windows register save overlap in gf_{3-6}vect_dot_prod_sse.asm. Only
+ affects windows versions of erasure code. GP register saves/restore were
+ pushed to same stack area as XMM.
+
+3. CHANGE LOG & FEATURES ADDED
+------------------------------
+v2.30
+
+* Igzip compression enhancements.
+ - New functions for dictionary acceleration. Split dictionary processing and
+ resetting can greatly accelerate the performance of compressing many small
+ files with a dictionary.
+ - New static level 0 header decode tables. Accelerates decompressing small
+ files that are level 0 compressed by skipping the known header parsing.
+ - New feature for igzip cli tool: support for concatenated .gz files. On
+ decompression, igzip will process a series of independent, concatenated .gz
+ files into one output stream.
+
+* CRC Improvements
+ - New vclmul version of crc32_iscsi().
+ - Updates for aarch64.
+
+v2.29
+
+* CRC Improvements
+ - New AVX512 vclmul versions of crc16_t10dif(), crc32_ieee(), crc32_gzip_refl.
+
+* Erasure code improvements
+ - Added AVX512 ec functions with 5 and 6 outputs. Can improve performance for
+ codes with 5 or more parity by running in batches of up to 6 at a time.
+
+v2.28
+
+* New next-arch versions of 64-bit CRC. All norm and reflected 64-bit
+ polynomials are expanded to utilize vpclmulqdq.
+
+v2.27
+
+* New multi-threaded compression option for igzip cli tool
+
+v2.26
+
+* Adler32 added to external API.
+* Multi-arch improvements.
+* Performance test improvements.
+
+v2.25
+
+* Igzip performance improvements and features.
+ - Performance improvements for incompressible files. Random or incompressible
+ files can be up to 3x faster in level 1 or 2 compression.
+ - Additional small file performance improvements.
+ - New options in igzip cli: use name from header or not, test compressed file.
+
+* Multi-arch autoconf script.
+ - Autoconf should detect architecture and run base functions at minimum.
+
+v2.24
+
+* Igzip small file performance improvements and new features.
+ - Better performance on small files.
+ - New gzip/zlib header and trailer handling.
+ - New gzip/zlib header parsing helper functions.
+ - New user-space compression/decompression tool igzip.
+
+* New mem unit added with first function isal_zero_detect().
+
+v2.23
+
+* Igzip inflate (decompression) performance improvements.
+ - Implemented multi-byte decode for inflate. Decode can pack up to three
+ symbols into the decode table making some compressed streams decompress much
+ faster depending on the prevalence of short codes.
+
+v2.22
+
+* Igzip: AVX2 version of level 3 compression added.
+
+* Erasure code examples
+ - New examples for standard EC encode and decode.
+ - Example of piggyback EC encode and decode.
+
+v2.21
+
+* Igzip improvements
+ - New compression levels added. ISA-L fast deflate now has more levels to
+ balance speed vs. target compression level. Level 0, 1 are as in previous
+ generations. New levels 2 & 3 target higher compression roughly comparable
+ to zlib levels 2-3. Level 3 is currently only optimized for processors with
+ AVX512 instructions.
+
+* New T10dif & copy function - crc16_t10dif_copy()
+ - CRC and copy was added to emulate T10dif operations such as DIF insert and
+ strip. This function stitches together CRC and memcpy operations
+ eliminating an extra data read.
+
+* CRC32 iscsi performance improvements
+ - Fixes issue under some distributions where warm cache performance was
+ reduced.
+
+v2.20
+
+* Igzip improvements
+ - Optimized deflate_hash in compression functions.
+ Improves performance of using preset dictionary.
+ - Removed alignment restrictions on input structure.
+
+v2.19
+
+* Igzip improvements
+
+ - Add optimized Adler-32 checksum.
+
+ - Implement zlib compression format.
+
+ - Add stateful dictionary support.
+
+ - Add struct reset functions for both deflate and inflate.
+
+* Reflected IEEE format CRC32 is now released. The function interface is named
+ crc32_gzip_refl.
+
+* The exact working limits of the erasure code Reed-Solomon matrix can be
+ determined by the newly added program gen_rs_matrix_limits.
+
+v2.18
+
+* New 2-pass fully-dynamic deflate compression (level -1). ISA-L fast deflate
+ now has two levels. Level 0 (default) is the same as previous generations.
+ Setting to level 1 will switch to the fully-dynamic compression that will
+ typically reach higher compression ratios.
+
+* RAID AVX512 functions.
+
+v2.17
+
+* New fast decompression (inflate)
+
+* Compression improvements (deflate)
+ - Speed and compression ratio improvements.
+ - Fast custom Huffman code generation.
+ - New features:
+ * Run-time option of gzip crc calculation and headers/trailer.
+ * Choice of static header (BTYPE 01) blocks.
+ * LARGE_WINDOW, 32K history, now default.
+ * Stateless full flush mode.
+
+* CRC64
+ - Six new 64-bit polynomials supported. Normal and reflected versions of ECMA,
+ ISO and Jones polynomials.
+
+v2.16
+
+* Units added: crc, raid, igzip (deflate compression).
+
+v2.15
+
+* Erasure code updates. New AVX512 versions.
+
+* Nasm support. ISA-L ported to build with nasm or yasm assembler.
+
+* Windows DLL support. Windows builds DLL by default.
+
+v2.14
+
+* Autoconf and autotools build allows easier porting to additional systems.
+ Previous make system still available to embedded users with Makefile.unx.
+
+* Includes update for building on Mac OS X/darwin systems. Add --target=darwin
+ to ./configure step.
+
+v2.13
+
+* Erasure code improvements
+ - 32-bit port of optimized gf_vect_dot_prod() functions. This makes
+ ec_encode_data() functions much faster on 32-bit processors.
+ - Avoton performance improvements. Performance on Avoton for
+ gf_vect_dot_prod() and ec_encode_data() can improve by as much as 20%.
+
+v2.11
+
+* Incremental erasure code. New functions added to erasure code to handle
+ single source update of code blocks. The function ec_encode_data_update()
+ works with parameters similar to ec_encode_data() but is called incrementally
+ with each source block. These versions are useful when source blocks are not
+ all available at once.
+
+v2.10
+
+* Erasure code updates
+ - New AVX and AVX2 support functions.
+ - Changes min len requirement on gf_vect_dot_prod() to 32 from 16.
+ - Tests include both source and parity recovery with ec_encode_data().
+ - New encoding examples with Vandermonde or Cauchy matrix.
+
+v2.8
+
+* First open release of erasure code unit that is part of ISA-L.
diff --git a/src/isa-l/autogen.sh b/src/isa-l/autogen.sh
new file mode 100755
index 000000000..0a3189383
--- /dev/null
+++ b/src/isa-l/autogen.sh
@@ -0,0 +1,17 @@
+#!/bin/sh -e
+
+autoreconf --install --symlink -f
+
+libdir() {
+ echo $(cd $1/$(gcc -print-multi-os-directory); pwd)
+}
+
+args="--prefix=/usr --libdir=$(libdir /usr/lib)"
+
+echo
+echo "----------------------------------------------------------------"
+echo "Initialized build system. For a common configuration please run:"
+echo "----------------------------------------------------------------"
+echo
+echo "./configure $args"
+echo
diff --git a/src/isa-l/configure.ac b/src/isa-l/configure.ac
new file mode 100644
index 000000000..8ca75ca36
--- /dev/null
+++ b/src/isa-l/configure.ac
@@ -0,0 +1,313 @@
+# -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.69)
+AC_INIT([libisal],
+ [2.30.0],
+ [sg.support.isal@intel.com],
+ [isa-l],
+ [http://01.org/storage-acceleration-library])
+AC_CONFIG_SRCDIR([])
+AC_CONFIG_AUX_DIR([build-aux])
+AM_INIT_AUTOMAKE([
+ foreign
+ 1.11
+ -Wall
+ -Wno-portability
+ silent-rules
+ tar-pax
+ no-dist-gzip
+ dist-xz
+ subdir-objects
+])
+AM_PROG_AS
+
+AC_CANONICAL_HOST
+CPU=""
+AS_CASE([$host_cpu],
+ [x86_64], [CPU="x86_64"],
+ [amd64], [CPU="x86_64"],
+ [i?86], [CPU="x86_32"],
+ [aarch64], [CPU="aarch64"],
+ [arm64], [CPU="aarch64"],
+ [powerpc64le], [CPU="ppc64le"],
+ [ppc64le], [CPU="ppc64le"],
+)
+AM_CONDITIONAL([CPU_X86_64], [test "$CPU" = "x86_64"])
+AM_CONDITIONAL([CPU_X86_32], [test "$CPU" = "x86_32"])
+AM_CONDITIONAL([CPU_AARCH64], [test "$CPU" = "aarch64"])
+AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = "ppc64le"])
+AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"])
+
+if test "$CPU" = "x86_64"; then
+ is_x86=yes
+else
+ if test "$CPU" = "x86_32"; then
+ is_x86=yes
+ else
+ is_x86=no
+ fi
+fi
+
+# Check for programs
+AC_PROG_CC_STDC
+AC_PROG_LD
+AC_USE_SYSTEM_EXTENSIONS
+AM_SILENT_RULES([yes])
+LT_INIT
+AC_PREFIX_DEFAULT([/usr])
+AC_PROG_SED
+AC_PROG_MKDIR_P
+
+# Options
+AC_ARG_ENABLE([debug],
+ AS_HELP_STRING([--enable-debug], [enable debug messages @<:@default=disabled@:>@]),
+ [], [enable_debug=no])
+AS_IF([test "x$enable_debug" = "xyes"], [
+ AC_DEFINE(ENABLE_DEBUG, [1], [Debug messages.])
+])
+
+# If this build is for x86, look for yasm and nasm
+if test x"$is_x86" = x"yes"; then
+ AC_MSG_CHECKING([whether Intel CET is enabled])
+ AC_TRY_COMPILE([],[
+#ifndef __CET__
+# error CET is not enabled
+#endif],
+ [AC_MSG_RESULT([yes])
+ intel_cet_enabled=yes],
+ [AC_MSG_RESULT([no])
+ intel_cet_enabled=no])
+
+
+ # Pick an assembler yasm or nasm
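+	# Feature levels assigned by the probes below (shared by the yasm and
+	# nasm checks): 1 = assembler found, 4 = modern opcodes accepted,
+	# 6 = base AVX512 accepted, 10 = AVX512 VBMI2 (vpcompressb) accepted.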
+ if test x"$AS" = x""; then
+ # Check for yasm and yasm features
+ yasm_feature_level=0
+ AC_CHECK_PROG(HAVE_YASM, yasm, yes, no)
+ if test "$HAVE_YASM" = "yes"; then
+ yasm_feature_level=1
+ else
+ AC_MSG_RESULT([no yasm])
+ fi
+ if test x"$yasm_feature_level" = x"1"; then
+ AC_MSG_CHECKING([for modern yasm])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[vmovdqa %xmm0, %xmm1;]])])
+ if yasm -f elf64 -p gas conftest.c ; then
+ AC_MSG_RESULT([yes])
+ yasm_feature_level=4
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ if test x"$yasm_feature_level" = x"4"; then
+ AC_MSG_CHECKING([for optional yasm AVX512 support])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpshufb %zmm0, %zmm1, %zmm2;]])])
+ if yasm -f elf64 -p gas conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ yasm_feature_level=6
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ if test x"$yasm_feature_level" = x"6"; then
+ AC_MSG_CHECKING([for additional yasm AVX512 support])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpcompressb zmm0, k1, zmm1;]])])
+ sed -i -e '/vpcompressb/!d' conftest.c
+ if yasm -f elf64 conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ yasm_feature_level=10
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+
+ # Check for nasm and nasm features
+ nasm_feature_level=0
+ AC_CHECK_PROG(HAVE_NASM, nasm, yes, no)
+ if test "$HAVE_NASM" = "yes"; then
+ nasm_feature_level=1
+ else
+ AC_MSG_RESULT([no nasm])
+ fi
+
+ if test x"$nasm_feature_level" = x"1"; then
+ AC_MSG_CHECKING([for modern nasm])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])])
+ sed -i -e '/pblendvb/!d' conftest.c
+ if nasm -f elf64 conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ nasm_feature_level=4
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ if test x"$nasm_feature_level" = x"4"; then
+ AC_MSG_CHECKING([for optional nasm AVX512 support])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])])
+ sed -i -e '/vinsert/!d' conftest.c
+ if nasm -f elf64 conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ nasm_feature_level=6
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ if test x"$nasm_feature_level" = x"6"; then
+ AC_MSG_CHECKING([for additional nasm AVX512 support])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpcompressb zmm0 {k1}, zmm1;]])])
+ sed -i -e '/vpcompressb/!d' conftest.c
+ if nasm -f elf64 conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ nasm_feature_level=10
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+
+ if test $nasm_feature_level -ge $yasm_feature_level ; then
+ AS=nasm
+ as_feature_level=$nasm_feature_level
+ else
+ AS=yasm
+ as_feature_level=$yasm_feature_level
+ fi
+
+ else
+ # Check for $AS supported features
+ as_feature_level=0
+ AC_CHECK_PROG(HAVE_AS, $AS, yes, no)
+ if test "$HAVE_AS" = "yes"; then
+ as_feature_level=1
+ else
+ AC_MSG_ERROR([no $AS])
+ fi
+
+ if test x"$as_feature_level" = x"1"; then
+ AC_MSG_CHECKING([for modern $AS])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])])
+ sed -i -e '/pblendvb/!d' conftest.c
+ if $AS -f elf64 conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ as_feature_level=4
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ if test x"$as_feature_level" = x"4"; then
+ AC_MSG_CHECKING([for optional as AVX512 support])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])])
+ sed -i -e '/vinsert/!d' conftest.c
+ if $AS -f elf64 conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ as_feature_level=6
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ if test x"$as_feature_level" = x"6"; then
+ AC_MSG_CHECKING([for additional as AVX512 support])
+ AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpcompressb zmm0, k1, zmm1;]])])
+ sed -i -e '/vpcompressb/!d' conftest.c
+ if $AS -f elf64 conftest.c 2> /dev/null; then
+ AC_MSG_RESULT([yes])
+ as_feature_level=10
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ fi
+
+ if test $as_feature_level -lt 2 ; then
+ AC_MSG_ERROR([No modern nasm or yasm found as required. Nasm should be v2.11.01 or later (v2.13 for AVX512) and yasm should be 1.2.0 or later.])
+ fi
+
+ case $host_os in
+ *linux*) arch=linux yasm_args="-f elf64";;
+ *darwin*) arch=darwin yasm_args="-f macho64 --prefix=_ ";;
+ *netbsd*) arch=netbsd yasm_args="-f elf64";;
+ *mingw*) arch=mingw yasm_args="-f win64";;
+ *) arch=unknown yasm_args="-f elf64";;
+ esac
+
+ # Fix for nasm missing windows features
+ if test x"$arch" = x"mingw"; then
+ AS=yasm
+ as_feature_level=$yasm_feature_level
+ if test $as_feature_level -lt 2 ; then
+ AC_MSG_ERROR([Mingw build requires Yasm 1.2.0 or later.])
+ fi
+ fi
+
+ AC_DEFINE_UNQUOTED(AS_FEATURE_LEVEL, [$as_feature_level], [Assembler feature level.])
+ if test $as_feature_level -ge 6 ; then
+ AC_DEFINE(HAVE_AS_KNOWS_AVX512, [1], [Assembler can do AVX512.])
+ have_as_knows_avx512=yes
+ else
+ AC_MSG_RESULT([Assembler does not understand AVX512 opcodes. Consider upgrading for best performance.])
+ fi
+
+ AM_CONDITIONAL(USE_YASM, test x"$AS" = x"yasm")
+ AM_CONDITIONAL(USE_NASM, test x"$AS" = x"nasm")
+ AM_CONDITIONAL(WITH_AVX512, test x"$have_as_knows_avx512" = x"yes")
+ AC_SUBST([yasm_args])
+ AM_CONDITIONAL(DARWIN, test x"$arch" = x"darwin")
+ AC_MSG_RESULT([Using $AS args target "$arch" "$yasm_args"])
+else
+ # Disable below conditionals if not x86
+ AM_CONDITIONAL(USE_YASM, test "x" = "y")
+ AM_CONDITIONAL(USE_NASM, test "x" = "y")
+ AM_CONDITIONAL(WITH_AVX512, test "x" = "y")
+ AM_CONDITIONAL(DARWIN, test "x" = "y")
+fi
+
+AM_CONDITIONAL(INTEL_CET_ENABLED, [test x"$intel_cet_enabled" = x"yes"])
+
+# Check for header files
+AC_CHECK_HEADERS([limits.h stdint.h stdlib.h string.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_C_INLINE
+AC_TYPE_SIZE_T
+AC_TYPE_UINT16_T
+AC_TYPE_UINT32_T
+AC_TYPE_UINT64_T
+AC_TYPE_UINT8_T
+
+# Checks for library functions.
+AC_FUNC_MALLOC # Used only in tests
+AC_CHECK_FUNCS([memmove memset getopt])
+
+my_CFLAGS="\
+-Wall \
+-Wchar-subscripts \
+-Wformat-security \
+-Wnested-externs \
+-Wpointer-arith \
+-Wshadow \
+-Wstrict-prototypes \
+-Wtype-limits \
+"
+AC_SUBST([my_CFLAGS])
+
+AC_CONFIG_FILES([\
+ Makefile\
+ libisal.pc
+])
+
+AC_OUTPUT
+AC_MSG_RESULT([
+ $PACKAGE $VERSION
+ =====
+
+ prefix: ${prefix}
+ sysconfdir: ${sysconfdir}
+ libdir: ${libdir}
+ includedir: ${includedir}
+
+ compiler: ${CC}
+ cflags: ${CFLAGS}
+ ldflags: ${LDFLAGS}
+
+ debug: ${enable_debug}
+])
diff --git a/src/isa-l/crc/Makefile.am b/src/isa-l/crc/Makefile.am
new file mode 100644
index 000000000..f12441c8d
--- /dev/null
+++ b/src/isa-l/crc/Makefile.am
@@ -0,0 +1,89 @@
+########################################################################
+# Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+include crc/aarch64/Makefile.am
+
+lsrc += \
+ crc/crc_base.c \
+ crc/crc64_base.c
+
+lsrc_base_aliases += crc/crc_base_aliases.c
+lsrc_x86_32 += crc/crc_base_aliases.c
+lsrc_ppc64le += crc/crc_base_aliases.c
+
+lsrc_x86_64 += \
+ crc/crc16_t10dif_01.asm \
+ crc/crc16_t10dif_by4.asm \
+ crc/crc16_t10dif_02.asm \
+ crc/crc16_t10dif_by16_10.asm \
+ crc/crc16_t10dif_copy_by4.asm \
+ crc/crc16_t10dif_copy_by4_02.asm \
+ crc/crc32_ieee_01.asm \
+ crc/crc32_ieee_02.asm \
+ crc/crc32_ieee_by4.asm \
+ crc/crc32_ieee_by16_10.asm \
+ crc/crc32_iscsi_01.asm \
+ crc/crc32_iscsi_00.asm \
+ crc/crc32_iscsi_by16_10.asm \
+ crc/crc_multibinary.asm \
+ crc/crc64_multibinary.asm \
+ crc/crc64_ecma_refl_by8.asm \
+ crc/crc64_ecma_refl_by16_10.asm \
+ crc/crc64_ecma_norm_by8.asm \
+ crc/crc64_ecma_norm_by16_10.asm \
+ crc/crc64_iso_refl_by8.asm \
+ crc/crc64_iso_refl_by16_10.asm \
+ crc/crc64_iso_norm_by8.asm \
+ crc/crc64_iso_norm_by16_10.asm \
+ crc/crc64_jones_refl_by8.asm \
+ crc/crc64_jones_refl_by16_10.asm \
+ crc/crc64_jones_norm_by8.asm \
+ crc/crc64_jones_norm_by16_10.asm \
+ crc/crc32_gzip_refl_by8.asm \
+ crc/crc32_gzip_refl_by8_02.asm \
+ crc/crc32_gzip_refl_by16_10.asm
+
+src_include += -I $(srcdir)/crc
+extern_hdrs += include/crc.h include/crc64.h
+
+other_src += include/reg_sizes.asm include/types.h include/test.h \
+ crc/crc_ref.h crc/crc64_ref.h
+
+check_tests += crc/crc16_t10dif_test \
+ crc/crc16_t10dif_copy_test \
+ crc/crc64_funcs_test \
+ crc/crc32_funcs_test
+
+perf_tests += crc/crc16_t10dif_perf crc/crc16_t10dif_copy_perf \
+ crc/crc16_t10dif_op_perf \
+ crc/crc32_ieee_perf crc/crc32_iscsi_perf \
+ crc/crc64_funcs_perf crc/crc32_gzip_refl_perf
+
+examples += crc/crc_simple_test crc/crc64_example
+
diff --git a/src/isa-l/crc/aarch64/Makefile.am b/src/isa-l/crc/aarch64/Makefile.am
new file mode 100644
index 000000000..5113b77e9
--- /dev/null
+++ b/src/isa-l/crc/aarch64/Makefile.am
@@ -0,0 +1,57 @@
+########################################################################
+# Copyright(c) 2020 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+lsrc_aarch64 += \
+ crc/aarch64/crc_multibinary_arm.S \
+ crc/aarch64/crc_aarch64_dispatcher.c
+
+lsrc_aarch64 += \
+ crc/aarch64/crc16_t10dif_pmull.S \
+ crc/aarch64/crc16_t10dif_copy_pmull.S \
+ crc/aarch64/crc32_ieee_norm_pmull.S \
+ crc/aarch64/crc64_ecma_refl_pmull.S \
+ crc/aarch64/crc64_ecma_norm_pmull.S \
+ crc/aarch64/crc64_iso_refl_pmull.S \
+ crc/aarch64/crc64_iso_norm_pmull.S \
+ crc/aarch64/crc64_jones_refl_pmull.S \
+ crc/aarch64/crc64_jones_norm_pmull.S
+
+# CRC32/CRC32C implementations tuned for specific micro-architectures
+lsrc_aarch64 += \
+ crc/aarch64/crc32_iscsi_refl_pmull.S \
+ crc/aarch64/crc32_gzip_refl_pmull.S \
+ crc/aarch64/crc32_iscsi_3crc_fold.S \
+ crc/aarch64/crc32_gzip_refl_3crc_fold.S \
+ crc/aarch64/crc32_iscsi_crc_ext.S \
+ crc/aarch64/crc32_gzip_refl_crc_ext.S \
+ crc/aarch64/crc32_mix_default.S \
+ crc/aarch64/crc32c_mix_default.S \
+ crc/aarch64/crc32_mix_neoverse_n1.S \
+ crc/aarch64/crc32c_mix_neoverse_n1.S
+
diff --git a/src/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S b/src/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S
new file mode 100644
index 000000000..10bf157c2
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S
@@ -0,0 +1,423 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+ .arch armv8-a+crc+crypto
+ .text
+ .align 3
+ .global crc16_t10dif_copy_pmull
+ .type crc16_t10dif_copy_pmull, %function
+
+/* uint16_t crc16_t10dif_copy_pmull(uint16_t seed, uint8_t *dst, uint8_t *src, uint64_t len) */
+
+/* arguments */
+w_seed .req w0
+x_dst .req x1
+x_src .req x2
+x_len .req x3
+w_len .req w3
+
+/* returns */
+w_ret .req w0
+
+/* these as global temporary registers */
+w_tmp .req w6
+x_tmp .req x6
+x_tmp1 .req x7
+x_tmp2 .req x11
+
+d_tmp1 .req d0
+d_tmp2 .req d1
+q_tmp1 .req q0
+q_tmp2 .req q1
+v_tmp1 .req v0
+v_tmp2 .req v1
+
+/* local variables */
+w_counter .req w4
+w_crc .req w0
+x_crc .req x0
+x_counter .req x4
+x_crc16tab .req x5
+x_src_saved .req x0
+x_dst_saved .req x12
+
+crc16_t10dif_copy_pmull:
+ cmp x_len, 1023
+ sub sp, sp, #16
+ uxth w_seed, w_seed
+ bhi .crc_fold
+
+ mov x_tmp, 0
+ mov w_counter, 0
+
+.crc_table_loop_pre:
+ cmp x_len, x_tmp
+ bls .end
+
+ sxtw x_counter, w_counter
+ adrp x_crc16tab, .LANCHOR0
+ sub x_src, x_src, x_counter
+ sub x_dst, x_dst, x_counter
+ add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
+
+ .align 2
+.crc_table_loop:
+ ldrb w_tmp, [x_src, x_counter]
+ strb w_tmp, [x_dst, x_counter]
+ add x_counter, x_counter, 1
+ cmp x_len, x_counter
+ eor w_tmp, w_tmp, w_crc, lsr 8
+ ldrh w_tmp, [x_crc16tab, w_tmp, sxtw 1]
+ eor w_crc, w_tmp, w_crc, lsl 8
+ uxth w_crc, w_crc
+ bhi .crc_table_loop
+
+.end:
+ add sp, sp, 16
+ ret
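+
+/*
+ * The byte-at-a-time path above is equivalent to this C sketch (an
+ * illustration only; crc16tab is the 256-entry table at the end of
+ * this file):
+ *
+ *	while (len--) {
+ *		*dst++ = *src;
+ *		crc = (uint16_t)((crc << 8) ^ crc16tab[(crc >> 8) ^ *src++]);
+ *	}
+ */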
+
+/* carry less multiplication, part1 - before loop */
+q_x0 .req q2
+q_x1 .req q3
+q_x2 .req q4
+q_x3 .req q5
+
+v_x0 .req v2
+v_x1 .req v3
+v_x2 .req v4
+v_x3 .req v5
+
+d_x0 .req d2
+d_x1 .req d3
+d_x2 .req d4
+d_x3 .req d5
+
+// the following registers are only used in part1
+d_tmp3 .req d16
+v_tmp3 .req v16
+
+ .align 3
+.crc_fold:
+ fmov d_tmp1, x_crc
+ fmov d_tmp2, xzr
+ dup d_tmp3, v_tmp2.d[0]
+ shl d_tmp1, d_tmp1, 48
+ ins v_tmp3.d[1], v_tmp1.d[0]
+
+ and x_counter, x_len, -64
+ sub x_counter, x_counter, #64
+ cmp x_counter, 63
+ add x_src_saved, x_src, 64
+ add x_dst_saved, x_dst, 64
+
+ ldr q_x0, [x_src]
+ ldr q_x1, [x_src, 16]
+ ldr q_x2, [x_src, 32]
+ ldr q_x3, [x_src, 48]
+
+ str q_x0, [x_dst]
+ str q_x1, [x_dst, 16]
+ str q_x2, [x_dst, 32]
+ str q_x3, [x_dst, 48]
+
+ adrp x_tmp, .shuffle_mask_lanchor
+ ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor]
+
+ tbl v_tmp1.16b, {v_x0.16b}, v7.16b
+ eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
+
+ tbl v_x1.16b, {v_x1.16b}, v7.16b
+ tbl v_x2.16b, {v_x2.16b}, v7.16b
+ tbl v_x3.16b, {v_x3.16b}, v7.16b
+ bls .crc_fold_loop_end
+
+/* carry less multiplication, part2 - loop */
+q_y0 .req q28
+q_y1 .req q29
+q_y2 .req q30
+q_y3 .req q31
+
+v_y0 .req v28
+v_y1 .req v29
+v_y2 .req v30
+v_y3 .req v31
+
+d_x0_h .req d24
+d_x0_l .req d2
+d_x1_h .req d25
+d_x1_l .req d3
+d_x2_h .req d26
+d_x2_l .req d4
+d_x3_h .req d27
+d_x3_l .req d5
+
+v_x0_h .req v24
+v_x0_l .req v2
+v_x1_h .req v25
+v_x1_l .req v3
+v_x2_h .req v26
+v_x2_l .req v4
+v_x3_h .req v27
+v_x3_l .req v5
+
+v_tmp1_x0 .req v24
+v_tmp1_x1 .req v25
+v_tmp1_x2 .req v26
+v_tmp1_x3 .req v27
+
+d_p4_h .req d19
+v_p4_h .req v19
+d_p4_l .req d17
+v_p4_l .req v17
+
+ mov x_tmp, 0x371d0000 /* p4 [1] */
+ fmov d_p4_h, x_tmp
+ mov x_tmp, 0x87e70000 /* p4 [0] */
+ fmov d_p4_l, x_tmp
+
+ .align 2
+.crc_fold_loop:
+ add x_src_saved, x_src_saved, 64
+ add x_dst_saved, x_dst_saved, 64
+
+ sub x_counter, x_counter, #64
+ cmp x_counter, 63
+
+ dup d_x0_h, v_x0.d[1]
+ dup d_x1_h, v_x1.d[1]
+ dup d_x2_h, v_x2.d[1]
+ dup d_x3_h, v_x3.d[1]
+
+ dup d_x0_l, v_x0.d[0]
+ dup d_x1_l, v_x1.d[0]
+ dup d_x2_l, v_x2.d[0]
+ dup d_x3_l, v_x3.d[0]
+
+ ldr q_y0, [x_src_saved, -64]
+ ldr q_y1, [x_src_saved, -48]
+ ldr q_y2, [x_src_saved, -32]
+ ldr q_y3, [x_src_saved, -16]
+
+ str q_y0, [x_dst_saved, -64]
+ str q_y1, [x_dst_saved, -48]
+ str q_y2, [x_dst_saved, -32]
+ str q_y3, [x_dst_saved, -16]
+
+ pmull v_x0_h.1q, v_x0_h.1d, v_p4_h.1d
+ pmull v_x0_l.1q, v_x0_l.1d, v_p4_l.1d
+ pmull v_x1_h.1q, v_x1_h.1d, v_p4_h.1d
+ pmull v_x1_l.1q, v_x1_l.1d, v_p4_l.1d
+ pmull v_x2_h.1q, v_x2_h.1d, v_p4_h.1d
+ pmull v_x2_l.1q, v_x2_l.1d, v_p4_l.1d
+ pmull v_x3_h.1q, v_x3_h.1d, v_p4_h.1d
+ pmull v_x3_l.1q, v_x3_l.1d, v_p4_l.1d
+
+ tbl v_y0.16b, {v_y0.16b}, v7.16b
+ tbl v_y1.16b, {v_y1.16b}, v7.16b
+ tbl v_y2.16b, {v_y2.16b}, v7.16b
+ tbl v_y3.16b, {v_y3.16b}, v7.16b
+
+ eor v_tmp1_x0.16b, v_x0_h.16b, v_x0_l.16b
+ eor v_tmp1_x1.16b, v_x1_h.16b, v_x1_l.16b
+ eor v_tmp1_x2.16b, v_x2_h.16b, v_x2_l.16b
+ eor v_tmp1_x3.16b, v_x3_h.16b, v_x3_l.16b
+
+ eor v_x0.16b, v_tmp1_x0.16b, v_y0.16b
+ eor v_x1.16b, v_tmp1_x1.16b, v_y1.16b
+ eor v_x2.16b, v_tmp1_x2.16b, v_y2.16b
+ eor v_x3.16b, v_tmp1_x3.16b, v_y3.16b
+
+ bhi .crc_fold_loop
+
+/* carry less multiplication, part3 - after loop */
+/* folding 512bit ---> 128bit */
+
+// input parameters:
+// v_x0 => v2
+// v_x1 => v3
+// v_x2 => v4
+// v_x3 => v5
+
+// v0, v1, v6, v30 are tmp registers
+
+.crc_fold_loop_end:
+ mov x_tmp, 0x4c1a0000 /* p1 [1] */
+ fmov d0, x_tmp
+ mov x_tmp, 0xfb0b0000 /* p1 [0] */
+ fmov d1, x_tmp
+
+ and w_counter, w_len, -64
+ sxtw x_tmp, w_counter
+
+ add x_src, x_src, x_tmp
+ add x_dst, x_dst, x_tmp
+
+ dup d6, v_x0.d[1]
+ dup d30, v_x0.d[0]
+ pmull v6.1q, v6.1d, v0.1d
+ pmull v30.1q, v30.1d, v1.1d
+ eor v6.16b, v6.16b, v30.16b
+ eor v_x1.16b, v6.16b, v_x1.16b
+
+ dup d6, v_x1.d[1]
+ dup d30, v_x1.d[0]
+ pmull v6.1q, v6.1d, v0.1d
+ pmull v16.1q, v30.1d, v1.1d
+ eor v6.16b, v6.16b, v16.16b
+ eor v_x2.16b, v6.16b, v_x2.16b
+
+ dup d_x0, v_x2.d[1]
+ dup d30, v_x2.d[0]
+ pmull v0.1q, v_x0.1d, v0.1d
+ pmull v_x0.1q, v30.1d, v1.1d
+ eor v1.16b, v0.16b, v_x0.16b
+ eor v_x0.16b, v1.16b, v_x3.16b
+
+/* carry less multiplication, part3 - after loop */
+/* crc16 fold function */
+d_16fold_p0_h .req d18
+v_16fold_p0_h .req v18
+
+d_16fold_p0_l .req d4
+v_16fold_p0_l .req v4
+
+v_16fold_from .req v_x0
+d_16fold_from_h .req d3
+v_16fold_from_h .req v3
+
+v_16fold_zero .req v7
+
+v_16fold_from1 .req v16
+
+v_16fold_from2 .req v0
+d_16fold_from2_h .req d6
+v_16fold_from2_h .req v6
+
+v_16fold_tmp .req v0
+
+ movi v_16fold_zero.4s, 0
+ mov x_tmp1, 0x2d560000 /* p0 [1] */
+ mov x_tmp2, 0x13680000 /* p0 [0] */
+
+ ext v_16fold_tmp.16b, v_16fold_zero.16b, v_16fold_from.16b, #8
+ ext v_16fold_tmp.16b, v0.16b, v_16fold_zero.16b, #4
+
+ dup d_16fold_from_h, v_16fold_from.d[1]
+ fmov d_16fold_p0_h, x_tmp1
+ pmull v_16fold_from1.1q, v_16fold_from_h.1d, v_16fold_p0_h.1d
+ eor v_16fold_from2.16b, v_16fold_tmp.16b, v_16fold_from1.16b
+
+ dup d_16fold_from2_h, v_16fold_from2.d[1]
+ fmov d_16fold_p0_l, x_tmp2
+ pmull v6.1q, v_16fold_from2_h.1d, v_16fold_p0_l.1d
+ eor v_x0.16b, v0.16b, v6.16b
+
+/* carry less multiplication, part3 - after loop */
+/* crc16 barrett reduction function */
+
+// input parameters:
+// v_x0: v2
+// barrett reduction constant: br[0], br[1]
+
+d_br0 .req d3
+v_br0 .req v3
+d_br1 .req d5
+v_br1 .req v5
+
+ mov x_tmp1, 0x57f9 /* br[0] low */
+ movk x_tmp1, 0xf65a, lsl 16 /* br[0] high */
+ movk x_tmp1, 0x1, lsl 32
+ fmov d_br0, x_tmp1
+
+ dup d1, v_x0.d[0]
+ dup d1, v1.d[0]
+ ext v1.16b, v1.16b, v7.16b, #4
+ pmull v4.1q, v1.1d, v_br0.1d
+
+ ext v1.16b, v4.16b, v7.16b, #4
+ mov x_tmp1, 0x8bb70000 /* br[1] low */
+ movk x_tmp1, 0x1, lsl 32 /* br[1] high */
+
+ fmov d_br1, x_tmp1
+ pmull v_br1.1q, v1.1d, v_br1.1d
+ eor v_x0.16b, v_x0.16b, v_br1.16b
+
+ umov x0, v_x0.d[0]
+ ubfx x0, x0, 16, 16
+ b .crc_table_loop_pre
+
+ .size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull
+
+ .section .rodata
+
+ .align 4
+.shuffle_mask_lanchor = . + 0
+ .type shuffle_mask, %object
+ .size shuffle_mask, 16
+shuffle_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8
+ .byte 7, 6, 5, 4, 3, 2, 1, 0
+
+ .align 4
+.LANCHOR0 = . + 0
+ .type crc16tab, %object
+ .size crc16tab, 512
+crc16tab:
+ .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
+ .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
+ .hword 0x54CD, 0xdf7a, 0xc814, 0x43a3, 0xe6c8, 0x6d7f, 0x7a11, 0xf1a6
+ .hword 0xBB70, 0x30c7, 0x27a9, 0xac1e, 0x0975, 0x82c2, 0x95ac, 0x1e1b
+ .hword 0xA99A, 0x222d, 0x3543, 0xbef4, 0x1b9f, 0x9028, 0x8746, 0x0cf1
+ .hword 0x4627, 0xcd90, 0xdafe, 0x5149, 0xf422, 0x7f95, 0x68fb, 0xe34c
+ .hword 0xFD57, 0x76e0, 0x618e, 0xea39, 0x4f52, 0xc4e5, 0xd38b, 0x583c
+ .hword 0x12EA, 0x995d, 0x8e33, 0x0584, 0xa0ef, 0x2b58, 0x3c36, 0xb781
+ .hword 0xD883, 0x5334, 0x445a, 0xcfed, 0x6a86, 0xe131, 0xf65f, 0x7de8
+ .hword 0x373E, 0xbc89, 0xabe7, 0x2050, 0x853b, 0x0e8c, 0x19e2, 0x9255
+ .hword 0x8C4E, 0x07f9, 0x1097, 0x9b20, 0x3e4b, 0xb5fc, 0xa292, 0x2925
+ .hword 0x63F3, 0xe844, 0xff2a, 0x749d, 0xd1f6, 0x5a41, 0x4d2f, 0xc698
+ .hword 0x7119, 0xfaae, 0xedc0, 0x6677, 0xc31c, 0x48ab, 0x5fc5, 0xd472
+ .hword 0x9EA4, 0x1513, 0x027d, 0x89ca, 0x2ca1, 0xa716, 0xb078, 0x3bcf
+ .hword 0x25D4, 0xae63, 0xb90d, 0x32ba, 0x97d1, 0x1c66, 0x0b08, 0x80bf
+ .hword 0xCA69, 0x41de, 0x56b0, 0xdd07, 0x786c, 0xf3db, 0xe4b5, 0x6f02
+ .hword 0x3AB1, 0xb106, 0xa668, 0x2ddf, 0x88b4, 0x0303, 0x146d, 0x9fda
+ .hword 0xD50C, 0x5ebb, 0x49d5, 0xc262, 0x6709, 0xecbe, 0xfbd0, 0x7067
+ .hword 0x6E7C, 0xe5cb, 0xf2a5, 0x7912, 0xdc79, 0x57ce, 0x40a0, 0xcb17
+ .hword 0x81C1, 0x0a76, 0x1d18, 0x96af, 0x33c4, 0xb873, 0xaf1d, 0x24aa
+ .hword 0x932B, 0x189c, 0x0ff2, 0x8445, 0x212e, 0xaa99, 0xbdf7, 0x3640
+ .hword 0x7C96, 0xf721, 0xe04f, 0x6bf8, 0xce93, 0x4524, 0x524a, 0xd9fd
+ .hword 0xC7E6, 0x4c51, 0x5b3f, 0xd088, 0x75e3, 0xfe54, 0xe93a, 0x628d
+ .hword 0x285B, 0xa3ec, 0xb482, 0x3f35, 0x9a5e, 0x11e9, 0x0687, 0x8d30
+ .hword 0xE232, 0x6985, 0x7eeb, 0xf55c, 0x5037, 0xdb80, 0xccee, 0x4759
+ .hword 0x0D8F, 0x8638, 0x9156, 0x1ae1, 0xbf8a, 0x343d, 0x2353, 0xa8e4
+ .hword 0xB6FF, 0x3d48, 0x2a26, 0xa191, 0x04fa, 0x8f4d, 0x9823, 0x1394
+ .hword 0x5942, 0xd2f5, 0xc59b, 0x4e2c, 0xeb47, 0x60f0, 0x779e, 0xfc29
+ .hword 0x4BA8, 0xc01f, 0xd771, 0x5cc6, 0xf9ad, 0x721a, 0x6574, 0xeec3
+ .hword 0xA415, 0x2fa2, 0x38cc, 0xb37b, 0x1610, 0x9da7, 0x8ac9, 0x017e
+ .hword 0x1F65, 0x94d2, 0x83bc, 0x080b, 0xad60, 0x26d7, 0x31b9, 0xba0e
+ .hword 0xF0D8, 0x7b6f, 0x6c01, 0xe7b6, 0x42dd, 0xc96a, 0xde04, 0x55b3
diff --git a/src/isa-l/crc/aarch64/crc16_t10dif_pmull.S b/src/isa-l/crc/aarch64/crc16_t10dif_pmull.S
new file mode 100644
index 000000000..08f1a35ad
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc16_t10dif_pmull.S
@@ -0,0 +1,404 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+ .arch armv8-a+crc+crypto
+ .text
+ .align 3
+ .global crc16_t10dif_pmull
+ .type crc16_t10dif_pmull, %function
+
+/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */
+
+/* arguments */
+w_seed .req w0
+x_buf .req x1
+x_len .req x2
+w_len .req w2
+
+/* returns */
+w_ret .req w0
+
+/* these as global temporary registers */
+w_tmp .req w5
+x_tmp .req x5
+x_tmp1 .req x6
+x_tmp2 .req x7
+
+d_tmp1 .req d0
+d_tmp2 .req d1
+q_tmp1 .req q0
+q_tmp2 .req q1
+v_tmp1 .req v0
+v_tmp2 .req v1
+
+/* local variables */
+w_counter .req w3
+w_crc .req w0
+x_crc .req x0
+x_counter .req x3
+x_crc16tab .req x4
+x_buf_saved .req x0
+
+crc16_t10dif_pmull:
+ cmp x_len, 1023
+ sub sp, sp, #16
+ uxth w_seed, w_seed
+ bhi .crc_fold
+
+ mov x_tmp, 0
+ mov w_counter, 0
+
+.crc_table_loop_pre:
+ cmp x_len, x_tmp
+ bls .end
+
+ sxtw x_counter, w_counter
+ adrp x_crc16tab, .LANCHOR0
+ sub x_buf, x_buf, x_counter
+ add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0
+
+ .align 2
+.crc_table_loop:
+ ldrb w_tmp, [x_buf, x_counter]
+ add x_counter, x_counter, 1
+ cmp x_len, x_counter
+ eor w_tmp, w_tmp, w_crc, lsr 8
+ ldrh w_tmp, [x_crc16tab, w_tmp, sxtw 1]
+ eor w_crc, w_tmp, w_crc, lsl 8
+ uxth w_crc, w_crc
+ bhi .crc_table_loop
+
+.end:
+ add sp, sp, 16
+ ret
+
+/* carry less multiplication, part1 - before loop */
+q_x0 .req q2
+q_x1 .req q3
+q_x2 .req q4
+q_x3 .req q5
+
+v_x0 .req v2
+v_x1 .req v3
+v_x2 .req v4
+v_x3 .req v5
+
+d_x0 .req d2
+d_x1 .req d3
+d_x2 .req d4
+d_x3 .req d5
+
+// the following registers are only used in part1
+d_tmp3 .req d16
+v_tmp3 .req v16
+
+ .align 3
+.crc_fold:
+ fmov d_tmp1, x_crc
+ fmov d_tmp2, xzr
+ dup d_tmp3, v_tmp2.d[0]
+ shl d_tmp1, d_tmp1, 48
+ ins v_tmp3.d[1], v_tmp1.d[0]
+
+ and x_counter, x_len, -64
+ sub x_counter, x_counter, #64
+ cmp x_counter, 63
+ add x_buf_saved, x_buf, 64
+
+ ldr q_x0, [x_buf]
+ ldr q_x1, [x_buf, 16]
+ ldr q_x2, [x_buf, 32]
+ ldr q_x3, [x_buf, 48]
+
+ adrp x_tmp, .shuffle_mask_lanchor
+ ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor]
+
+ tbl v_tmp1.16b, {v_x0.16b}, v7.16b
+ eor v_x0.16b, v_tmp3.16b, v_tmp1.16b
+
+ tbl v_x1.16b, {v_x1.16b}, v7.16b
+ tbl v_x2.16b, {v_x2.16b}, v7.16b
+ tbl v_x3.16b, {v_x3.16b}, v7.16b
+ bls .crc_fold_loop_end
+
+/* carry less multiplication, part2 - loop */
+q_y0 .req q28
+q_y1 .req q29
+q_y2 .req q30
+q_y3 .req q31
+
+v_y0 .req v28
+v_y1 .req v29
+v_y2 .req v30
+v_y3 .req v31
+
+d_x0_h .req d24
+d_x0_l .req d2
+d_x1_h .req d25
+d_x1_l .req d3
+d_x2_h .req d26
+d_x2_l .req d4
+d_x3_h .req d27
+d_x3_l .req d5
+
+v_x0_h .req v24
+v_x0_l .req v2
+v_x1_h .req v25
+v_x1_l .req v3
+v_x2_h .req v26
+v_x2_l .req v4
+v_x3_h .req v27
+v_x3_l .req v5
+
+v_tmp1_x0 .req v24
+v_tmp1_x1 .req v25
+v_tmp1_x2 .req v26
+v_tmp1_x3 .req v27
+
+d_p4_h .req d19
+v_p4_h .req v19
+d_p4_l .req d17
+v_p4_l .req v17
+
+ mov x_tmp, 0x371d0000 /* p4 [1] */
+ fmov d_p4_h, x_tmp
+ mov x_tmp, 0x87e70000 /* p4 [0] */
+ fmov d_p4_l, x_tmp
+
+ .align 2
+.crc_fold_loop:
+ add x_buf_saved, x_buf_saved, 64
+ sub x_counter, x_counter, #64
+ cmp x_counter, 63
+
+ dup d_x0_h, v_x0.d[1]
+ dup d_x1_h, v_x1.d[1]
+ dup d_x2_h, v_x2.d[1]
+ dup d_x3_h, v_x3.d[1]
+
+ dup d_x0_l, v_x0.d[0]
+ dup d_x1_l, v_x1.d[0]
+ dup d_x2_l, v_x2.d[0]
+ dup d_x3_l, v_x3.d[0]
+
+ ldr q_y0, [x_buf_saved, -64]
+ ldr q_y1, [x_buf_saved, -48]
+ ldr q_y2, [x_buf_saved, -32]
+ ldr q_y3, [x_buf_saved, -16]
+
+ pmull v_x0_h.1q, v_x0_h.1d, v_p4_h.1d
+ pmull v_x0_l.1q, v_x0_l.1d, v_p4_l.1d
+ pmull v_x1_h.1q, v_x1_h.1d, v_p4_h.1d
+ pmull v_x1_l.1q, v_x1_l.1d, v_p4_l.1d
+ pmull v_x2_h.1q, v_x2_h.1d, v_p4_h.1d
+ pmull v_x2_l.1q, v_x2_l.1d, v_p4_l.1d
+ pmull v_x3_h.1q, v_x3_h.1d, v_p4_h.1d
+ pmull v_x3_l.1q, v_x3_l.1d, v_p4_l.1d
+
+ tbl v_y0.16b, {v_y0.16b}, v7.16b
+ tbl v_y1.16b, {v_y1.16b}, v7.16b
+ tbl v_y2.16b, {v_y2.16b}, v7.16b
+ tbl v_y3.16b, {v_y3.16b}, v7.16b
+
+ eor v_tmp1_x0.16b, v_x0_h.16b, v_x0_l.16b
+ eor v_tmp1_x1.16b, v_x1_h.16b, v_x1_l.16b
+ eor v_tmp1_x2.16b, v_x2_h.16b, v_x2_l.16b
+ eor v_tmp1_x3.16b, v_x3_h.16b, v_x3_l.16b
+
+ eor v_x0.16b, v_tmp1_x0.16b, v_y0.16b
+ eor v_x1.16b, v_tmp1_x1.16b, v_y1.16b
+ eor v_x2.16b, v_tmp1_x2.16b, v_y2.16b
+ eor v_x3.16b, v_tmp1_x3.16b, v_y3.16b
+
+ bhi .crc_fold_loop
+
+/* carry less multiplication, part3 - after loop */
+/* folding 512bit ---> 128bit */
+
+// input parameters:
+// v_x0 => v2
+// v_x1 => v3
+// v_x2 => v4
+// v_x3 => v5
+
+// v0, v1, v6, v30 are tmp registers
+
+.crc_fold_loop_end:
+ mov x_tmp, 0x4c1a0000 /* p1 [1] */
+ fmov d0, x_tmp
+ mov x_tmp, 0xfb0b0000 /* p1 [0] */
+ fmov d1, x_tmp
+
+ and w_counter, w_len, -64
+ sxtw x_tmp, w_counter
+ add x_buf, x_buf, x_tmp
+
+ dup d6, v_x0.d[1]
+ dup d30, v_x0.d[0]
+ pmull v6.1q, v6.1d, v0.1d
+ pmull v30.1q, v30.1d, v1.1d
+ eor v6.16b, v6.16b, v30.16b
+ eor v_x1.16b, v6.16b, v_x1.16b
+
+ dup d6, v_x1.d[1]
+ dup d30, v_x1.d[0]
+ pmull v6.1q, v6.1d, v0.1d
+ pmull v16.1q, v30.1d, v1.1d
+ eor v6.16b, v6.16b, v16.16b
+ eor v_x2.16b, v6.16b, v_x2.16b
+
+ dup d_x0, v_x2.d[1]
+ dup d30, v_x2.d[0]
+ pmull v0.1q, v_x0.1d, v0.1d
+ pmull v_x0.1q, v30.1d, v1.1d
+ eor v1.16b, v0.16b, v_x0.16b
+ eor v_x0.16b, v1.16b, v_x3.16b
+
+/* carry less multiplication, part3 - after loop */
+/* crc16 fold function */
+d_16fold_p0_h .req d18
+v_16fold_p0_h .req v18
+
+d_16fold_p0_l .req d4
+v_16fold_p0_l .req v4
+
+v_16fold_from .req v_x0
+d_16fold_from_h .req d3
+v_16fold_from_h .req v3
+
+v_16fold_zero .req v7
+
+v_16fold_from1 .req v16
+
+v_16fold_from2 .req v0
+d_16fold_from2_h .req d6
+v_16fold_from2_h .req v6
+
+v_16fold_tmp .req v0
+
+ movi v_16fold_zero.4s, 0
+ mov x_tmp1, 0x2d560000 /* p0 [1] */
+ mov x_tmp2, 0x13680000 /* p0 [0] */
+
+ ext v_16fold_tmp.16b, v_16fold_zero.16b, v_16fold_from.16b, #8
+ ext v_16fold_tmp.16b, v0.16b, v_16fold_zero.16b, #4
+
+ dup d_16fold_from_h, v_16fold_from.d[1]
+ fmov d_16fold_p0_h, x_tmp1
+ pmull v_16fold_from1.1q, v_16fold_from_h.1d, v_16fold_p0_h.1d
+ eor v_16fold_from2.16b, v_16fold_tmp.16b, v_16fold_from1.16b
+
+ dup d_16fold_from2_h, v_16fold_from2.d[1]
+ fmov d_16fold_p0_l, x_tmp2
+ pmull v6.1q, v_16fold_from2_h.1d, v_16fold_p0_l.1d
+ eor v_x0.16b, v0.16b, v6.16b
+
+/* carry less multiplication, part3 - after loop */
+/* crc16 barrett reduction function */
+
+// input parameters:
+// v_x0: v2
+// barrett reduction constant: br[0], br[1]
+
+d_br0 .req d3
+v_br0 .req v3
+d_br1 .req d5
+v_br1 .req v5
+
+ mov x_tmp1, 0x57f9 /* br[0] low */
+ movk x_tmp1, 0xf65a, lsl 16 /* br[0] high */
+ movk x_tmp1, 0x1, lsl 32
+ fmov d_br0, x_tmp1
+
+ dup d1, v_x0.d[0]
+ dup d1, v1.d[0]
+ ext v1.16b, v1.16b, v7.16b, #4
+ pmull v4.1q, v1.1d, v_br0.1d
+
+ ext v1.16b, v4.16b, v7.16b, #4
+ mov x_tmp1, 0x8bb70000 /* br[1] low */
+ movk x_tmp1, 0x1, lsl 32 /* br[1] high */
+
+ fmov d_br1, x_tmp1
+ pmull v_br1.1q, v1.1d, v_br1.1d
+ eor v_x0.16b, v_x0.16b, v_br1.16b
+
+ umov x0, v_x0.d[0]
+ ubfx x0, x0, 16, 16
+ b .crc_table_loop_pre
+
+ .size crc16_t10dif_pmull, .-crc16_t10dif_pmull
+
+ .section .rodata
+
+ .align 4
+.shuffle_mask_lanchor = . + 0
+ .type shuffle_mask, %object
+ .size shuffle_mask, 16
+shuffle_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8
+ .byte 7, 6, 5, 4, 3, 2, 1, 0
+
+ .align 4
+.LANCHOR0 = . + 0
+ .type crc16tab, %object
+ .size crc16tab, 512
+crc16tab:
+ .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b
+ .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6
+ .hword 0x54CD, 0xdf7a, 0xc814, 0x43a3, 0xe6c8, 0x6d7f, 0x7a11, 0xf1a6
+ .hword 0xBB70, 0x30c7, 0x27a9, 0xac1e, 0x0975, 0x82c2, 0x95ac, 0x1e1b
+ .hword 0xA99A, 0x222d, 0x3543, 0xbef4, 0x1b9f, 0x9028, 0x8746, 0x0cf1
+ .hword 0x4627, 0xcd90, 0xdafe, 0x5149, 0xf422, 0x7f95, 0x68fb, 0xe34c
+ .hword 0xFD57, 0x76e0, 0x618e, 0xea39, 0x4f52, 0xc4e5, 0xd38b, 0x583c
+ .hword 0x12EA, 0x995d, 0x8e33, 0x0584, 0xa0ef, 0x2b58, 0x3c36, 0xb781
+ .hword 0xD883, 0x5334, 0x445a, 0xcfed, 0x6a86, 0xe131, 0xf65f, 0x7de8
+ .hword 0x373E, 0xbc89, 0xabe7, 0x2050, 0x853b, 0x0e8c, 0x19e2, 0x9255
+ .hword 0x8C4E, 0x07f9, 0x1097, 0x9b20, 0x3e4b, 0xb5fc, 0xa292, 0x2925
+ .hword 0x63F3, 0xe844, 0xff2a, 0x749d, 0xd1f6, 0x5a41, 0x4d2f, 0xc698
+ .hword 0x7119, 0xfaae, 0xedc0, 0x6677, 0xc31c, 0x48ab, 0x5fc5, 0xd472
+ .hword 0x9EA4, 0x1513, 0x027d, 0x89ca, 0x2ca1, 0xa716, 0xb078, 0x3bcf
+ .hword 0x25D4, 0xae63, 0xb90d, 0x32ba, 0x97d1, 0x1c66, 0x0b08, 0x80bf
+ .hword 0xCA69, 0x41de, 0x56b0, 0xdd07, 0x786c, 0xf3db, 0xe4b5, 0x6f02
+ .hword 0x3AB1, 0xb106, 0xa668, 0x2ddf, 0x88b4, 0x0303, 0x146d, 0x9fda
+ .hword 0xD50C, 0x5ebb, 0x49d5, 0xc262, 0x6709, 0xecbe, 0xfbd0, 0x7067
+ .hword 0x6E7C, 0xe5cb, 0xf2a5, 0x7912, 0xdc79, 0x57ce, 0x40a0, 0xcb17
+ .hword 0x81C1, 0x0a76, 0x1d18, 0x96af, 0x33c4, 0xb873, 0xaf1d, 0x24aa
+ .hword 0x932B, 0x189c, 0x0ff2, 0x8445, 0x212e, 0xaa99, 0xbdf7, 0x3640
+ .hword 0x7C96, 0xf721, 0xe04f, 0x6bf8, 0xce93, 0x4524, 0x524a, 0xd9fd
+ .hword 0xC7E6, 0x4c51, 0x5b3f, 0xd088, 0x75e3, 0xfe54, 0xe93a, 0x628d
+ .hword 0x285B, 0xa3ec, 0xb482, 0x3f35, 0x9a5e, 0x11e9, 0x0687, 0x8d30
+ .hword 0xE232, 0x6985, 0x7eeb, 0xf55c, 0x5037, 0xdb80, 0xccee, 0x4759
+ .hword 0x0D8F, 0x8638, 0x9156, 0x1ae1, 0xbf8a, 0x343d, 0x2353, 0xa8e4
+ .hword 0xB6FF, 0x3d48, 0x2a26, 0xa191, 0x04fa, 0x8f4d, 0x9823, 0x1394
+ .hword 0x5942, 0xd2f5, 0xc59b, 0x4e2c, 0xeb47, 0x60f0, 0x779e, 0xfc29
+ .hword 0x4BA8, 0xc01f, 0xd771, 0x5cc6, 0xf9ad, 0x721a, 0x6574, 0xeec3
+ .hword 0xA415, 0x2fa2, 0x38cc, 0xb37b, 0x1610, 0x9da7, 0x8ac9, 0x017e
+ .hword 0x1F65, 0x94d2, 0x83bc, 0x080b, 0xad60, 0x26d7, 0x31b9, 0xba0e
+ .hword 0xF0D8, 0x7b6f, 0x6c01, 0xe7b6, 0x42dd, 0xc96a, 0xde04, 0x55b3
diff --git a/src/isa-l/crc/aarch64/crc32_aarch64_common.h b/src/isa-l/crc/aarch64/crc32_aarch64_common.h
new file mode 100644
index 000000000..a2ef22aea
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_aarch64_common.h
@@ -0,0 +1,321 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+
+
+.macro crc32_hw_common poly_type
+
+.ifc \poly_type,crc32
+ mvn wCRC,wCRC
+.endif
+ cbz LEN, .zero_length_ret
+ tbz BUF, 0, .align_short
+ ldrb wdata,[BUF],1
+ sub LEN,LEN,1
+ crc32_u8 wCRC,wCRC,wdata
+.align_short:
+ tst BUF,2
+ ccmp LEN,1,0,ne
+ bhi .align_short_2
+ tst BUF,4
+ ccmp LEN,3,0,ne
+ bhi .align_word
+
+.align_finish:
+
+ cmp LEN, 63
+ bls .loop_16B
+.loop_64B:
+ ldp data0, data1, [BUF],#16
+ prfm pldl2keep,[BUF,2048]
+ sub LEN,LEN,#64
+ ldp data2, data3, [BUF],#16
+ prfm pldl1keep,[BUF,256]
+ cmp LEN,#64
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ ldp data0, data1, [BUF],#16
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ ldp data2, data3, [BUF],#16
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ bge .loop_64B
+
+.loop_16B:
+ cmp LEN, 15
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+	sub	LEN,LEN,#16 // remainder MUST be less than 16B
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+.less_16B:
+ cmp LEN, 7
+ bls .less_8B
+ ldr data0, [BUF], 8
+ sub LEN, LEN, #8
+ crc32_u64 wCRC, wCRC, data0
+.less_8B:
+ cmp LEN, 3
+ bls .less_4B
+ ldr wdata, [BUF], 4
+ sub LEN, LEN, #4
+ crc32_u32 wCRC, wCRC, wdata
+.less_4B:
+ cmp LEN, 1
+ bls .less_2B
+ ldrh wdata, [BUF], 2
+ sub LEN, LEN, #2
+ crc32_u16 wCRC, wCRC, wdata
+.less_2B:
+ cbz LEN, .zero_length_ret
+ ldrb wdata, [BUF]
+ crc32_u8 wCRC, wCRC, wdata
+.zero_length_ret:
+.ifc \poly_type,crc32
+ mvn w0, wCRC
+.else
+ mov w0, wCRC
+.endif
+ ret
+.align_short_2:
+ ldrh wdata, [BUF], 2
+ sub LEN, LEN, 2
+ tst BUF, 4
+ crc32_u16 wCRC, wCRC, wdata
+ ccmp LEN, 3, 0, ne
+ bls .align_finish
+.align_word:
+ ldr wdata, [BUF], 4
+ sub LEN, LEN, #4
+ crc32_u32 wCRC, wCRC, wdata
+ b .align_finish
+.endm
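+
+/*
+ * A rough C equivalent of crc32_hw_common, assuming <arm_acle.h> (the
+ * 1/2/4-byte alignment peeling at the start is omitted):
+ *
+ *	uint32_t c = seed;	// for the crc32 poly, seed is inverted
+ *	for (; len >= 8; buf += 8, len -= 8)
+ *		c = __crc32d(c, *(const uint64_t *)buf);
+ *	if (len & 4) { c = __crc32w(c, *(const uint32_t *)buf); buf += 4; }
+ *	if (len & 2) { c = __crc32h(c, *(const uint16_t *)buf); buf += 2; }
+ *	if (len & 1) c = __crc32b(c, *buf);
+ */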
+
+.macro crc32_3crc_fold poly_type
+.ifc \poly_type,crc32
+ mvn wCRC,wCRC
+.endif
+ cbz LEN, .zero_length_ret
+ tbz BUF, 0, .align_short
+ ldrb wdata,[BUF],1
+ sub LEN,LEN,1
+ crc32_u8 wCRC,wCRC,wdata
+.align_short:
+ tst BUF,2
+ ccmp LEN,1,0,ne
+ bhi .align_short_2
+ tst BUF,4
+ ccmp LEN,3,0,ne
+ bhi .align_word
+
+.align_finish:
+ cmp LEN,1023
+ adr const_adr, .Lconstants
+ bls 1f
+ ldp dconst0,dconst1,[const_adr]
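+	/*
+	 * Each 1024-byte block is split into three independent streams that
+	 * are CRCed in parallel with the hardware instructions; the partial
+	 * results of streams 0 and 1 are then shifted to their in-block
+	 * position with PMULL (constants dconst0/dconst1) and XORed with
+	 * stream 2 to form the CRC of the whole block.
+	 */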
+2:
+ ldr crc0_data0,[ptr_crc0],8
+ prfm pldl2keep,[ptr_crc0,3*1024-8]
+ mov crc1,0
+ mov crc2,0
+ add ptr_crc1,ptr_crc0,336
+ add ptr_crc2,ptr_crc0,336*2
+ crc32_u64 crc0,crc0,crc0_data0
+ .set offset,0
+ .set ptr_offset,8
+ .rept 5
+ ldp crc0_data0,crc0_data1,[ptr_crc0],16
+ ldp crc1_data0,crc1_data1,[ptr_crc1],16
+ .set offset,offset+64
+ .set ptr_offset,ptr_offset+16
+ prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
+ crc32_u64 crc0,crc0,crc0_data0
+ crc32_u64 crc0,crc0,crc0_data1
+ ldp crc2_data0,crc2_data1,[ptr_crc2],16
+ crc32_u64 crc1,crc1,crc1_data0
+ crc32_u64 crc1,crc1,crc1_data1
+ crc32_u64 crc2,crc2,crc2_data0
+ crc32_u64 crc2,crc2,crc2_data1
+ .endr
+ .set l1_offset,0
+ .rept 10
+ ldp crc0_data0,crc0_data1,[ptr_crc0],16
+ ldp crc1_data0,crc1_data1,[ptr_crc1],16
+ .set offset,offset+64
+ .set ptr_offset,ptr_offset+16
+ prfm pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
+ prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
+ .set l1_offset,l1_offset+64
+ crc32_u64 crc0,crc0,crc0_data0
+ crc32_u64 crc0,crc0,crc0_data1
+ ldp crc2_data0,crc2_data1,[ptr_crc2],16
+ crc32_u64 crc1,crc1,crc1_data0
+ crc32_u64 crc1,crc1,crc1_data1
+ crc32_u64 crc2,crc2,crc2_data0
+ crc32_u64 crc2,crc2,crc2_data1
+ .endr
+
+ .rept 6
+ ldp crc0_data0,crc0_data1,[ptr_crc0],16
+ ldp crc1_data0,crc1_data1,[ptr_crc1],16
+ .set ptr_offset,ptr_offset+16
+ prfm pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
+ .set l1_offset,l1_offset+64
+ crc32_u64 crc0,crc0,crc0_data0
+ crc32_u64 crc0,crc0,crc0_data1
+ ldp crc2_data0,crc2_data1,[ptr_crc2],16
+ crc32_u64 crc1,crc1,crc1_data0
+ crc32_u64 crc1,crc1,crc1_data1
+ crc32_u64 crc2,crc2,crc2_data0
+ crc32_u64 crc2,crc2,crc2_data1
+ .endr
+ ldr crc2_data0,[ptr_crc2]
+ fmov dtmp0,xcrc0
+ fmov dtmp1,xcrc1
+ crc32_u64 crc2,crc2,crc2_data0
+ add ptr_crc0,ptr_crc0,1024-(336+8)
+ pmull vtmp0.1q,vtmp0.1d,vconst0.1d
+ sub LEN,LEN,1024
+ pmull vtmp1.1q,vtmp1.1d,vconst1.1d
+ cmp LEN,1024
+ fmov xcrc0,dtmp0
+ fmov xcrc1,dtmp1
+ crc32_u64 crc0,wzr,xcrc0
+ crc32_u64 crc1,wzr,xcrc1
+
+ eor crc0,crc0,crc2
+ eor crc0,crc0,crc1
+
+ bhs 2b
+1:
+ cmp LEN, 63
+ bls .loop_16B
+.loop_64B:
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#64
+ ldp data2, data3, [BUF],#16
+ cmp LEN,#64
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ ldp data0, data1, [BUF],#16
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ ldp data2, data3, [BUF],#16
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ bge .loop_64B
+
+.loop_16B:
+ cmp LEN, 15
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+	sub	LEN,LEN,#16 // remainder MUST be less than 16B
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+.less_16B:
+ cmp LEN, 7
+ bls .less_8B
+ ldr data0, [BUF], 8
+ sub LEN, LEN, #8
+ crc32_u64 wCRC, wCRC, data0
+.less_8B:
+ cmp LEN, 3
+ bls .less_4B
+ ldr wdata, [BUF], 4
+ sub LEN, LEN, #4
+ crc32_u32 wCRC, wCRC, wdata
+.less_4B:
+ cmp LEN, 1
+ bls .less_2B
+ ldrh wdata, [BUF], 2
+ sub LEN, LEN, #2
+ crc32_u16 wCRC, wCRC, wdata
+.less_2B:
+ cbz LEN, .zero_length_ret
+ ldrb wdata, [BUF]
+ crc32_u8 wCRC, wCRC, wdata
+.zero_length_ret:
+.ifc \poly_type,crc32
+ mvn w0, wCRC
+.else
+ mov w0, wCRC
+.endif
+ ret
+.align_short_2:
+ ldrh wdata, [BUF], 2
+ sub LEN, LEN, 2
+ tst BUF, 4
+ crc32_u16 wCRC, wCRC, wdata
+ ccmp LEN, 3, 0, ne
+ bls .align_finish
+.align_word:
+ ldr wdata, [BUF], 4
+ sub LEN, LEN, #4
+ crc32_u32 wCRC, wCRC, wdata
+ b .align_finish
+.Lconstants:
+.ifc \poly_type,crc32
+ .quad 0xb486819b
+ .quad 0x76278617
+.else
+ .quad 0xe417f38a
+ .quad 0x8f158014
+.endif
+
+.endm
diff --git a/src/isa-l/crc/aarch64/crc32_common_crc_ext_cortex_a72.S b/src/isa-l/crc/aarch64/crc32_common_crc_ext_cortex_a72.S
new file mode 100644
index 000000000..7c9ca35ad
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_common_crc_ext_cortex_a72.S
@@ -0,0 +1,135 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+
+
+.macro crc32_hw_common poly_type
+ cbz LEN, .zero_length_ret
+.ifc \poly_type,crc32
+ mvn wCRC,wCRC
+.endif
+ tbz BUF, 0, .align_short
+ ldrb wdata,[BUF],1
+ sub LEN,LEN,1
+ crc32_u8 wCRC,wCRC,wdata
+.align_short:
+ tst BUF,2
+ ccmp LEN,1,0,ne
+ bhi .align_short_2
+ tst BUF,4
+ ccmp LEN,3,0,ne
+ bhi .align_word
+
+.align_finish:
+
+ cmp LEN, 63
+ bls .loop_16B
+.loop_64B:
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#64
+ ldp data2, data3, [BUF],#16
+ cmp LEN,#64
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ ldp data0, data1, [BUF],#16
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ ldp data2, data3, [BUF],#16
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ bge .loop_64B
+
+.loop_16B:
+ cmp LEN, 15
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+	sub	LEN,LEN,#16 // remainder MUST be less than 16B
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+.less_16B:
+ cmp LEN, 7
+ bls .less_8B
+ ldr data0, [BUF], 8
+ sub LEN, LEN, #8
+ crc32_u64 wCRC, wCRC, data0
+.less_8B:
+ cmp LEN, 3
+ bls .less_4B
+ ldr wdata, [BUF], 4
+ sub LEN, LEN, #4
+ crc32_u32 wCRC, wCRC, wdata
+.less_4B:
+ cmp LEN, 1
+ bls .less_2B
+ ldrh wdata, [BUF], 2
+ sub LEN, LEN, #2
+ crc32_u16 wCRC, wCRC, wdata
+.less_2B:
+ cbz LEN, .finish_exit
+ ldrb wdata, [BUF]
+ crc32_u8 wCRC, wCRC, wdata
+.finish_exit:
+.ifc \poly_type,crc32
+ mvn w0, wCRC
+.else
+ mov w0, wCRC
+.endif
+ ret
+.zero_length_ret:
+ mov w0, wCRC
+ ret
+.align_short_2:
+ ldrh wdata, [BUF], 2
+ sub LEN, LEN, 2
+ tst BUF, 4
+ crc32_u16 wCRC, wCRC, wdata
+ ccmp LEN, 3, 0, ne
+ bls .align_finish
+.align_word:
+ ldr wdata, [BUF], 4
+ sub LEN, LEN, #4
+ crc32_u32 wCRC, wCRC, wdata
+ b .align_finish
+
+.endm
diff --git a/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S
new file mode 100644
index 000000000..4911a30b8
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_common_mix_neoverse_n1.S
@@ -0,0 +1,432 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_q .req q\reg
+ \name\()_v .req v\reg
+ \name\()_s .req s\reg
+ \name\()_d .req d\reg
+.endm
+ declare_var_vector_reg k1k2,20
+ declare_var_vector_reg k3k4,21
+ declare_var_vector_reg poly,22
+ declare_var_vector_reg k5k0,23
+ declare_var_vector_reg mask,24
+ declare_var_vector_reg fold_poly,25
+
+ declare_var_vector_reg tmp0,0
+ declare_var_vector_reg tmp1,1
+ declare_var_vector_reg tmp2,2
+ declare_var_vector_reg tmp3,3
+ declare_var_vector_reg tmp4,4
+ declare_var_vector_reg tmp5,5
+ declare_var_vector_reg tmp6,6
+ declare_var_vector_reg tmp7,7
+ declare_var_vector_reg pmull_data0,16
+ declare_var_vector_reg pmull_data1,17
+ declare_var_vector_reg pmull_data2,18
+ declare_var_vector_reg pmull_data3,19
+
+ vzr .req v26
+
+ const_addr .req x3
+ crc_blk_ptr .req x4
+ pmull_blk_ptr .req x5
+ crc_data0 .req x6
+ crc_data1 .req x7
+ crc_data2 .req x9
+ crc_data3 .req x10
+ wPmull .req w11
+ xPmull .req x11
+
+ data0 .req x4
+ data1 .req x5
+ data2 .req x6
+ data3 .req x7
+ wdata .req w4
+
+.macro pmull_fold
+
+ pmull2 tmp4_v.1q, tmp0_v.2d, k1k2_v.2d
+ pmull2 tmp5_v.1q, tmp1_v.2d, k1k2_v.2d
+ pmull2 tmp6_v.1q, tmp2_v.2d, k1k2_v.2d
+ pmull2 tmp7_v.1q, tmp3_v.2d, k1k2_v.2d
+
+ pmull tmp0_v.1q, tmp0_v.1d, k1k2_v.1d
+ pmull tmp1_v.1q, tmp1_v.1d, k1k2_v.1d
+ pmull tmp2_v.1q, tmp2_v.1d, k1k2_v.1d
+ pmull tmp3_v.1q, tmp3_v.1d, k1k2_v.1d
+ ld1 {pmull_data0_v.16b-pmull_data3_v.16b},[pmull_blk_ptr],#64
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+
+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
+ eor tmp1_v.16b, tmp1_v.16b, tmp5_v.16b
+ eor tmp2_v.16b, tmp2_v.16b, tmp6_v.16b
+ eor tmp3_v.16b, tmp3_v.16b, tmp7_v.16b
+
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, v16.16b
+ eor tmp1_v.16b, tmp1_v.16b, v17.16b
+ eor tmp2_v.16b, tmp2_v.16b, v18.16b
+ eor tmp3_v.16b, tmp3_v.16b, v19.16b
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+.endm
+
+
+
+.macro crc32_common_mix poly_type
+ .set MIX_BLK_SIZE,2048
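+	/*
+	 * Each 2048-byte block is processed as two halves in parallel: the
+	 * first 512 bytes are folded with PMULL (tmp0-tmp3 plus seven
+	 * pmull_fold rounds of 64 bytes each) while the remaining 1536
+	 * bytes go through the hardware crc32 instructions, interleaved so
+	 * the SIMD and integer pipelines run concurrently; the partial
+	 * results are merged afterwards.
+	 */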
+
+.ifc \poly_type,crc32
+ mvn wCRC,wCRC
+.endif
+ cmp LEN,MIX_BLK_SIZE-1
+ adr const_addr, .Lconstants
+ bls start_final
+ ld1 {k1k2_v.16b,k3k4_v.16b,poly_v.16b},[const_addr],#48
+ movi vzr.16b, #0
+ ld1 {k5k0_v.8b,mask_v.8b,fold_poly_v.8b},[const_addr]
+
+loop_2048:
+ ld1 {tmp0_v.16b-tmp3_v.16b}, [BUF]
+ add pmull_blk_ptr,BUF,0x40
+ add crc_blk_ptr, BUF,512
+ mov tmp4_v.16b,vzr.16b
+ fmov tmp4_s, wCRC
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ eor tmp0_v.16b,tmp0_v.16b,tmp4_v.16b
+ mov wCRC, 0
+ sub LEN,LEN,MIX_BLK_SIZE
+ cmp LEN,MIX_BLK_SIZE
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+
+ pmull_fold
+ pmull_fold
+ pmull_fold
+ pmull_fold
+ pmull_fold
+ pmull_fold
+ pmull_fold
+
+ /* Folding cache line into 128bit */
+ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp2_v.16b
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp3_v.16b
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+
+
+	/**
+	 * Perform the last 64-bit fold, which also
+	 * appends 32 zero bits to the input stream.
+	 */
+ ext tmp1_v.16b, tmp0_v.16b, tmp0_v.16b, #8
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ pmull2 tmp1_v.1q, tmp1_v.2d, k3k4_v.2d
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ ext tmp0_v.16b, tmp0_v.16b, vzr.16b, #8
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+
+ /* final 32-bit fold */
+ ext tmp1_v.16b, tmp0_v.16b, vzr.16b, #4
+ and tmp0_v.16b, tmp0_v.16b, mask_v.16b
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ pmull tmp0_v.1q, tmp0_v.1d, k5k0_v.1d
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
+
+	/**
+	 * Finish up with the bit-reversed Barrett
+	 * reduction, 64 -> 32 bits.
+	 */
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ and tmp1_v.16b, tmp0_v.16b, mask_v.16b
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ ext tmp1_v.16b, vzr.16b, tmp1_v.16b, #8
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ pmull2 tmp1_v.1q, tmp1_v.2d, poly_v.2d
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ and tmp1_v.16b, tmp1_v.16b, mask_v.16b
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ pmull tmp1_v.1q, tmp1_v.1d, poly_v.1d
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ mov tmp4_v.16b,vzr.16b
+ mov tmp4_v.s[0], tmp0_v.s[1]
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ ldp crc_data0,crc_data1,[crc_blk_ptr],16
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+ ldp crc_data2,crc_data3,[crc_blk_ptr],16
+
+ crc32_u64 wCRC,wCRC,crc_data0
+ crc32_u64 wCRC,wCRC,crc_data1
+ crc32_u64 wCRC,wCRC,crc_data2
+ crc32_u64 wCRC,wCRC,crc_data3
+
+ pmull tmp4_v.1q, tmp4_v.1d, fold_poly_v.1d
+ add BUF,BUF,MIX_BLK_SIZE
+ fmov xPmull, tmp4_d
+ crc32_u64 wPmull, wzr, xPmull
+ eor wCRC, wPmull, wCRC
+ bge loop_2048
+start_final:
+ cmp LEN, 63
+ bls .loop_16B
+.loop_64B:
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#64
+ ldp data2, data3, [BUF],#16
+ cmp LEN,#64
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ ldp data0, data1, [BUF],#16
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ ldp data2, data3, [BUF],#16
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ crc32_u64 wCRC, wCRC, data2
+ crc32_u64 wCRC, wCRC, data3
+ bge .loop_64B
+
+.loop_16B:
+ cmp LEN, 15
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+ sub LEN,LEN,#16
+ cmp LEN,15
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+ bls .less_16B
+ ldp data0, data1, [BUF],#16
+	sub	LEN,LEN,#16	// remainder MUST be less than 16B
+ crc32_u64 wCRC, wCRC, data0
+ crc32_u64 wCRC, wCRC, data1
+.less_16B:
+ cmp LEN, 7
+ bls .less_8B
+ ldr data0, [BUF], 8
+ sub LEN, LEN, #8
+ crc32_u64 wCRC, wCRC, data0
+.less_8B:
+ cmp LEN, 3
+ bls .less_4B
+ ldr wdata, [BUF], 4
+ sub LEN, LEN, #4
+ crc32_u32 wCRC, wCRC, wdata
+.less_4B:
+ cmp LEN, 1
+ bls .less_2B
+ ldrh wdata, [BUF], 2
+ sub LEN, LEN, #2
+ crc32_u16 wCRC, wCRC, wdata
+.less_2B:
+ cbz LEN, .finish_exit
+ ldrb wdata, [BUF]
+ crc32_u8 wCRC, wCRC, wdata
+.finish_exit:
+.ifc \poly_type,crc32
+ mvn w0, wCRC
+.else
+ mov w0, wCRC
+.endif
+ ret
+.endm
+
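The merge at the end of crc32_common_mix deserves a note: the PMULL pipeline folds bytes 0..511 of each 2048-byte block while the crc32 instructions consume bytes 512..2047, and the two partial CRCs are combined by carry-less-multiplying the folded value with fold_poly and reducing the product with a single crc32x. A minimal C sketch of that merge step, assuming ACLE/NEON intrinsics (-march=armv8-a+crc+crypto); fold_const is a hypothetical stand-in for the fold_poly constant, roughly x^(1536*8) mod P(x) up to bit-reflection fixups:

    #include <stdint.h>
    #include <arm_acle.h>
    #include <arm_neon.h>

    static inline uint32_t merge_pmull_and_crc(uint32_t crc_pmull,
                                               uint32_t crc_insn,
                                               uint64_t fold_const)
    {
            /* pmull tmp4, tmp4, fold_poly: carry-less multiply of the
             * 32-bit folded value by the shift constant; the product
             * fits in 64 bits. */
            poly128_t p = vmull_p64((poly64_t)(uint64_t)crc_pmull,
                                    (poly64_t)fold_const);
            uint64_t prod = vgetq_lane_u64(vreinterpretq_u64_p128(p), 0);
            /* crc32_u64 wPmull, wzr, xPmull: one crc32x performs the
             * final mod-P(x) reduction of the 64-bit product. */
            uint32_t reduced = __crc32d(0, prod);
            /* eor wCRC, wPmull, wCRC: CRCs of disjoint regions XOR together. */
            return reduced ^ crc_insn;
    }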
diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_3crc_fold.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_3crc_fold.S
new file mode 100644
index 000000000..116d62cc9
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_3crc_fold.S
@@ -0,0 +1,95 @@
+########################################################################
+# Copyright(c) 2020 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+#include "crc32_aarch64_common.h"
+ .text
+ .align 6
+ .arch armv8-a+crc+crypto
+.macro crc32_u64 dst,src,data
+ crc32x \dst,\src,\data
+.endm
+.macro crc32_u32 dst,src,data
+ crc32w \dst,\src,\data
+.endm
+.macro crc32_u16 dst,src,data
+ crc32h \dst,\src,\data
+.endm
+.macro crc32_u8 dst,src,data
+ crc32b \dst,\src,\data
+.endm
+.macro declare_var_vector_reg name:req,reg:req
+ q\name .req q\reg
+ v\name .req v\reg
+ s\name .req s\reg
+ d\name .req d\reg
+.endm
+
+ BUF .req x1
+ ptr_crc0 .req x1
+ LEN .req x2
+ wCRC .req w0
+ crc0 .req w0
+ xcrc0 .req x0
+
+ crc1 .req w3
+ crc2 .req w4
+ xcrc1 .req x3
+ const_adr .req x3
+ ptr_crc1 .req x6
+ ptr_crc2 .req x7
+ crc0_data0 .req x9
+ crc0_data1 .req x10
+ crc1_data0 .req x11
+ crc1_data1 .req x12
+ crc2_data0 .req x13
+ crc2_data1 .req x14
+
+ wdata .req w3
+ data0 .req x3
+ data1 .req x4
+ data2 .req x5
+ data3 .req x6
+
+ declare_var_vector_reg tmp0,0
+ declare_var_vector_reg tmp1,1
+ declare_var_vector_reg const0,2
+ declare_var_vector_reg const1,3
+
+/**
+ uint32_t crc32_gzip_refl(
+ uint32_t wCRC,
+ const unsigned char *BUF,
+ uint64_t LEN
+ );
+*/
+
+ .global crc32_gzip_refl_3crc_fold
+ .type crc32_gzip_refl_3crc_fold, %function
+crc32_gzip_refl_3crc_fold:
+ crc32_3crc_fold crc32
+ .size crc32_gzip_refl_3crc_fold, .-crc32_gzip_refl_3crc_fold
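A hedged usage sketch of the entry point just defined, matching the prototype quoted in the file (the extern declaration is an assumption; real callers normally reach this symbol through the isa-l multibinary dispatcher):

    #include <stdint.h>

    extern uint32_t crc32_gzip_refl_3crc_fold(uint32_t wCRC,
                                              const unsigned char *BUF,
                                              uint64_t LEN);

    uint32_t gzip_crc(const unsigned char *buf, uint64_t len)
    {
            /* 0 seeds a fresh stream; the gzip-style pre/post inversion
             * (mvn) happens inside the crc32_3crc_fold body. */
            return crc32_gzip_refl_3crc_fold(0, buf, len);
    }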
diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_crc_ext.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_crc_ext.S
new file mode 100644
index 000000000..8e3d227be
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_crc_ext.S
@@ -0,0 +1,66 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .text
+ .align 6
+ .arch armv8-a+crc
+
+
+#include "crc32_aarch64_common.h"
+
+ BUF .req x1
+ LEN .req x2
+ wCRC .req w0
+ data0 .req x4
+ data1 .req x5
+ data2 .req x6
+ data3 .req x7
+ wdata .req w3
+.macro crc32_u64 dst,src,data
+ crc32x \dst,\src,\data
+.endm
+.macro crc32_u32 dst,src,data
+ crc32w \dst,\src,\data
+.endm
+.macro crc32_u16 dst,src,data
+ crc32h \dst,\src,\data
+.endm
+.macro crc32_u8 dst,src,data
+ crc32b \dst,\src,\data
+.endm
+
+	/**
+	 * uint32_t crc32_gzip_refl_crc_ext(uint32_t wCRC,
+	 *		const unsigned char *BUF, uint64_t LEN);
+	 */
+ .global crc32_gzip_refl_crc_ext
+ .type crc32_gzip_refl_crc_ext, %function
+crc32_gzip_refl_crc_ext:
+ crc32_hw_common crc32
+ .size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext
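crc32_hw_common (from crc32_aarch64_common.h) is the pure CRC-instruction path with no PMULL at all; its shape matches the .loop_64B/.loop_16B tail handling shown earlier, just without the polynomial folds. A compact C equivalent, as a sketch of what the macro reduces to (the real macro unrolls 64 bytes per iteration):

    #include <stdint.h>
    #include <arm_acle.h>          /* requires -march=armv8-a+crc */

    static uint32_t crc32_hw_sketch(uint32_t crc, const unsigned char *buf,
                                    uint64_t len)
    {
            crc = ~crc;                        /* gzip pre-inversion */
            while (len >= 8) {
                    uint64_t d;
                    __builtin_memcpy(&d, buf, 8);
                    crc = __crc32d(crc, d);    /* crc32x */
                    buf += 8; len -= 8;
            }
            if (len >= 4) {
                    uint32_t d;
                    __builtin_memcpy(&d, buf, 4);
                    crc = __crc32w(crc, d);    /* crc32w */
                    buf += 4; len -= 4;
            }
            if (len >= 2) {
                    uint16_t d;
                    __builtin_memcpy(&d, buf, 2);
                    crc = __crc32h(crc, d);    /* crc32h */
                    buf += 2; len -= 2;
            }
            if (len)
                    crc = __crc32b(crc, *buf); /* crc32b */
            return ~crc;                       /* gzip post-inversion */
    }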
diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S
new file mode 100644
index 000000000..d52e2d8f5
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc32_gzip_refl_pmull.h"
+#include "crc32_refl_common_pmull.h"
+
+crc32_refl_func crc32_gzip_refl_pmull
diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h b/src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h
new file mode 100644
index 000000000..883567d97
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h
@@ -0,0 +1,87 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, 0x2d95
+.equ p4_low_b1, 0x8f35
+.equ p4_high_b0, 0x13d7
+.equ p4_high_b1, 0x1d95
+.equ p1_low_b0, 0x9191
+.equ p1_low_b1, 0xae68
+.equ p1_high_b0, 0x009e
+.equ p1_high_b1, 0xccaa
+.equ p0_low_b0, 0x6765
+.equ p0_low_b1, 0xb8bc
+.equ p0_high_b0, p1_high_b0
+.equ p0_high_b1, p1_high_b1
+.equ br_low_b0, 0x0641
+.equ br_low_b1, 0xdb71
+.equ br_low_b2, 0x1
+.equ br_high_b0, 0x1641
+.equ br_high_b1, 0xf701
+.equ br_high_b2, 0x1
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc32_table_gzip_refl, %object
+ .size crc32_table_gzip_refl, 1024
+crc32_table_gzip_refl:
+ .word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3
+ .word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91
+ .word 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7
+ .word 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5
+ .word 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b
+ .word 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59
+ .word 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f
+ .word 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d
+ .word 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433
+ .word 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01
+ .word 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457
+ .word 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65
+ .word 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb
+ .word 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9
+ .word 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f
+ .word 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad
+ .word 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683
+ .word 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1
+ .word 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7
+ .word 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5
+ .word 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b
+ .word 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79
+ .word 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f
+ .word 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d
+ .word 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713
+ .word 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21
+ .word 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777
+ .word 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45
+ .word 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db
+ .word 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9
+ .word 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf
+ .word 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
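The 1 KiB table above is the classic byte-at-a-time table for the reflected gzip polynomial, presumably used by the common pmull code for short inputs and tails. The LSB-first update it supports, sketched in C (the caller handles the gzip pre/post inversion):

    #include <stddef.h>
    #include <stdint.h>

    extern const uint32_t crc32_table_gzip_refl[256];

    static uint32_t crc32_refl_table(uint32_t crc, const unsigned char *buf,
                                     size_t len)
    {
            /* Reflected update: index with the low byte, shift right. */
            while (len--)
                    crc = crc32_table_gzip_refl[(crc ^ *buf++) & 0xff]
                          ^ (crc >> 8);
            return crc;
    }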
diff --git a/src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S b/src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S
new file mode 100644
index 000000000..32966fb9d
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc32_ieee_norm_pmull.h"
+#include "crc32_norm_common_pmull.h"
+
+crc32_norm_func crc32_ieee_norm_pmull
diff --git a/src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h b/src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h
new file mode 100644
index 000000000..67acd2a03
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h
@@ -0,0 +1,87 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, 0x8b11
+.equ p4_low_b1, 0xe622
+.equ p4_high_b0, 0x794c
+.equ p4_high_b1, 0x8833
+.equ p1_low_b0, 0x5605
+.equ p1_low_b1, 0xe8a4
+.equ p1_high_b0, 0xcd4c
+.equ p1_high_b1, 0xc5b9
+.equ p0_low_b0, 0x678d
+.equ p0_low_b1, 0x490d
+.equ p0_high_b0, 0xaa66
+.equ p0_high_b1, 0xf200
+.equ br_low_b0, 0x01df
+.equ br_low_b1, 0x04d1
+.equ br_low_b2, 0x1
+.equ br_high_b0, 0x1db7
+.equ br_high_b1, 0x04c1
+.equ br_high_b2, 0x1
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc32_table_ieee_norm, %object
+ .size crc32_table_ieee_norm, 1024
+crc32_table_ieee_norm:
+ .word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005
+ .word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd
+ .word 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75
+ .word 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd
+ .word 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5
+ .word 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d
+ .word 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95
+ .word 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d
+ .word 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072
+ .word 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca
+ .word 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02
+ .word 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba
+ .word 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692
+ .word 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a
+ .word 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2
+ .word 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a
+ .word 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb
+ .word 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53
+ .word 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b
+ .word 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623
+ .word 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b
+ .word 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3
+ .word 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b
+ .word 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3
+ .word 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c
+ .word 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24
+ .word 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec
+ .word 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654
+ .word 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c
+ .word 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4
+ .word 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c
+ .word 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
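This table serves the non-reflected (normal) bit order of the IEEE polynomial, so the matching byte-wise update indexes with the top byte and shifts left — the mirror image of the gzip sketch above:

    #include <stddef.h>
    #include <stdint.h>

    extern const uint32_t crc32_table_ieee_norm[256];

    static uint32_t crc32_norm_table(uint32_t crc, const unsigned char *buf,
                                     size_t len)
    {
            /* Normal (MSB-first) update: index with the high byte, shift left. */
            while (len--)
                    crc = crc32_table_ieee_norm[((crc >> 24) ^ *buf++) & 0xff]
                          ^ (crc << 8);
            return crc;
    }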
diff --git a/src/isa-l/crc/aarch64/crc32_iscsi_3crc_fold.S b/src/isa-l/crc/aarch64/crc32_iscsi_3crc_fold.S
new file mode 100644
index 000000000..2beaa80c7
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_iscsi_3crc_fold.S
@@ -0,0 +1,97 @@
+########################################################################
+# Copyright(c) 2020 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+
+ .text
+ .align 6
+ .arch armv8-a+crc+crypto
+#include "crc32_aarch64_common.h"
+.macro crc32_u64 dst,src,data
+ crc32cx \dst,\src,\data
+.endm
+.macro crc32_u32 dst,src,data
+ crc32cw \dst,\src,\data
+.endm
+.macro crc32_u16 dst,src,data
+ crc32ch \dst,\src,\data
+.endm
+.macro crc32_u8 dst,src,data
+ crc32cb \dst,\src,\data
+.endm
+.macro declare_var_vector_reg name:req,reg:req
+ q\name .req q\reg
+ v\name .req v\reg
+ s\name .req s\reg
+ d\name .req d\reg
+.endm
+
+ BUF .req x0
+ LEN .req x1
+ wCRC .req w2
+ crc0 .req w2
+ crc1 .req w3
+ crc2 .req w4
+ xcrc0 .req x2
+ xcrc1 .req x3
+ const_adr .req x3
+ ptr_crc0 .req x0
+ ptr_crc1 .req x6
+ ptr_crc2 .req x7
+ crc0_data0 .req x9
+ crc0_data1 .req x10
+ crc1_data0 .req x11
+ crc1_data1 .req x12
+ crc2_data0 .req x13
+ crc2_data1 .req x14
+
+ wdata .req w3
+ data0 .req x3
+ data1 .req x4
+ data2 .req x5
+ data3 .req x6
+
+ declare_var_vector_reg tmp0,0
+ declare_var_vector_reg tmp1,1
+ declare_var_vector_reg const0,2
+ declare_var_vector_reg const1,3
+
+/**
+ unsigned int crc32_iscsi(
+ unsigned char *BUF,
+ int LEN,
+ unsigned int wCRC
+ );
+
+*/
+
+ .global crc32_iscsi_3crc_fold
+ .type crc32_iscsi_3crc_fold, %function
+crc32_iscsi_3crc_fold:
+ crc32_3crc_fold crc32c
+ .size crc32_iscsi_3crc_fold, .-crc32_iscsi_3crc_fold
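Note the only functional difference from the gzip variant: the crc32_u* macros here expand to the crc32c* instructions, so the shared crc32_3crc_fold body computes CRC32C (Castagnoli polynomial 0x1EDC6F41) rather than the gzip CRC32 (0x04C11DB7). In ACLE terms, the 64-bit step is simply:

    #include <stdint.h>
    #include <arm_acle.h>

    /* Same macro body as the gzip file, different instruction family. */
    static inline uint32_t crc32c_u64(uint32_t crc, uint64_t data)
    {
            return __crc32cd(crc, data);    /* crc32cx */
    }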
diff --git a/src/isa-l/crc/aarch64/crc32_iscsi_crc_ext.S b/src/isa-l/crc/aarch64/crc32_iscsi_crc_ext.S
new file mode 100644
index 000000000..359401a52
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_iscsi_crc_ext.S
@@ -0,0 +1,65 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .text
+ .align 6
+ .arch armv8-a+crc
+
+
+#include "crc32_aarch64_common.h"
+ BUF .req x0
+ LEN .req x1
+ wCRC .req w2
+ data0 .req x4
+ data1 .req x5
+ data2 .req x6
+ data3 .req x7
+ wdata .req w3
+.macro crc32_u64 dst,src,data
+ crc32cx \dst,\src,\data
+.endm
+.macro crc32_u32 dst,src,data
+ crc32cw \dst,\src,\data
+.endm
+.macro crc32_u16 dst,src,data
+ crc32ch \dst,\src,\data
+.endm
+.macro crc32_u8 dst,src,data
+ crc32cb \dst,\src,\data
+.endm
+
+ /**
+ * uint32_t crc32_iscsi_crc_ext(const unsigned char *BUF,
+ * uint64_t LEN,uint32_t wCRC);
+ */
+ .global crc32_iscsi_crc_ext
+ .type crc32_iscsi_crc_ext, %function
+crc32_iscsi_crc_ext:
+ crc32_hw_common crc32c
+ .size crc32_iscsi_crc_ext, .-crc32_iscsi_crc_ext
diff --git a/src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S b/src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S
new file mode 100644
index 000000000..09a88e2e1
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S
@@ -0,0 +1,53 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc32_iscsi_refl_pmull.h"
+#include "crc32_refl_common_pmull.h"
+
+crc32_refl_func crc32_iscsi_refl_pmull_internal
+
+ .arch armv8-a+crc+crypto
+ .text
+ .align 3
+ .global crc32_iscsi_refl_pmull
+ .type crc32_iscsi_refl_pmull, %function
+crc32_iscsi_refl_pmull:
+ stp x29, x30, [sp, -32]!
+ mov x29, sp
+
+ mov w7, w2
+ sxtw x2, w1
+ mov x1, x0
+ mov w0, w7
+ mvn w0, w0
+
+ bl crc32_iscsi_refl_pmull_internal
+ mvn w0, w0
+ ldp x29, x30, [sp], 32
+ ret
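This wrapper only adapts conventions: crc32_iscsi takes (buf, len, crc) and does no inversion of its own, while the shared reflected-pmull body expects (crc, buf, len) in the inverted-CRC convention. The same shim rendered in C, treating the internal symbol as an extern:

    #include <stdint.h>

    extern uint32_t crc32_iscsi_refl_pmull_internal(uint32_t crc,
                                                    const unsigned char *buf,
                                                    uint64_t len);

    unsigned int crc32_iscsi_refl_pmull_sketch(unsigned char *buf, int len,
                                               unsigned int init_crc)
    {
            /* mvn before the call, mvn after; the (int64_t) cast mirrors
             * the sxtw that widens the int length in the assembly. */
            return ~crc32_iscsi_refl_pmull_internal(~init_crc, buf,
                                                    (uint64_t)(int64_t)len);
    }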
diff --git a/src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h b/src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h
new file mode 100644
index 000000000..c17b91be3
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h
@@ -0,0 +1,87 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, 0xef02
+.equ p4_low_b1, 0x740e
+.equ p4_high_b0, 0xddf8
+.equ p4_high_b1, 0x9e4a
+.equ p1_low_b0, 0x0dfe
+.equ p1_low_b1, 0xf20c
+.equ p1_high_b0, 0x7d27
+.equ p1_high_b1, 0x493c
+.equ p0_low_b0, 0xaab8
+.equ p0_low_b1, 0xdd45
+.equ p0_high_b0, p1_high_b0
+.equ p0_high_b1, p1_high_b1
+.equ br_low_b0, 0x76f1
+.equ br_low_b1, 0x05ec
+.equ br_low_b2, 0x1
+.equ br_high_b0, 0x13f1
+.equ br_high_b1, 0xdea7
+.equ br_high_b2, 0x0
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc32_table_iscsi_refl, %object
+ .size crc32_table_iscsi_refl, 1024
+crc32_table_iscsi_refl:
+ .word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB
+ .word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24
+ .word 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384
+ .word 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B
+ .word 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35
+ .word 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA
+ .word 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A
+ .word 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595
+ .word 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957
+ .word 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198
+ .word 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38
+ .word 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7
+ .word 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789
+ .word 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46
+ .word 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6
+ .word 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829
+ .word 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93
+ .word 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C
+ .word 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC
+ .word 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033
+ .word 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D
+ .word 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982
+ .word 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622
+ .word 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED
+ .word 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F
+ .word 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0
+ .word 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540
+ .word 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F
+ .word 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1
+ .word 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E
+ .word 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E
+ .word 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
diff --git a/src/isa-l/crc/aarch64/crc32_mix_default.S b/src/isa-l/crc/aarch64/crc32_mix_default.S
new file mode 100644
index 000000000..05c34074d
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_mix_default.S
@@ -0,0 +1,107 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a+crypto+crc
+ .text
+ .align 6
+
+#define CRC32
+
+.macro crc32_u64 dst,src,data
+ crc32x \dst,\src,\data
+.endm
+
+.macro crc32_u32 dst,src,data
+ crc32w \dst,\src,\data
+.endm
+
+.macro crc32_u16 dst,src,data
+ crc32h \dst,\src,\data
+.endm
+
+.macro crc32_u8 dst,src,data
+ crc32b \dst,\src,\data
+.endm
+
+#include "crc32_mix_default_common.S"
+
+ .global crc32_mix_default
+ .type crc32_mix_default, %function
+crc32_mix_default:
+ crc32_mix_main_default
+ .size crc32_mix_default, .-crc32_mix_default
+
+ .section .rodata
+ .align 4
+ .set lanchor_crc32,. + 0
+
+ .type k1k2, %object
+ .size k1k2, 16
+k1k2:
+ .xword 0x0154442bd4
+ .xword 0x01c6e41596
+
+ .type k3k4, %object
+ .size k3k4, 16
+k3k4:
+ .xword 0x01751997d0
+ .xword 0x00ccaa009e
+
+ .type k5k0, %object
+ .size k5k0, 16
+k5k0:
+ .xword 0x0163cd6124
+ .xword 0
+
+ .type poly, %object
+ .size poly, 16
+poly:
+ .xword 0x01db710641
+ .xword 0x01f7011641
+
+ .type crc32_const, %object
+ .size crc32_const, 48
+crc32_const:
+ .xword 0x1753ab84
+ .xword 0
+ .xword 0xbbf2f6d6
+ .xword 0
+ .xword 0x0c30f51d
+ .xword 0
+
+ .align 4
+ .set .lanchor_mask,. + 0
+
+ .type mask, %object
+ .size mask, 16
+mask:
+ .word -1
+ .word 0
+ .word -1
+ .word 0
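The k1k2/k3k4/k5k0 pairs are the standard folding multipliers of the carry-less-multiply CRC technique: precomputed powers of x reduced modulo the CRC-32 polynomial, with poly holding the Barrett-reduction pair. Up to the bit-reflection fixups, the identities they should encode (stated as an assumption, following the usual 4x128-bit fold layout) are:

    k_1 = x^{4\cdot128+64} \bmod P(x), \quad k_2 = x^{4\cdot128} \bmod P(x)
    k_3 = x^{128+64} \bmod P(x), \quad k_4 = x^{128} \bmod P(x)
    k_5 = x^{64} \bmod P(x), \qquad P(x) = \text{0x104C11DB7}

The three crc32_const entries shift the three interleaved 512-byte CRC streams of crc32_mix2048 into place — roughly x^{1536\cdot8}, x^{1024\cdot8} and x^{512\cdot8} \bmod P(x) for the PMULL, crc0 and crc1 partials respectively (crc2 covers the final 512 bytes and needs no shift).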
diff --git a/src/isa-l/crc/aarch64/crc32_mix_default_common.S b/src/isa-l/crc/aarch64/crc32_mix_default_common.S
new file mode 100644
index 000000000..106da209a
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_mix_default_common.S
@@ -0,0 +1,563 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.macro declare_generic_reg name:req, reg:req, default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+.macro declare_neon_reg name:req, reg:req, default:req
+ \name .req \default\reg
+ v_\name .req v\reg
+ q_\name .req q\reg
+ d_\name .req d\reg
+ s_\name .req s\reg
+.endm
+
+/**********************************************************************
+ variables
+**********************************************************************/
+ declare_generic_reg crc, 0,w
+ declare_generic_reg buf, 1,x
+ declare_generic_reg len, 2,x
+ declare_generic_reg buf_saved, 3,x
+ declare_generic_reg buf_iter, 4,x
+ declare_generic_reg len_saved, 5,x
+ declare_generic_reg buf_tmp, 6,x
+
+ declare_generic_reg crc0, 7,x
+ declare_generic_reg crc1, 8,x
+ declare_generic_reg crc2, 9,x
+ declare_generic_reg pconst, 10,x
+ declare_generic_reg data_crc0, 11,x
+ declare_generic_reg data_crc1, 12,x
+ declare_generic_reg data_crc2, 13,x
+
+ declare_generic_reg size, 9,x
+ declare_generic_reg crc_tmp, 10,w
+ declare_generic_reg size_tmp, 11,x
+ declare_generic_reg data_tmp1, 11,x
+ declare_generic_reg data_tmp2, 12,x
+ declare_generic_reg data_tmp3, 13,x
+
+ declare_generic_reg tmp, 14,x
+ declare_generic_reg tmp1, 15,x
+
+// return
+ declare_generic_reg ret_crc, 0,w
+
+/**********************************************************************
+ simd variables
+**********************************************************************/
+ declare_neon_reg a0, 0,v
+ declare_neon_reg a1, 1,v
+ declare_neon_reg a2, 2,v
+ declare_neon_reg a3, 3,v
+ declare_neon_reg a4, 4,v
+
+ declare_neon_reg a5, 16,v
+ declare_neon_reg a6, 17,v
+ declare_neon_reg a7, 18,v
+ declare_neon_reg a8, 19,v
+
+ declare_neon_reg y5, 20,v
+ declare_neon_reg y6, 21,v
+ declare_neon_reg y7, 22,v
+ declare_neon_reg y8, 23,v
+
+ declare_neon_reg neon_zero, 24,v
+ declare_neon_reg neon_tmp, 24,v
+
+ declare_neon_reg k5k0, 25,v
+ declare_neon_reg neon_tmp1, 26,v
+ declare_neon_reg neon_tmp2, 27,v
+ declare_neon_reg neon_tmp3, 28,v
+
+ declare_neon_reg crc_pmull, 29,v
+ declare_neon_reg neon_crc0, 30,v
+ declare_neon_reg neon_crc1, 31,v
+
+ declare_neon_reg neon_const0, 5,v
+ declare_neon_reg neon_const1, 6,v
+ declare_neon_reg neon_const2, 7,v
+
+// constants
+ .equ offset_k3k4, 16
+ .equ offset_k5k0, 32
+ .equ offset_poly, 48
+ .equ offset_crc32_const, 64
+
+// pmull fold
+.macro pmull_fold
+ ldr x_data_crc0, [x_buf_tmp, 464]
+ ldr x_data_crc1, [x_buf_tmp, 976]
+ ldr x_data_crc2, [x_buf_tmp, 1488]
+
+ pmull v_a5.1q, v_a1.1d, v_a0.1d
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ ldr x_data_crc0, [x_buf_tmp, 472]
+ ldr x_data_crc1, [x_buf_tmp, 984]
+ ldr x_data_crc2, [x_buf_tmp, 1496]
+
+ pmull v_a6.1q, v_a2.1d, v_a0.1d
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ ldr x_data_crc0, [x_buf_tmp, 480]
+ ldr x_data_crc1, [x_buf_tmp, 992]
+ ldr x_data_crc2, [x_buf_tmp, 1504]
+
+ pmull v_a7.1q, v_a3.1d, v_a0.1d
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ ldr x_data_crc0, [x_buf_tmp, 488]
+ ldr x_data_crc1, [x_buf_tmp, 1000]
+ ldr x_data_crc2, [x_buf_tmp, 1512]
+
+ pmull v_a8.1q, v_a4.1d, v_a0.1d
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ ldr x_data_crc0, [x_buf_tmp, 496]
+ ldr x_data_crc1, [x_buf_tmp, 1008]
+ ldr x_data_crc2, [x_buf_tmp, 1520]
+
+ pmull2 v_a1.1q, v_a1.2d, v_a0.2d
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ ld1 {v_y5.4s, v_y6.4s, v_y7.4s, v_y8.4s}, [x_buf_tmp]
+
+ ldr x_data_crc0, [x_buf_tmp, 504]
+ ldr x_data_crc1, [x_buf_tmp, 1016]
+ ldr x_data_crc2, [x_buf_tmp, 1528]
+
+ pmull2 v_a2.1q, v_a2.2d, v_a0.2d
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ pmull2 v_a3.1q, v_a3.2d, v_a0.2d
+ pmull2 v_a4.1q, v_a4.2d, v_a0.2d
+
+ eor v_y5.16b, v_y5.16b, v_a5.16b
+ eor v_y6.16b, v_y6.16b, v_a6.16b
+ eor v_y7.16b, v_y7.16b, v_a7.16b
+ eor v_y8.16b, v_y8.16b, v_a8.16b
+
+ ldr x_data_crc0, [x_buf_tmp, 512]
+ ldr x_data_crc1, [x_buf_tmp, 1024]
+ ldr x_data_crc2, [x_buf_tmp, 1536]
+
+ eor v_a1.16b, v_y5.16b, v_a1.16b
+ eor v_a2.16b, v_y6.16b, v_a2.16b
+ eor v_a3.16b, v_y7.16b, v_a3.16b
+ eor v_a4.16b, v_y8.16b, v_a4.16b
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ ldr x_data_crc0, [x_buf_tmp, 520]
+ ldr x_data_crc1, [x_buf_tmp, 1032]
+ ldr x_data_crc2, [x_buf_tmp, 1544]
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+.endm
+
+// crc32 mix for 2048 byte input data
+.macro crc32_mix2048
+ fmov s_a1, w_crc
+ movi v_neon_tmp.4s, 0
+
+ adrp x_pconst, lanchor_crc32
+ add x_buf_tmp, x_buf, 64
+
+ ldr x_data_crc0, [x_buf, 512]
+ ldr x_data_crc1, [x_buf, 1024]
+ ldr x_data_crc2, [x_buf, 1536]
+
+ crc32_u64 w_crc0, wzr, x_data_crc0
+ crc32_u64 w_crc1, wzr, x_data_crc1
+ crc32_u64 w_crc2, wzr, x_data_crc2
+
+#ifdef CRC32
+ mvn v_a1.8b, v_a1.8b
+#endif
+
+ ins v_neon_tmp.s[0], v_a1.s[0]
+
+ ld1 {v_a1.4s, v_a2.4s, v_a3.4s, v_a4.4s}, [x_buf]
+
+ ldr x_data_crc0, [x_buf, 520]
+ ldr x_data_crc1, [x_buf, 1032]
+ ldr x_data_crc2, [x_buf, 1544]
+
+ eor v_a1.16b, v_a1.16b, v_neon_tmp.16b
+ ldr q_a0, [x_pconst, #:lo12:lanchor_crc32] // k1k2
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+// loop start, unroll the loop
+ .align 4
+ pmull_fold
+
+ add x_buf_tmp, x_buf_tmp, 64
+ pmull_fold
+
+ add x_buf_tmp, x_buf_tmp, 64
+ pmull_fold
+
+ add x_buf_tmp, x_buf_tmp, 64
+ pmull_fold
+
+ add x_buf_tmp, x_buf_tmp, 64
+ pmull_fold
+
+ add x_buf_tmp, x_buf_tmp, 64
+ pmull_fold
+
+ add x_buf_tmp, x_buf_tmp, 64
+ pmull_fold
+// loop end
+
+// PMULL: fold into 128 bits
+ add x_pconst, x_pconst, :lo12:lanchor_crc32
+
+ ldr x_data_crc0, [x_buf, 976]
+ ldr x_data_crc1, [x_buf, 1488]
+ ldr x_data_crc2, [x_buf, 2000]
+
+ ldr q_a0, [x_pconst, offset_k3k4] // k3k4
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ pmull v_a5.1q, v_a1.1d, v_a0.1d
+ pmull2 v_a1.1q, v_a1.2d, v_a0.2d
+
+ eor v_a1.16b, v_a5.16b, v_a1.16b
+ eor v_a1.16b, v_a1.16b, v_a2.16b
+
+ ldr x_data_crc0, [x_buf, 984]
+ ldr x_data_crc1, [x_buf, 1496]
+ ldr x_data_crc2, [x_buf, 2008]
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ pmull v_a5.1q, v_a1.1d, v_a0.1d
+ pmull2 v_a1.1q, v_a1.2d, v_a0.2d
+
+ ldr x_data_crc0, [x_buf, 992]
+ ldr x_data_crc1, [x_buf, 1504]
+ ldr x_data_crc2, [x_buf, 2016]
+
+ eor v_a1.16b, v_a5.16b, v_a1.16b
+ eor v_a1.16b, v_a1.16b, v_a3.16b
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ pmull v_a5.1q, v_a1.1d, v_a0.1d
+ pmull2 v_a1.1q, v_a1.2d, v_a0.2d
+
+ ldr x_data_crc0, [x_buf, 1000]
+ ldr x_data_crc1, [x_buf, 1512]
+ ldr x_data_crc2, [x_buf, 2024]
+
+ eor v_a1.16b, v_a5.16b, v_a1.16b
+ eor v_a1.16b, v_a1.16b, v_a4.16b
+
+// PMULL: fold 128 bits to 64 bits
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ dup d_a0, v_a0.d[1]
+ pmull v_a2.1q, v_a1.1d, v_a0.1d
+
+ movi v_neon_zero.4s, 0
+ ldr q_k5k0, [x_pconst, offset_k5k0] // k5k0
+ adrp x_tmp, .lanchor_mask
+
+ ldr x_data_crc0, [x_buf, 1008]
+ ldr x_data_crc1, [x_buf, 1520]
+ ldr x_data_crc2, [x_buf, 2032]
+
+ ext v_a1.16b, v_a1.16b, v_neon_zero.16b, #8
+ eor v_a1.16b, v_a2.16b, v_a1.16b
+ ldr q_neon_tmp3, [x_tmp, #:lo12:.lanchor_mask]
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ dup d_a0, v_k5k0.d[1]
+ pmull v_a3.1q, v_a2.1d, v_a0.1d
+
+ ext v_a2.16b, v_a1.16b, v_neon_zero.16b, #4
+ and v_a1.16b, v_a1.16b, v_neon_tmp3.16b
+ pmull v_a1.1q, v_a1.1d, v_k5k0.1d
+ eor v_a1.16b, v_a2.16b, v_a1.16b
+
+// PMULL: Barrett reduce to 32-bits
+ ldr q_neon_tmp1, [x_pconst, offset_poly] // poly
+
+ ldr x_data_crc0, [x_buf, 1016]
+ ldr x_data_crc1, [x_buf, 1528]
+ ldr x_data_crc2, [x_buf, 2040]
+
+ dup d_neon_tmp2, v_neon_tmp1.d[1]
+
+ crc32_u64 w_crc0, w_crc0, x_data_crc0
+ crc32_u64 w_crc1, w_crc1, x_data_crc1
+ crc32_u64 w_crc2, w_crc2, x_data_crc2
+
+ and v_a2.16b, v_a1.16b, v_neon_tmp3.16b
+ pmull v_a2.1q, v_a2.1d, v_neon_tmp2.1d
+ and v_a2.16b, v_neon_tmp3.16b, v_a2.16b
+ pmull v_a2.1q, v_a2.1d, v_neon_tmp1.1d
+
+// crc_pmull result
+ eor v_a1.16b, v_a1.16b, v_a2.16b
+ dup s_crc_pmull, v_a1.s[1]
+
+// merge crc_pmull, crc0, crc1, crc2 using pmull instructions
+ fmov s_neon_crc0, w_crc0
+ fmov s_neon_crc1, w_crc1
+
+ ldr q_neon_const0, [x_pconst, offset_crc32_const]
+ ldr q_neon_const1, [x_pconst, offset_crc32_const+16]
+ ldr q_neon_const2, [x_pconst, offset_crc32_const+32]
+
+ pmull v_crc_pmull.1q, v_crc_pmull.1d, v_neon_const0.1d
+ pmull v_neon_crc0.1q, v_neon_crc0.1d, v_neon_const1.1d
+ pmull v_neon_crc1.1q, v_neon_crc1.1d, v_neon_const2.1d
+
+ fmov x_tmp1, d_neon_crc0
+ crc32_u64 w_crc0, wzr, x_tmp1
+
+ fmov x_tmp1, d_neon_crc1
+ crc32_u64 w_crc1, wzr, x_tmp1
+
+ eor w_ret_crc, w_crc1, w_crc0
+
+ fmov x_tmp1, d_crc_pmull
+ crc32_u64 w_tmp, wzr, x_tmp1
+
+ eor w_crc2, w_tmp, w_crc2
+
+// handle crc32/crc32c
+#ifdef CRC32
+ eon w_ret_crc, w_crc2, w_ret_crc
+#else
+ eor w_ret_crc, w_crc2, w_ret_crc
+#endif
+.endm
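+
+// editor's note (hedged model, illustrative names): crc32_mix2048 runs
+// four CRC streams over one 2048-byte block in parallel -- one PMULL
+// stream and three hardware-CRC streams -- then shifts each partial CRC
+// forward by a precomputed (x^N mod P) constant and xors them together.
+// Roughly, in C:
+//
+//   uint32_t mix2048_model(uint32_t crc, const uint8_t *buf)
+//   {
+//       uint32_t cp = fold_pmull(crc, buf, 512);      /* bytes    0..511  */
+//       uint32_t c0 = fold_hw(0, buf +  512, 512);    /* bytes  512..1023 */
+//       uint32_t c1 = fold_hw(0, buf + 1024, 512);    /* bytes 1024..1535 */
+//       uint32_t c2 = fold_hw(0, buf + 1536, 512);    /* bytes 1536..2047 */
+//       /* the crc32_const table above holds the per-stream constants */
+//       return shift(cp, C0) ^ shift(c0, C1) ^ shift(c1, C2) ^ c2;
+//   }
+//
+// fold_pmull/fold_hw/shift and C0..C2 are illustrative only; the exact
+// interleaving is encoded in the load offsets above.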
+
+// crc32 mix main default
+.macro crc32_mix_main_default
+ cmp x_len, 2047
+ mov x_len_saved, x_len
+ mov x_buf_saved, x_buf
+ bls .less_than_2048
+
+ sub x_buf_iter, x_len, #2048
+ stp x29, x30, [sp, -16]!
+
+ mov x29, sp
+ and x_buf_iter, x_buf_iter, -2048
+ add x_buf_iter, x_buf_iter, 2048
+ add x_buf_iter, x_buf, x_buf_iter
+
+ .align 4
+.loop_mix:
+ mov x_buf, x_buf_saved
+ crc32_mix2048
+
+ add x_buf_saved, x_buf_saved, 2048
+ cmp x_buf_saved, x_buf_iter
+ bne .loop_mix
+
+ and x_len_saved, x_len_saved, 2047
+ cbnz x_len_saved, .remain_ldp
+
+ ldp x29, x30, [sp], 16
+ ret
+
+ .align 4
+.remain_ldp:
+ mov w_crc_tmp, crc
+ ldp x29, x30, [sp], 16
+ mov size, x_len_saved
+ mov buf, x_buf_iter
+ b .crc32_hw_handle
+
+.remain:
+ mov w_crc_tmp, crc
+ mov size, x_len_saved
+ mov buf, x_buf_saved
+ b .crc32_hw_handle
+
+ .align 4
+.less_than_2048:
+ cbnz x_len, .remain
+ ret
+
+.crc32_hw_handle:
+ cmp size, 63
+
+#ifdef CRC32
+ mvn crc_tmp, crc_tmp
+#endif
+
+ bls .less_than_64
+ sub buf_saved, size, #64
+ and buf_saved, buf_saved, -64
+ add buf_saved, buf_saved, 64
+ add buf_saved, buf, buf_saved
+
+ .align 4
+.loop_64:
+ ldp data_tmp1, data_tmp2, [buf]
+ ldr data_tmp3, [buf, 16]
+ crc32_u64 crc_tmp, crc_tmp, data_tmp1
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+
+ ldp data_tmp1, data_tmp2, [buf, 24]
+ add buf, buf, 64
+
+ crc32_u64 crc_tmp, crc_tmp, data_tmp3
+ ldr data_tmp3, [buf, -24]
+
+ crc32_u64 crc_tmp, crc_tmp, data_tmp1
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+
+ ldp data_tmp1, data_tmp2, [buf, -16]
+ cmp buf_saved, buf
+ crc32_u64 crc_tmp, crc_tmp, data_tmp3
+
+ crc32_u64 crc_tmp, crc_tmp, data_tmp1
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+ bne .loop_64
+
+ and size, size, 63
+.less_than_64:
+ cmp size, 7
+ bls .crc32_hw_w
+
+ ldr data_tmp2, [buf]
+ sub size_tmp, size, #8
+ cmp size_tmp, 7
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+ bls .crc32_hw_w_pre
+
+ ldr data_tmp2, [buf, 8]
+ sub data_tmp3, size, #16
+ cmp data_tmp3, 7
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+ bls .crc32_hw_w_pre
+
+ ldr data_tmp2, [buf, 16]
+ sub data_tmp3, size, #24
+ cmp data_tmp3, 7
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+ bls .crc32_hw_w_pre
+
+ ldr data_tmp2, [buf, 24]
+ sub data_tmp3, size, #32
+ cmp data_tmp3, 7
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+ bls .crc32_hw_w_pre
+
+ ldr data_tmp2, [buf, 32]
+ sub data_tmp3, size, #40
+ cmp data_tmp3, 7
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+ bls .crc32_hw_w_pre
+
+ ldr data_tmp2, [buf, 40]
+ sub data_tmp3, size, #48
+ cmp data_tmp3, 7
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+ bls .crc32_hw_w_pre
+
+ ldr data_tmp2, [buf, 48]
+ crc32_u64 crc_tmp, crc_tmp, data_tmp2
+
+.crc32_hw_w_pre:
+ and size_tmp, size_tmp, -8
+ and size, size, 7
+ add size_tmp, size_tmp, 8
+ add buf, buf, size_tmp
+
+.crc32_hw_w:
+ cmp size, 3
+ bls .crc32_hw_h
+ ldr w_data_tmp2, [buf], 4
+ sub size, size, #4
+ crc32_u32 crc_tmp, crc_tmp, w_data_tmp2
+
+.crc32_hw_h:
+ cmp size, 1
+ bls .crc32_hw_b
+ ldrh w_data_tmp2, [buf], 2
+ sub size, size, #2
+ crc32_u16 crc_tmp, crc_tmp, w_data_tmp2
+
+.crc32_hw_b:
+ cbz size, .crc32_hw_done
+ ldrb w_data_tmp2, [buf]
+ crc32_u8 crc_tmp, crc_tmp, w_data_tmp2
+
+.crc32_hw_done:
+#ifdef CRC32
+ mvn ret_crc, crc_tmp
+#else
+ mov ret_crc, crc_tmp
+#endif
+ ret
+.endm
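+
+// editor's note (hedged model): crc32_mix_main_default amounts to the
+// following C control flow -- full 2048-byte blocks take the mixed
+// PMULL/hardware path, and the tail falls back to plain hardware CRC
+// instructions on 64/8/4/2/1-byte units:
+//
+//   uint32_t mix_main_model(uint32_t crc, const uint8_t *buf, size_t len)
+//   {
+//       while (len >= 2048) {
+//           crc = mix2048_model(crc, buf);    /* see sketch above */
+//           buf += 2048;
+//           len -= 2048;
+//       }
+//       /* crc32_hw_tail is an illustrative helper; CRC32's pre/post bit
++//        * inversion is handled by the #ifdef CRC32 paths above */
+//       return crc32_hw_tail(crc, buf, len);
+//   }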
diff --git a/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S
new file mode 100644
index 000000000..62b40e1f2
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_mix_neoverse_n1.S
@@ -0,0 +1,70 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .text
+ .align 6
+ .arch armv8-a+crypto+crc
+
+#include "crc32_common_mix_neoverse_n1.S"
+.Lconstants:
+ .octa 0x00000001c6e415960000000154442bd4
+ .octa 0x00000000ccaa009e00000001751997d0
+ .octa 0x00000001F701164100000001DB710641
+ .quad 0x0000000163cd6124
+ .quad 0x00000000FFFFFFFF
+ .quad 0x000000001753ab84
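+// editor's note (assumption, not from the upstream source): the values
+// above appear to be the usual CLMUL constants for reflected CRC32 --
+// fold pairs (k1:k2, k3:k4), the Barrett pair (mu:poly), a 32-bit mask,
+// and stream-merge constants for the mixed PMULL/hardware path.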
+.macro crc32_u64 dst,src,data
+ crc32x \dst,\src,\data
+.endm
+.macro crc32_u32 dst,src,data
+ crc32w \dst,\src,\data
+.endm
+.macro crc32_u16 dst,src,data
+ crc32h \dst,\src,\data
+.endm
+.macro crc32_u8 dst,src,data
+ crc32b \dst,\src,\data
+.endm
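+// editor's note: the crc32_common_mix macro pulled in above expands
+// against these size-generic crc32_uN helpers; mapping them to
+// crc32x/crc32w/crc32h/crc32b selects the ISO-HDLC (crc32) polynomial,
+// while crc32c_mix_neoverse_n1 maps the same helpers to crc32cx etc.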
+
+
+/**
+ * uint32_t crc32_mix_neoverse_n1(uint32_t CRC, uint8_t * BUF,
+ *                                size_t LEN)
+ */
+ BUF .req x1
+ LEN .req x2
+ CRC .req x0
+ wCRC .req w0
+ .align 6
+ .global crc32_mix_neoverse_n1
+ .type crc32_mix_neoverse_n1, %function
+crc32_mix_neoverse_n1:
+ crc32_common_mix crc32
+ .size crc32_mix_neoverse_n1, .-crc32_mix_neoverse_n1
+
diff --git a/src/isa-l/crc/aarch64/crc32_norm_common_pmull.h b/src/isa-l/crc/aarch64/crc32_norm_common_pmull.h
new file mode 100644
index 000000000..7377e30a1
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_norm_common_pmull.h
@@ -0,0 +1,135 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc_common_pmull.h"
+
+.macro crc32_norm_func name:req
+ .arch armv8-a+crypto
+ .text
+ .align 3
+ .global \name
+ .type \name, %function
+
+/* uint32_t crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len) */
+
+\name\():
+ mvn w_seed, w_seed
+ mov x_counter, 0
+ cmp x_len, (FOLD_SIZE - 1)
+ bhi .crc_clmul_pre
+
+.crc_tab_pre:
+ cmp x_len, x_counter
+ bls .done
+
+ adrp x_tmp, .lanchor_crc_tab
+ add x_buf_iter, x_buf, x_counter
+ add x_buf, x_buf, x_len
+ add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
+
+ .align 3
+.loop_crc_tab:
+ ldrb w_tmp, [x_buf_iter], 1
+ cmp x_buf, x_buf_iter
+ eor w_tmp, w_tmp, w_seed, lsr 24
+ ldr w_tmp, [x_crc_tab_addr, w_tmp, uxtw 2]
+ eor w_seed, w_tmp, w_seed, lsl 8
+ bhi .loop_crc_tab
+
+.done:
+ mvn w_crc_ret, w_seed
+ ret
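+
+// editor's note (hedged sketch): the byte-at-a-time fallback above is the
+// classic MSB-first table method; in C it would read roughly:
+//
+//   while (counter < len)
+//       seed = (seed << 8) ^ tab[(seed >> 24) ^ buf[counter++]];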
+
+ .align 2
+.crc_clmul_pre:
+ lsl x_seed, x_seed, 32
+ movi v_x0.2s, 0
+ fmov v_x0.d[1], x_seed // save crc to v_x0
+
+ crc_norm_load_first_block
+
+ bls .clmul_loop_end
+
+ crc32_load_p4
+
+// 1024bit --> 512bit loop
+// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t)
+ crc_norm_loop
+
+.clmul_loop_end:
+// folding 512bit --> 128bit
+ crc32_fold_512b_to_128b
+
+// folding 128bit --> 64bit
+ mov x_tmp, p0_high_b0
+ movk x_tmp, p0_high_b1, lsl 16
+ fmov d_p0_high, x_tmp
+
+ mov x_tmp2, p0_low_b0
+ movk x_tmp2, p0_low_b1, lsl 16
+ fmov d_p0_high2, x_tmp2
+
+ mov d_tmp_high, v_x3.d[0]
+ ext v_tmp_high.16b, v_tmp_high.16b, v_tmp_high.16b, #12
+
+ pmull2 v_x3.1q, v_x3.2d, v_p0.2d
+
+ eor v_tmp_high.16b, v_tmp_high.16b, v_x3.16b
+ pmull2 v_x3.1q, v_tmp_high.2d, v_p02.2d
+
+// Barrett reduction
+ mov x_tmp2, br_high_b0
+ movk x_tmp2, br_high_b1, lsl 16
+ movk x_tmp2, br_high_b2, lsl 32
+ fmov d_br_high, x_tmp2
+
+ mov x_tmp, br_low_b0
+ movk x_tmp, br_low_b1, lsl 16
+ movk x_tmp, br_low_b2, lsl 32
+ fmov d_br_low, x_tmp
+
+ eor v_tmp_high.16b, v_tmp_high.16b, v_x3.16b
+ mov s_x3, v_tmp_high.s[1]
+ pmull v_x3.1q, v_x3.1d, v_br_low.1d
+
+ mov s_x3, v_x3.s[1]
+ pmull v_x3.1q, v_x3.1d, v_br_high.1d
+ eor v_tmp_high.8b, v_tmp_high.8b, v_x3.8b
+ umov w_seed, v_tmp_high.s[0]
+
+ b .crc_tab_pre
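+
+// editor's note (hedged): the Barrett step above computes the remainder
+// without division -- with mu = floor(x^64 / P), roughly:
+//   T1  = floor(R / x^32) * mu
+//   T2  = floor(T1 / x^32) * P
+//   crc = (R ^ T2) mod x^32
+// where the pmull instructions supply the carry-less multiplies.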
+
+ .size \name, .-\name
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+.shuffle_data:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8
+	.byte 7, 6, 5, 4, 3, 2, 1, 0
+.endm
diff --git a/src/isa-l/crc/aarch64/crc32_refl_common_pmull.h b/src/isa-l/crc/aarch64/crc32_refl_common_pmull.h
new file mode 100644
index 000000000..6418f1240
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_refl_common_pmull.h
@@ -0,0 +1,126 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc_common_pmull.h"
+
+.macro crc32_refl_func name:req
+ .arch armv8-a+crypto
+ .text
+ .align 3
+ .global \name
+ .type \name, %function
+
+/* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */
+
+\name\():
+ mvn w_seed, w_seed
+ mov x_counter, 0
+ cmp x_len, (FOLD_SIZE - 1)
+ bhi .crc32_clmul_pre
+
+.crc_tab_pre:
+ cmp x_len, x_counter
+ bls .done
+
+ adrp x_tmp, .lanchor_crc_tab
+ add x_buf_iter, x_buf, x_counter
+ add x_buf, x_buf, x_len
+ add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
+
+ .align 3
+.loop_crc_tab:
+ ldrb w_tmp, [x_buf_iter], 1
+ cmp x_buf, x_buf_iter
+ eor w_tmp, w_tmp, w_seed
+ and w_tmp, w_tmp, 255
+ ldr w_tmp, [x_crc_tab_addr, w_tmp, uxtw 2]
+ eor w_seed, w_tmp, w_seed, lsr 8
+ bhi .loop_crc_tab
+
+.done:
+ mvn w_crc_ret, w_seed
+ ret
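+
+// editor's note (hedged sketch): reflected (LSB-first) table method; the
+// fallback above reads roughly as the following C:
+//
+//   while (counter < len)
+//       seed = (seed >> 8) ^ tab[(seed ^ buf[counter++]) & 0xff];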
+
+ .align 2
+.crc32_clmul_pre:
+ fmov s_x0, w_seed // save crc to s_x0
+
+ crc_refl_load_first_block
+
+ bls .clmul_loop_end
+
+ crc32_load_p4
+
+// 1024bit --> 512bit loop
+// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t)
+ crc_refl_loop
+
+.clmul_loop_end:
+// folding 512bit --> 128bit
+ crc32_fold_512b_to_128b
+
+// folding 128bit --> 64bit
+ mov x_tmp, p0_low_b0
+ movk x_tmp, p0_low_b1, lsl 16
+ fmov d_p0_low2, x_tmp
+
+ mov d_tmp_high, v_x3.d[1]
+
+ mov d_p0_low, v_p1.d[1]
+ pmull v_x3.1q, v_x3.1d, v_p0.1d
+
+ eor v_tmp_high.16b, v_tmp_high.16b, v_x3.16b
+ mov s_x3, v_tmp_high.s[0]
+ ext v_tmp_high.16b, v_tmp_high.16b, v_tmp_high.16b, #4
+ pmull v_x3.1q, v_x3.1d, v_p02.1d
+
+// Barrett reduction
+ mov x_tmp2, br_high_b0
+ movk x_tmp2, br_high_b1, lsl 16
+ movk x_tmp2, br_high_b2, lsl 32
+ fmov d_br_high, x_tmp2
+
+ mov x_tmp, br_low_b0
+ movk x_tmp, br_low_b1, lsl 16
+ movk x_tmp, br_low_b2, lsl 32
+ fmov d_br_low, x_tmp
+
+ eor v_tmp_high.16b, v_tmp_high.16b, v_x3.16b
+ mov s_x3, v_tmp_high.s[0]
+ pmull v_x3.1q, v_x3.1d, v_br_high.1d
+
+ mov s_x3, v_x3.s[0]
+ pmull v_x3.1q, v_x3.1d, v_br_low.1d
+ eor v_tmp_high.8b, v_tmp_high.8b, v_x3.8b
+ umov w_seed, v_tmp_high.s[1]
+
+ b .crc_tab_pre
+
+ .size \name, .-\name
+.endm
diff --git a/src/isa-l/crc/aarch64/crc32c_mix_default.S b/src/isa-l/crc/aarch64/crc32c_mix_default.S
new file mode 100644
index 000000000..87b8ce39c
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32c_mix_default.S
@@ -0,0 +1,109 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .text
+ .arch armv8-a+crypto+crc
+ .align 6
+
+.macro crc32_u64 dst,src,data
+ crc32cx \dst,\src,\data
+.endm
+
+.macro crc32_u32 dst,src,data
+ crc32cw \dst,\src,\data
+.endm
+
+.macro crc32_u16 dst,src,data
+ crc32ch \dst,\src,\data
+.endm
+
+.macro crc32_u8 dst,src,data
+ crc32cb \dst,\src,\data
+.endm
+
+#include "crc32_mix_default_common.S"
+
+ .global crc32c_mix_default
+ .type crc32c_mix_default, %function
+crc32c_mix_default:
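+	// editor's note (inferred from the moves below): entry registers are
+	// x0 = buf, w1 = len, w2 = crc; crc32_mix_main_default expects
+	// w0 = crc, x1 = buf, x2 = len, so rotate them via w3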
+ mov w3, w2
+ sxtw x2, w1
+ mov x1, x0
+ mov w0, w3
+ crc32_mix_main_default
+ .size crc32c_mix_default, .-crc32c_mix_default
+
+ .section .rodata
+ .align 4
+ .set lanchor_crc32,. + 0
+
+ .type k1k2, %object
+ .size k1k2, 16
+k1k2:
+ .xword 0x00740eef02
+ .xword 0x009e4addf8
+
+ .type k3k4, %object
+ .size k3k4, 16
+k3k4:
+ .xword 0x00f20c0dfe
+ .xword 0x014cd00bd6
+
+ .type k5k0, %object
+ .size k5k0, 16
+k5k0:
+ .xword 0x00dd45aab8
+ .xword 0
+
+ .type poly, %object
+ .size poly, 16
+poly:
+ .xword 0x0105ec76f0
+ .xword 0x00dea713f1
+
+ .type crc32_const, %object
+ .size crc32_const, 48
+crc32_const:
+ .xword 0x9ef68d35
+ .xword 0
+ .xword 0x170076fa
+ .xword 0
+ .xword 0xdd7e3b0c
+ .xword 0
+
+ .align 4
+ .set .lanchor_mask,. + 0
+
+ .type mask, %object
+ .size mask, 16
+mask:
+ .word -1
+ .word 0
+ .word -1
+ .word 0
diff --git a/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S
new file mode 100644
index 000000000..a98511aab
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32c_mix_neoverse_n1.S
@@ -0,0 +1,68 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .text
+ .align 6
+ .arch armv8-a+crypto+crc
+
+#include "crc32_common_mix_neoverse_n1.S"
+.Lconstants:
+ .octa 0x000000009e4addf800000000740eef02
+ .octa 0x000000014cd00bd600000000f20c0dfe
+ .octa 0x00000000dea713f10000000105ec76f0
+ .quad 0x00000000dd45aab8
+ .quad 0x00000000FFFFFFFF
+ .quad 0x000000009ef68d35
+
+.macro crc32_u64 dst,src,data
+ crc32cx \dst,\src,\data
+.endm
+.macro crc32_u32 dst,src,data
+ crc32cw \dst,\src,\data
+.endm
+.macro crc32_u16 dst,src,data
+ crc32ch \dst,\src,\data
+.endm
+.macro crc32_u8 dst,src,data
+ crc32cb \dst,\src,\data
+.endm
+/**
+ * uint32_t crc32c_mix_neoverse_n1(uint8_t * BUF,
+ *                                 size_t LEN, uint32_t CRC)
+ */
+ BUF .req x0
+ LEN .req x1
+ CRC .req x2
+ wCRC .req w2
+ .align 6
+ .global crc32c_mix_neoverse_n1
+ .type crc32c_mix_neoverse_n1, %function
+crc32c_mix_neoverse_n1:
+ crc32_common_mix crc32c
+ .size crc32c_mix_neoverse_n1, .-crc32c_mix_neoverse_n1
diff --git a/src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S b/src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S
new file mode 100644
index 000000000..0089a09de
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc64_ecma_norm_pmull.h"
+#include "crc64_norm_common_pmull.h"
+
+crc64_norm_func crc64_ecma_norm_pmull
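+
+// editor's note: instantiates the shared PMULL body with the CRC64-ECMA
+// (normal form) constants above; the resulting function is assumed to
+// follow the common prototype
+//   uint64_t crc64_ecma_norm_pmull(uint64_t seed, const uint8_t *buf, uint64_t len)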
diff --git a/src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h b/src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h
new file mode 100644
index 000000000..07d58cd87
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h
@@ -0,0 +1,200 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, (0xf020)
+.equ p4_low_b1, 0x540d
+.equ p4_low_b2, 0x43ca
+.equ p4_low_b3, 0x5f68
+.equ p4_high_b0, 0xb83f
+.equ p4_high_b1, 0x1205
+.equ p4_high_b2, 0xb698
+.equ p4_high_b3, 0xddf4
+
+.equ p1_low_b0, (0xfab6)
+.equ p1_low_b1, 0xeb52
+.equ p1_low_b2, 0xc3c7
+.equ p1_low_b3, 0x05f5
+.equ p1_high_b0, 0x740e
+.equ p1_high_b1, 0xd257
+.equ p1_high_b2, 0x38a7
+.equ p1_high_b3, 0x4eb9
+
+.equ p0_low_b0, (0xfab6)
+.equ p0_low_b1, 0xeb52
+.equ p0_low_b2, 0xc3c7
+.equ p0_low_b3, 0x05f5
+.equ p0_high_b0, 0x0
+.equ p0_high_b1, 0x0
+.equ p0_high_b2, 0x0
+.equ p0_high_b3, 0x0
+
+.equ br_low_b0, (0xf872)
+.equ br_low_b1, 0x6cc4
+.equ br_low_b2, 0x29d0
+.equ br_low_b3, 0x578d
+.equ br_high_b0, 0x3693
+.equ br_high_b1, 0xa9ea
+.equ br_high_b2, 0xe1eb
+.equ br_high_b3, 0x42f0
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc64_tab, %object
+ .size crc64_tab, 2048
+crc64_tab:
+ .xword 0x0000000000000000, 0x42f0e1eba9ea3693
+ .xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5
+ .xword 0x493366450e42ecdf, 0x0bc387aea7a8da4c
+ .xword 0xccd2a5925d9681f9, 0x8e224479f47cb76a
+ .xword 0x9266cc8a1c85d9be, 0xd0962d61b56fef2d
+ .xword 0x17870f5d4f51b498, 0x5577eeb6e6bb820b
+ .xword 0xdb55aacf12c73561, 0x99a54b24bb2d03f2
+ .xword 0x5eb4691841135847, 0x1c4488f3e8f96ed4
+ .xword 0x663d78ff90e185ef, 0x24cd9914390bb37c
+ .xword 0xe3dcbb28c335e8c9, 0xa12c5ac36adfde5a
+ .xword 0x2f0e1eba9ea36930, 0x6dfeff5137495fa3
+ .xword 0xaaefdd6dcd770416, 0xe81f3c86649d3285
+ .xword 0xf45bb4758c645c51, 0xb6ab559e258e6ac2
+ .xword 0x71ba77a2dfb03177, 0x334a9649765a07e4
+ .xword 0xbd68d2308226b08e, 0xff9833db2bcc861d
+ .xword 0x388911e7d1f2dda8, 0x7a79f00c7818eb3b
+ .xword 0xcc7af1ff21c30bde, 0x8e8a101488293d4d
+ .xword 0x499b3228721766f8, 0x0b6bd3c3dbfd506b
+ .xword 0x854997ba2f81e701, 0xc7b97651866bd192
+ .xword 0x00a8546d7c558a27, 0x4258b586d5bfbcb4
+ .xword 0x5e1c3d753d46d260, 0x1cecdc9e94ace4f3
+ .xword 0xdbfdfea26e92bf46, 0x990d1f49c77889d5
+ .xword 0x172f5b3033043ebf, 0x55dfbadb9aee082c
+ .xword 0x92ce98e760d05399, 0xd03e790cc93a650a
+ .xword 0xaa478900b1228e31, 0xe8b768eb18c8b8a2
+ .xword 0x2fa64ad7e2f6e317, 0x6d56ab3c4b1cd584
+ .xword 0xe374ef45bf6062ee, 0xa1840eae168a547d
+ .xword 0x66952c92ecb40fc8, 0x2465cd79455e395b
+ .xword 0x3821458aada7578f, 0x7ad1a461044d611c
+ .xword 0xbdc0865dfe733aa9, 0xff3067b657990c3a
+ .xword 0x711223cfa3e5bb50, 0x33e2c2240a0f8dc3
+ .xword 0xf4f3e018f031d676, 0xb60301f359dbe0e5
+ .xword 0xda050215ea6c212f, 0x98f5e3fe438617bc
+ .xword 0x5fe4c1c2b9b84c09, 0x1d14202910527a9a
+ .xword 0x93366450e42ecdf0, 0xd1c685bb4dc4fb63
+ .xword 0x16d7a787b7faa0d6, 0x5427466c1e109645
+ .xword 0x4863ce9ff6e9f891, 0x0a932f745f03ce02
+ .xword 0xcd820d48a53d95b7, 0x8f72eca30cd7a324
+ .xword 0x0150a8daf8ab144e, 0x43a04931514122dd
+ .xword 0x84b16b0dab7f7968, 0xc6418ae602954ffb
+ .xword 0xbc387aea7a8da4c0, 0xfec89b01d3679253
+ .xword 0x39d9b93d2959c9e6, 0x7b2958d680b3ff75
+ .xword 0xf50b1caf74cf481f, 0xb7fbfd44dd257e8c
+ .xword 0x70eadf78271b2539, 0x321a3e938ef113aa
+ .xword 0x2e5eb66066087d7e, 0x6cae578bcfe24bed
+ .xword 0xabbf75b735dc1058, 0xe94f945c9c3626cb
+ .xword 0x676dd025684a91a1, 0x259d31cec1a0a732
+ .xword 0xe28c13f23b9efc87, 0xa07cf2199274ca14
+ .xword 0x167ff3eacbaf2af1, 0x548f120162451c62
+ .xword 0x939e303d987b47d7, 0xd16ed1d631917144
+ .xword 0x5f4c95afc5edc62e, 0x1dbc74446c07f0bd
+ .xword 0xdaad56789639ab08, 0x985db7933fd39d9b
+ .xword 0x84193f60d72af34f, 0xc6e9de8b7ec0c5dc
+ .xword 0x01f8fcb784fe9e69, 0x43081d5c2d14a8fa
+ .xword 0xcd2a5925d9681f90, 0x8fdab8ce70822903
+ .xword 0x48cb9af28abc72b6, 0x0a3b7b1923564425
+ .xword 0x70428b155b4eaf1e, 0x32b26afef2a4998d
+ .xword 0xf5a348c2089ac238, 0xb753a929a170f4ab
+ .xword 0x3971ed50550c43c1, 0x7b810cbbfce67552
+ .xword 0xbc902e8706d82ee7, 0xfe60cf6caf321874
+ .xword 0xe224479f47cb76a0, 0xa0d4a674ee214033
+ .xword 0x67c58448141f1b86, 0x253565a3bdf52d15
+ .xword 0xab1721da49899a7f, 0xe9e7c031e063acec
+ .xword 0x2ef6e20d1a5df759, 0x6c0603e6b3b7c1ca
+ .xword 0xf6fae5c07d3274cd, 0xb40a042bd4d8425e
+ .xword 0x731b26172ee619eb, 0x31ebc7fc870c2f78
+ .xword 0xbfc9838573709812, 0xfd39626eda9aae81
+ .xword 0x3a28405220a4f534, 0x78d8a1b9894ec3a7
+ .xword 0x649c294a61b7ad73, 0x266cc8a1c85d9be0
+ .xword 0xe17dea9d3263c055, 0xa38d0b769b89f6c6
+ .xword 0x2daf4f0f6ff541ac, 0x6f5faee4c61f773f
+ .xword 0xa84e8cd83c212c8a, 0xeabe6d3395cb1a19
+ .xword 0x90c79d3fedd3f122, 0xd2377cd44439c7b1
+ .xword 0x15265ee8be079c04, 0x57d6bf0317edaa97
+ .xword 0xd9f4fb7ae3911dfd, 0x9b041a914a7b2b6e
+ .xword 0x5c1538adb04570db, 0x1ee5d94619af4648
+ .xword 0x02a151b5f156289c, 0x4051b05e58bc1e0f
+ .xword 0x87409262a28245ba, 0xc5b073890b687329
+ .xword 0x4b9237f0ff14c443, 0x0962d61b56fef2d0
+ .xword 0xce73f427acc0a965, 0x8c8315cc052a9ff6
+ .xword 0x3a80143f5cf17f13, 0x7870f5d4f51b4980
+ .xword 0xbf61d7e80f251235, 0xfd913603a6cf24a6
+ .xword 0x73b3727a52b393cc, 0x31439391fb59a55f
+ .xword 0xf652b1ad0167feea, 0xb4a25046a88dc879
+ .xword 0xa8e6d8b54074a6ad, 0xea16395ee99e903e
+ .xword 0x2d071b6213a0cb8b, 0x6ff7fa89ba4afd18
+ .xword 0xe1d5bef04e364a72, 0xa3255f1be7dc7ce1
+ .xword 0x64347d271de22754, 0x26c49cccb40811c7
+ .xword 0x5cbd6cc0cc10fafc, 0x1e4d8d2b65facc6f
+ .xword 0xd95caf179fc497da, 0x9bac4efc362ea149
+ .xword 0x158e0a85c2521623, 0x577eeb6e6bb820b0
+ .xword 0x906fc95291867b05, 0xd29f28b9386c4d96
+ .xword 0xcedba04ad0952342, 0x8c2b41a1797f15d1
+ .xword 0x4b3a639d83414e64, 0x09ca82762aab78f7
+ .xword 0x87e8c60fded7cf9d, 0xc51827e4773df90e
+ .xword 0x020905d88d03a2bb, 0x40f9e43324e99428
+ .xword 0x2cffe7d5975e55e2, 0x6e0f063e3eb46371
+ .xword 0xa91e2402c48a38c4, 0xebeec5e96d600e57
+ .xword 0x65cc8190991cb93d, 0x273c607b30f68fae
+ .xword 0xe02d4247cac8d41b, 0xa2dda3ac6322e288
+ .xword 0xbe992b5f8bdb8c5c, 0xfc69cab42231bacf
+ .xword 0x3b78e888d80fe17a, 0x7988096371e5d7e9
+ .xword 0xf7aa4d1a85996083, 0xb55aacf12c735610
+ .xword 0x724b8ecdd64d0da5, 0x30bb6f267fa73b36
+ .xword 0x4ac29f2a07bfd00d, 0x08327ec1ae55e69e
+ .xword 0xcf235cfd546bbd2b, 0x8dd3bd16fd818bb8
+ .xword 0x03f1f96f09fd3cd2, 0x41011884a0170a41
+ .xword 0x86103ab85a2951f4, 0xc4e0db53f3c36767
+ .xword 0xd8a453a01b3a09b3, 0x9a54b24bb2d03f20
+ .xword 0x5d45907748ee6495, 0x1fb5719ce1045206
+ .xword 0x919735e51578e56c, 0xd367d40ebc92d3ff
+ .xword 0x1476f63246ac884a, 0x568617d9ef46bed9
+ .xword 0xe085162ab69d5e3c, 0xa275f7c11f7768af
+ .xword 0x6564d5fde549331a, 0x279434164ca30589
+ .xword 0xa9b6706fb8dfb2e3, 0xeb46918411358470
+ .xword 0x2c57b3b8eb0bdfc5, 0x6ea7525342e1e956
+ .xword 0x72e3daa0aa188782, 0x30133b4b03f2b111
+ .xword 0xf7021977f9cceaa4, 0xb5f2f89c5026dc37
+ .xword 0x3bd0bce5a45a6b5d, 0x79205d0e0db05dce
+ .xword 0xbe317f32f78e067b, 0xfcc19ed95e6430e8
+ .xword 0x86b86ed5267cdbd3, 0xc4488f3e8f96ed40
+ .xword 0x0359ad0275a8b6f5, 0x41a94ce9dc428066
+ .xword 0xcf8b0890283e370c, 0x8d7be97b81d4019f
+ .xword 0x4a6acb477bea5a2a, 0x089a2aacd2006cb9
+ .xword 0x14dea25f3af9026d, 0x562e43b4931334fe
+ .xword 0x913f6188692d6f4b, 0xd3cf8063c0c759d8
+ .xword 0x5dedc41a34bbeeb2, 0x1f1d25f19d51d821
+ .xword 0xd80c07cd676f8394, 0x9afce626ce85b507
diff --git a/src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S b/src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S
new file mode 100644
index 000000000..812517f77
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc64_ecma_refl_pmull.h"
+#include "crc64_refl_common_pmull.h"
+
+crc64_refl_func crc64_ecma_refl_pmull
diff --git a/src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h b/src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h
new file mode 100644
index 000000000..5f53d7903
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h
@@ -0,0 +1,196 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, 0x41f3
+.equ p4_low_b1, 0x9dd4
+.equ p4_low_b2, 0xefbb
+.equ p4_low_b3, 0x6ae3
+.equ p4_high_b0, 0x2df4
+.equ p4_high_b1, 0xa784
+.equ p4_high_b2, 0x6054
+.equ p4_high_b3, 0x081f
+
+.equ p1_low_b0, 0x3ae4
+.equ p1_low_b1, 0xca39
+.equ p1_low_b2, 0xd497
+.equ p1_low_b3, 0xe05d
+.equ p1_high_b0, 0x5f40
+.equ p1_high_b1, 0xc787
+.equ p1_high_b2, 0x95af
+.equ p1_high_b3, 0xdabe
+
+.equ p0_low_b0, 0x5f40
+.equ p0_low_b1, 0xc787
+.equ p0_low_b2, 0x95af
+.equ p0_low_b3, 0xdabe
+
+.equ br_low_b0, 0x63d5
+.equ br_low_b1, 0x1729
+.equ br_low_b2, 0x466c
+.equ br_low_b3, 0x9c3e
+.equ br_high_b0, 0x1e85
+.equ br_high_b1, 0xaf0e
+.equ br_high_b2, 0xaf2b
+.equ br_high_b3, 0x92d8
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc64_tab, %object
+ .size crc64_tab, 2048
+crc64_tab:
+ .xword 0x0000000000000000, 0xb32e4cbe03a75f6f
+ .xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34
+ .xword 0x7bd0c384ff8f5e33, 0xc8fe8f3afc28015c
+ .xword 0x8f54f5d357cffe68, 0x3c7ab96d5468a107
+ .xword 0xf7a18709ff1ebc66, 0x448fcbb7fcb9e309
+ .xword 0x0325b15e575e1c3d, 0xb00bfde054f94352
+ .xword 0x8c71448d0091e255, 0x3f5f08330336bd3a
+ .xword 0x78f572daa8d1420e, 0xcbdb3e64ab761d61
+ .xword 0x7d9ba13851336649, 0xceb5ed8652943926
+ .xword 0x891f976ff973c612, 0x3a31dbd1fad4997d
+ .xword 0x064b62bcaebc387a, 0xb5652e02ad1b6715
+ .xword 0xf2cf54eb06fc9821, 0x41e11855055bc74e
+ .xword 0x8a3a2631ae2dda2f, 0x39146a8fad8a8540
+ .xword 0x7ebe1066066d7a74, 0xcd905cd805ca251b
+ .xword 0xf1eae5b551a2841c, 0x42c4a90b5205db73
+ .xword 0x056ed3e2f9e22447, 0xb6409f5cfa457b28
+ .xword 0xfb374270a266cc92, 0x48190ecea1c193fd
+ .xword 0x0fb374270a266cc9, 0xbc9d3899098133a6
+ .xword 0x80e781f45de992a1, 0x33c9cd4a5e4ecdce
+ .xword 0x7463b7a3f5a932fa, 0xc74dfb1df60e6d95
+ .xword 0x0c96c5795d7870f4, 0xbfb889c75edf2f9b
+ .xword 0xf812f32ef538d0af, 0x4b3cbf90f69f8fc0
+ .xword 0x774606fda2f72ec7, 0xc4684a43a15071a8
+ .xword 0x83c230aa0ab78e9c, 0x30ec7c140910d1f3
+ .xword 0x86ace348f355aadb, 0x3582aff6f0f2f5b4
+ .xword 0x7228d51f5b150a80, 0xc10699a158b255ef
+ .xword 0xfd7c20cc0cdaf4e8, 0x4e526c720f7dab87
+ .xword 0x09f8169ba49a54b3, 0xbad65a25a73d0bdc
+ .xword 0x710d64410c4b16bd, 0xc22328ff0fec49d2
+ .xword 0x85895216a40bb6e6, 0x36a71ea8a7ace989
+ .xword 0x0adda7c5f3c4488e, 0xb9f3eb7bf06317e1
+ .xword 0xfe5991925b84e8d5, 0x4d77dd2c5823b7ba
+ .xword 0x64b62bcaebc387a1, 0xd7986774e864d8ce
+ .xword 0x90321d9d438327fa, 0x231c512340247895
+ .xword 0x1f66e84e144cd992, 0xac48a4f017eb86fd
+ .xword 0xebe2de19bc0c79c9, 0x58cc92a7bfab26a6
+ .xword 0x9317acc314dd3bc7, 0x2039e07d177a64a8
+ .xword 0x67939a94bc9d9b9c, 0xd4bdd62abf3ac4f3
+ .xword 0xe8c76f47eb5265f4, 0x5be923f9e8f53a9b
+ .xword 0x1c4359104312c5af, 0xaf6d15ae40b59ac0
+ .xword 0x192d8af2baf0e1e8, 0xaa03c64cb957be87
+ .xword 0xeda9bca512b041b3, 0x5e87f01b11171edc
+ .xword 0x62fd4976457fbfdb, 0xd1d305c846d8e0b4
+ .xword 0x96797f21ed3f1f80, 0x2557339fee9840ef
+ .xword 0xee8c0dfb45ee5d8e, 0x5da24145464902e1
+ .xword 0x1a083bacedaefdd5, 0xa9267712ee09a2ba
+ .xword 0x955cce7fba6103bd, 0x267282c1b9c65cd2
+ .xword 0x61d8f8281221a3e6, 0xd2f6b4961186fc89
+ .xword 0x9f8169ba49a54b33, 0x2caf25044a02145c
+ .xword 0x6b055fede1e5eb68, 0xd82b1353e242b407
+ .xword 0xe451aa3eb62a1500, 0x577fe680b58d4a6f
+ .xword 0x10d59c691e6ab55b, 0xa3fbd0d71dcdea34
+ .xword 0x6820eeb3b6bbf755, 0xdb0ea20db51ca83a
+ .xword 0x9ca4d8e41efb570e, 0x2f8a945a1d5c0861
+ .xword 0x13f02d374934a966, 0xa0de61894a93f609
+ .xword 0xe7741b60e174093d, 0x545a57dee2d35652
+ .xword 0xe21ac88218962d7a, 0x5134843c1b317215
+ .xword 0x169efed5b0d68d21, 0xa5b0b26bb371d24e
+ .xword 0x99ca0b06e7197349, 0x2ae447b8e4be2c26
+ .xword 0x6d4e3d514f59d312, 0xde6071ef4cfe8c7d
+ .xword 0x15bb4f8be788911c, 0xa6950335e42fce73
+ .xword 0xe13f79dc4fc83147, 0x521135624c6f6e28
+ .xword 0x6e6b8c0f1807cf2f, 0xdd45c0b11ba09040
+ .xword 0x9aefba58b0476f74, 0x29c1f6e6b3e0301b
+ .xword 0xc96c5795d7870f42, 0x7a421b2bd420502d
+ .xword 0x3de861c27fc7af19, 0x8ec62d7c7c60f076
+ .xword 0xb2bc941128085171, 0x0192d8af2baf0e1e
+ .xword 0x4638a2468048f12a, 0xf516eef883efae45
+ .xword 0x3ecdd09c2899b324, 0x8de39c222b3eec4b
+ .xword 0xca49e6cb80d9137f, 0x7967aa75837e4c10
+ .xword 0x451d1318d716ed17, 0xf6335fa6d4b1b278
+ .xword 0xb199254f7f564d4c, 0x02b769f17cf11223
+ .xword 0xb4f7f6ad86b4690b, 0x07d9ba1385133664
+ .xword 0x4073c0fa2ef4c950, 0xf35d8c442d53963f
+ .xword 0xcf273529793b3738, 0x7c0979977a9c6857
+ .xword 0x3ba3037ed17b9763, 0x888d4fc0d2dcc80c
+ .xword 0x435671a479aad56d, 0xf0783d1a7a0d8a02
+ .xword 0xb7d247f3d1ea7536, 0x04fc0b4dd24d2a59
+ .xword 0x3886b22086258b5e, 0x8ba8fe9e8582d431
+ .xword 0xcc0284772e652b05, 0x7f2cc8c92dc2746a
+ .xword 0x325b15e575e1c3d0, 0x8175595b76469cbf
+ .xword 0xc6df23b2dda1638b, 0x75f16f0cde063ce4
+ .xword 0x498bd6618a6e9de3, 0xfaa59adf89c9c28c
+ .xword 0xbd0fe036222e3db8, 0x0e21ac88218962d7
+ .xword 0xc5fa92ec8aff7fb6, 0x76d4de52895820d9
+ .xword 0x317ea4bb22bfdfed, 0x8250e80521188082
+ .xword 0xbe2a516875702185, 0x0d041dd676d77eea
+ .xword 0x4aae673fdd3081de, 0xf9802b81de97deb1
+ .xword 0x4fc0b4dd24d2a599, 0xfceef8632775faf6
+ .xword 0xbb44828a8c9205c2, 0x086ace348f355aad
+ .xword 0x34107759db5dfbaa, 0x873e3be7d8faa4c5
+ .xword 0xc094410e731d5bf1, 0x73ba0db070ba049e
+ .xword 0xb86133d4dbcc19ff, 0x0b4f7f6ad86b4690
+ .xword 0x4ce50583738cb9a4, 0xffcb493d702be6cb
+ .xword 0xc3b1f050244347cc, 0x709fbcee27e418a3
+ .xword 0x3735c6078c03e797, 0x841b8ab98fa4b8f8
+ .xword 0xadda7c5f3c4488e3, 0x1ef430e13fe3d78c
+ .xword 0x595e4a08940428b8, 0xea7006b697a377d7
+ .xword 0xd60abfdbc3cbd6d0, 0x6524f365c06c89bf
+ .xword 0x228e898c6b8b768b, 0x91a0c532682c29e4
+ .xword 0x5a7bfb56c35a3485, 0xe955b7e8c0fd6bea
+ .xword 0xaeffcd016b1a94de, 0x1dd181bf68bdcbb1
+ .xword 0x21ab38d23cd56ab6, 0x9285746c3f7235d9
+ .xword 0xd52f0e859495caed, 0x6601423b97329582
+ .xword 0xd041dd676d77eeaa, 0x636f91d96ed0b1c5
+ .xword 0x24c5eb30c5374ef1, 0x97eba78ec690119e
+ .xword 0xab911ee392f8b099, 0x18bf525d915feff6
+ .xword 0x5f1528b43ab810c2, 0xec3b640a391f4fad
+ .xword 0x27e05a6e926952cc, 0x94ce16d091ce0da3
+ .xword 0xd3646c393a29f297, 0x604a2087398eadf8
+ .xword 0x5c3099ea6de60cff, 0xef1ed5546e415390
+ .xword 0xa8b4afbdc5a6aca4, 0x1b9ae303c601f3cb
+ .xword 0x56ed3e2f9e224471, 0xe5c372919d851b1e
+ .xword 0xa26908783662e42a, 0x114744c635c5bb45
+ .xword 0x2d3dfdab61ad1a42, 0x9e13b115620a452d
+ .xword 0xd9b9cbfcc9edba19, 0x6a978742ca4ae576
+ .xword 0xa14cb926613cf817, 0x1262f598629ba778
+ .xword 0x55c88f71c97c584c, 0xe6e6c3cfcadb0723
+ .xword 0xda9c7aa29eb3a624, 0x69b2361c9d14f94b
+ .xword 0x2e184cf536f3067f, 0x9d36004b35545910
+ .xword 0x2b769f17cf112238, 0x9858d3a9ccb67d57
+ .xword 0xdff2a94067518263, 0x6cdce5fe64f6dd0c
+ .xword 0x50a65c93309e7c0b, 0xe388102d33392364
+ .xword 0xa4226ac498dedc50, 0x170c267a9b79833f
+ .xword 0xdcd7181e300f9e5e, 0x6ff954a033a8c131
+ .xword 0x28532e49984f3e05, 0x9b7d62f79be8616a
+ .xword 0xa707db9acf80c06d, 0x14299724cc279f02
+ .xword 0x5383edcd67c06036, 0xe0ada17364673f59
diff --git a/src/isa-l/crc/aarch64/crc64_iso_norm_pmull.S b/src/isa-l/crc/aarch64/crc64_iso_norm_pmull.S
new file mode 100644
index 000000000..185b75bdf
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_iso_norm_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc64_iso_norm_pmull.h"
+#include "crc64_norm_common_pmull.h"
+
+crc64_norm_func crc64_iso_norm_pmull
diff --git a/src/isa-l/crc/aarch64/crc64_iso_norm_pmull.h b/src/isa-l/crc/aarch64/crc64_iso_norm_pmull.h
new file mode 100644
index 000000000..cc176051c
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_iso_norm_pmull.h
@@ -0,0 +1,201 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, (0x0101)
+.equ p4_low_b1, 0x0100
+.equ p4_low_b2, 0x0001
+.equ p4_low_b3, 0x0000
+.equ p4_high_b0, 0x1b1b
+.equ p4_high_b1, 0x1b00
+.equ p4_high_b2, 0x001b
+.equ p4_high_b3, 0x0000
+
+.equ p1_low_b0, (0x0145)
+.equ p1_low_b1, 0x0000
+.equ p1_low_b2, 0x0000
+.equ p1_low_b3, 0x0000
+.equ p1_high_b0, 0x1db7
+.equ p1_high_b1, 0x0000
+.equ p1_high_b2, 0x0000
+.equ p1_high_b3, 0x0000
+
+.equ p0_low_b0, (0x0145)
+.equ p0_low_b1, 0x0000
+.equ p0_low_b2, 0x0000
+.equ p0_low_b3, 0x0000
+.equ p0_high_b0, 0x0000
+.equ p0_high_b1, 0x0000
+.equ p0_high_b2, 0x0000
+.equ p0_high_b3, 0x0000
+
+.equ br_low_b0, (0x001b)
+.equ br_low_b1, 0x0000
+.equ br_low_b2, 0x0000
+.equ br_low_b3, 0x0000
+.equ br_high_b0, 0x001b
+.equ br_high_b1, 0x0000
+.equ br_high_b2, 0x0000
+.equ br_high_b3, 0x0000
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc64_tab, %object
+ .size crc64_tab, 2048
+
+crc64_tab:
+ .xword 0x0000000000000000, 0x000000000000001b
+ .xword 0x0000000000000036, 0x000000000000002d
+ .xword 0x000000000000006c, 0x0000000000000077
+ .xword 0x000000000000005a, 0x0000000000000041
+ .xword 0x00000000000000d8, 0x00000000000000c3
+ .xword 0x00000000000000ee, 0x00000000000000f5
+ .xword 0x00000000000000b4, 0x00000000000000af
+ .xword 0x0000000000000082, 0x0000000000000099
+ .xword 0x00000000000001b0, 0x00000000000001ab
+ .xword 0x0000000000000186, 0x000000000000019d
+ .xword 0x00000000000001dc, 0x00000000000001c7
+ .xword 0x00000000000001ea, 0x00000000000001f1
+ .xword 0x0000000000000168, 0x0000000000000173
+ .xword 0x000000000000015e, 0x0000000000000145
+ .xword 0x0000000000000104, 0x000000000000011f
+ .xword 0x0000000000000132, 0x0000000000000129
+ .xword 0x0000000000000360, 0x000000000000037b
+ .xword 0x0000000000000356, 0x000000000000034d
+ .xword 0x000000000000030c, 0x0000000000000317
+ .xword 0x000000000000033a, 0x0000000000000321
+ .xword 0x00000000000003b8, 0x00000000000003a3
+ .xword 0x000000000000038e, 0x0000000000000395
+ .xword 0x00000000000003d4, 0x00000000000003cf
+ .xword 0x00000000000003e2, 0x00000000000003f9
+ .xword 0x00000000000002d0, 0x00000000000002cb
+ .xword 0x00000000000002e6, 0x00000000000002fd
+ .xword 0x00000000000002bc, 0x00000000000002a7
+ .xword 0x000000000000028a, 0x0000000000000291
+ .xword 0x0000000000000208, 0x0000000000000213
+ .xword 0x000000000000023e, 0x0000000000000225
+ .xword 0x0000000000000264, 0x000000000000027f
+ .xword 0x0000000000000252, 0x0000000000000249
+ .xword 0x00000000000006c0, 0x00000000000006db
+ .xword 0x00000000000006f6, 0x00000000000006ed
+ .xword 0x00000000000006ac, 0x00000000000006b7
+ .xword 0x000000000000069a, 0x0000000000000681
+ .xword 0x0000000000000618, 0x0000000000000603
+ .xword 0x000000000000062e, 0x0000000000000635
+ .xword 0x0000000000000674, 0x000000000000066f
+ .xword 0x0000000000000642, 0x0000000000000659
+ .xword 0x0000000000000770, 0x000000000000076b
+ .xword 0x0000000000000746, 0x000000000000075d
+ .xword 0x000000000000071c, 0x0000000000000707
+ .xword 0x000000000000072a, 0x0000000000000731
+ .xword 0x00000000000007a8, 0x00000000000007b3
+ .xword 0x000000000000079e, 0x0000000000000785
+ .xword 0x00000000000007c4, 0x00000000000007df
+ .xword 0x00000000000007f2, 0x00000000000007e9
+ .xword 0x00000000000005a0, 0x00000000000005bb
+ .xword 0x0000000000000596, 0x000000000000058d
+ .xword 0x00000000000005cc, 0x00000000000005d7
+ .xword 0x00000000000005fa, 0x00000000000005e1
+ .xword 0x0000000000000578, 0x0000000000000563
+ .xword 0x000000000000054e, 0x0000000000000555
+ .xword 0x0000000000000514, 0x000000000000050f
+ .xword 0x0000000000000522, 0x0000000000000539
+ .xword 0x0000000000000410, 0x000000000000040b
+ .xword 0x0000000000000426, 0x000000000000043d
+ .xword 0x000000000000047c, 0x0000000000000467
+ .xword 0x000000000000044a, 0x0000000000000451
+ .xword 0x00000000000004c8, 0x00000000000004d3
+ .xword 0x00000000000004fe, 0x00000000000004e5
+ .xword 0x00000000000004a4, 0x00000000000004bf
+ .xword 0x0000000000000492, 0x0000000000000489
+ .xword 0x0000000000000d80, 0x0000000000000d9b
+ .xword 0x0000000000000db6, 0x0000000000000dad
+ .xword 0x0000000000000dec, 0x0000000000000df7
+ .xword 0x0000000000000dda, 0x0000000000000dc1
+ .xword 0x0000000000000d58, 0x0000000000000d43
+ .xword 0x0000000000000d6e, 0x0000000000000d75
+ .xword 0x0000000000000d34, 0x0000000000000d2f
+ .xword 0x0000000000000d02, 0x0000000000000d19
+ .xword 0x0000000000000c30, 0x0000000000000c2b
+ .xword 0x0000000000000c06, 0x0000000000000c1d
+ .xword 0x0000000000000c5c, 0x0000000000000c47
+ .xword 0x0000000000000c6a, 0x0000000000000c71
+ .xword 0x0000000000000ce8, 0x0000000000000cf3
+ .xword 0x0000000000000cde, 0x0000000000000cc5
+ .xword 0x0000000000000c84, 0x0000000000000c9f
+ .xword 0x0000000000000cb2, 0x0000000000000ca9
+ .xword 0x0000000000000ee0, 0x0000000000000efb
+ .xword 0x0000000000000ed6, 0x0000000000000ecd
+ .xword 0x0000000000000e8c, 0x0000000000000e97
+ .xword 0x0000000000000eba, 0x0000000000000ea1
+ .xword 0x0000000000000e38, 0x0000000000000e23
+ .xword 0x0000000000000e0e, 0x0000000000000e15
+ .xword 0x0000000000000e54, 0x0000000000000e4f
+ .xword 0x0000000000000e62, 0x0000000000000e79
+ .xword 0x0000000000000f50, 0x0000000000000f4b
+ .xword 0x0000000000000f66, 0x0000000000000f7d
+ .xword 0x0000000000000f3c, 0x0000000000000f27
+ .xword 0x0000000000000f0a, 0x0000000000000f11
+ .xword 0x0000000000000f88, 0x0000000000000f93
+ .xword 0x0000000000000fbe, 0x0000000000000fa5
+ .xword 0x0000000000000fe4, 0x0000000000000fff
+ .xword 0x0000000000000fd2, 0x0000000000000fc9
+ .xword 0x0000000000000b40, 0x0000000000000b5b
+ .xword 0x0000000000000b76, 0x0000000000000b6d
+ .xword 0x0000000000000b2c, 0x0000000000000b37
+ .xword 0x0000000000000b1a, 0x0000000000000b01
+ .xword 0x0000000000000b98, 0x0000000000000b83
+ .xword 0x0000000000000bae, 0x0000000000000bb5
+ .xword 0x0000000000000bf4, 0x0000000000000bef
+ .xword 0x0000000000000bc2, 0x0000000000000bd9
+ .xword 0x0000000000000af0, 0x0000000000000aeb
+ .xword 0x0000000000000ac6, 0x0000000000000add
+ .xword 0x0000000000000a9c, 0x0000000000000a87
+ .xword 0x0000000000000aaa, 0x0000000000000ab1
+ .xword 0x0000000000000a28, 0x0000000000000a33
+ .xword 0x0000000000000a1e, 0x0000000000000a05
+ .xword 0x0000000000000a44, 0x0000000000000a5f
+ .xword 0x0000000000000a72, 0x0000000000000a69
+ .xword 0x0000000000000820, 0x000000000000083b
+ .xword 0x0000000000000816, 0x000000000000080d
+ .xword 0x000000000000084c, 0x0000000000000857
+ .xword 0x000000000000087a, 0x0000000000000861
+ .xword 0x00000000000008f8, 0x00000000000008e3
+ .xword 0x00000000000008ce, 0x00000000000008d5
+ .xword 0x0000000000000894, 0x000000000000088f
+ .xword 0x00000000000008a2, 0x00000000000008b9
+ .xword 0x0000000000000990, 0x000000000000098b
+ .xword 0x00000000000009a6, 0x00000000000009bd
+ .xword 0x00000000000009fc, 0x00000000000009e7
+ .xword 0x00000000000009ca, 0x00000000000009d1
+ .xword 0x0000000000000948, 0x0000000000000953
+ .xword 0x000000000000097e, 0x0000000000000965
+ .xword 0x0000000000000924, 0x000000000000093f
+ .xword 0x0000000000000912, 0x0000000000000909
diff --git a/src/isa-l/crc/aarch64/crc64_iso_refl_pmull.S b/src/isa-l/crc/aarch64/crc64_iso_refl_pmull.S
new file mode 100644
index 000000000..2d2bc6658
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_iso_refl_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc64_iso_refl_pmull.h"
+#include "crc64_refl_common_pmull.h"
+
+crc64_refl_func crc64_iso_refl_pmull
diff --git a/src/isa-l/crc/aarch64/crc64_iso_refl_pmull.h b/src/isa-l/crc/aarch64/crc64_iso_refl_pmull.h
new file mode 100644
index 000000000..8ee4f58b1
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_iso_refl_pmull.h
@@ -0,0 +1,197 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, 0x0001
+.equ p4_low_b1, 0xb000
+.equ p4_low_b2, 0x01b1
+.equ p4_low_b3, 0x01b0
+.equ p4_high_b0, 0x0001
+.equ p4_high_b1, 0x0000
+.equ p4_high_b2, 0x0101
+.equ p4_high_b3, 0xb100
+
+.equ p1_low_b0, 0x0001
+.equ p1_low_b1, 0x0000
+.equ p1_low_b2, 0x0000
+.equ p1_low_b3, 0x6b70
+.equ p1_high_b0, 0x0001
+.equ p1_high_b1, 0x0000
+.equ p1_high_b2, 0x0000
+.equ p1_high_b3, 0xf500
+
+.equ p0_low_b0, 0x0001
+.equ p0_low_b1, 0x0000
+.equ p0_low_b2, 0x0000
+.equ p0_low_b3, 0xf500
+
+.equ br_low_b0, 0x0001
+.equ br_low_b1, 0x0000
+.equ br_low_b2, 0x0000
+.equ br_low_b3, 0xb000
+.equ br_high_b0, 0x0001
+.equ br_high_b1, 0x0000
+.equ br_high_b2, 0x0000
+.equ br_high_b3, 0xb000
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc64_tab, %object
+ .size crc64_tab, 2048
+
+crc64_tab:
+ .xword 0x0000000000000000, 0x01b0000000000000
+ .xword 0x0360000000000000, 0x02d0000000000000
+ .xword 0x06c0000000000000, 0x0770000000000000
+ .xword 0x05a0000000000000, 0x0410000000000000
+ .xword 0x0d80000000000000, 0x0c30000000000000
+ .xword 0x0ee0000000000000, 0x0f50000000000000
+ .xword 0x0b40000000000000, 0x0af0000000000000
+ .xword 0x0820000000000000, 0x0990000000000000
+ .xword 0x1b00000000000000, 0x1ab0000000000000
+ .xword 0x1860000000000000, 0x19d0000000000000
+ .xword 0x1dc0000000000000, 0x1c70000000000000
+ .xword 0x1ea0000000000000, 0x1f10000000000000
+ .xword 0x1680000000000000, 0x1730000000000000
+ .xword 0x15e0000000000000, 0x1450000000000000
+ .xword 0x1040000000000000, 0x11f0000000000000
+ .xword 0x1320000000000000, 0x1290000000000000
+ .xword 0x3600000000000000, 0x37b0000000000000
+ .xword 0x3560000000000000, 0x34d0000000000000
+ .xword 0x30c0000000000000, 0x3170000000000000
+ .xword 0x33a0000000000000, 0x3210000000000000
+ .xword 0x3b80000000000000, 0x3a30000000000000
+ .xword 0x38e0000000000000, 0x3950000000000000
+ .xword 0x3d40000000000000, 0x3cf0000000000000
+ .xword 0x3e20000000000000, 0x3f90000000000000
+ .xword 0x2d00000000000000, 0x2cb0000000000000
+ .xword 0x2e60000000000000, 0x2fd0000000000000
+ .xword 0x2bc0000000000000, 0x2a70000000000000
+ .xword 0x28a0000000000000, 0x2910000000000000
+ .xword 0x2080000000000000, 0x2130000000000000
+ .xword 0x23e0000000000000, 0x2250000000000000
+ .xword 0x2640000000000000, 0x27f0000000000000
+ .xword 0x2520000000000000, 0x2490000000000000
+ .xword 0x6c00000000000000, 0x6db0000000000000
+ .xword 0x6f60000000000000, 0x6ed0000000000000
+ .xword 0x6ac0000000000000, 0x6b70000000000000
+ .xword 0x69a0000000000000, 0x6810000000000000
+ .xword 0x6180000000000000, 0x6030000000000000
+ .xword 0x62e0000000000000, 0x6350000000000000
+ .xword 0x6740000000000000, 0x66f0000000000000
+ .xword 0x6420000000000000, 0x6590000000000000
+ .xword 0x7700000000000000, 0x76b0000000000000
+ .xword 0x7460000000000000, 0x75d0000000000000
+ .xword 0x71c0000000000000, 0x7070000000000000
+ .xword 0x72a0000000000000, 0x7310000000000000
+ .xword 0x7a80000000000000, 0x7b30000000000000
+ .xword 0x79e0000000000000, 0x7850000000000000
+ .xword 0x7c40000000000000, 0x7df0000000000000
+ .xword 0x7f20000000000000, 0x7e90000000000000
+ .xword 0x5a00000000000000, 0x5bb0000000000000
+ .xword 0x5960000000000000, 0x58d0000000000000
+ .xword 0x5cc0000000000000, 0x5d70000000000000
+ .xword 0x5fa0000000000000, 0x5e10000000000000
+ .xword 0x5780000000000000, 0x5630000000000000
+ .xword 0x54e0000000000000, 0x5550000000000000
+ .xword 0x5140000000000000, 0x50f0000000000000
+ .xword 0x5220000000000000, 0x5390000000000000
+ .xword 0x4100000000000000, 0x40b0000000000000
+ .xword 0x4260000000000000, 0x43d0000000000000
+ .xword 0x47c0000000000000, 0x4670000000000000
+ .xword 0x44a0000000000000, 0x4510000000000000
+ .xword 0x4c80000000000000, 0x4d30000000000000
+ .xword 0x4fe0000000000000, 0x4e50000000000000
+ .xword 0x4a40000000000000, 0x4bf0000000000000
+ .xword 0x4920000000000000, 0x4890000000000000
+ .xword 0xd800000000000000, 0xd9b0000000000000
+ .xword 0xdb60000000000000, 0xdad0000000000000
+ .xword 0xdec0000000000000, 0xdf70000000000000
+ .xword 0xdda0000000000000, 0xdc10000000000000
+ .xword 0xd580000000000000, 0xd430000000000000
+ .xword 0xd6e0000000000000, 0xd750000000000000
+ .xword 0xd340000000000000, 0xd2f0000000000000
+ .xword 0xd020000000000000, 0xd190000000000000
+ .xword 0xc300000000000000, 0xc2b0000000000000
+ .xword 0xc060000000000000, 0xc1d0000000000000
+ .xword 0xc5c0000000000000, 0xc470000000000000
+ .xword 0xc6a0000000000000, 0xc710000000000000
+ .xword 0xce80000000000000, 0xcf30000000000000
+ .xword 0xcde0000000000000, 0xcc50000000000000
+ .xword 0xc840000000000000, 0xc9f0000000000000
+ .xword 0xcb20000000000000, 0xca90000000000000
+ .xword 0xee00000000000000, 0xefb0000000000000
+ .xword 0xed60000000000000, 0xecd0000000000000
+ .xword 0xe8c0000000000000, 0xe970000000000000
+ .xword 0xeba0000000000000, 0xea10000000000000
+ .xword 0xe380000000000000, 0xe230000000000000
+ .xword 0xe0e0000000000000, 0xe150000000000000
+ .xword 0xe540000000000000, 0xe4f0000000000000
+ .xword 0xe620000000000000, 0xe790000000000000
+ .xword 0xf500000000000000, 0xf4b0000000000000
+ .xword 0xf660000000000000, 0xf7d0000000000000
+ .xword 0xf3c0000000000000, 0xf270000000000000
+ .xword 0xf0a0000000000000, 0xf110000000000000
+ .xword 0xf880000000000000, 0xf930000000000000
+ .xword 0xfbe0000000000000, 0xfa50000000000000
+ .xword 0xfe40000000000000, 0xfff0000000000000
+ .xword 0xfd20000000000000, 0xfc90000000000000
+ .xword 0xb400000000000000, 0xb5b0000000000000
+ .xword 0xb760000000000000, 0xb6d0000000000000
+ .xword 0xb2c0000000000000, 0xb370000000000000
+ .xword 0xb1a0000000000000, 0xb010000000000000
+ .xword 0xb980000000000000, 0xb830000000000000
+ .xword 0xbae0000000000000, 0xbb50000000000000
+ .xword 0xbf40000000000000, 0xbef0000000000000
+ .xword 0xbc20000000000000, 0xbd90000000000000
+ .xword 0xaf00000000000000, 0xaeb0000000000000
+ .xword 0xac60000000000000, 0xadd0000000000000
+ .xword 0xa9c0000000000000, 0xa870000000000000
+ .xword 0xaaa0000000000000, 0xab10000000000000
+ .xword 0xa280000000000000, 0xa330000000000000
+ .xword 0xa1e0000000000000, 0xa050000000000000
+ .xword 0xa440000000000000, 0xa5f0000000000000
+ .xword 0xa720000000000000, 0xa690000000000000
+ .xword 0x8200000000000000, 0x83b0000000000000
+ .xword 0x8160000000000000, 0x80d0000000000000
+ .xword 0x84c0000000000000, 0x8570000000000000
+ .xword 0x87a0000000000000, 0x8610000000000000
+ .xword 0x8f80000000000000, 0x8e30000000000000
+ .xword 0x8ce0000000000000, 0x8d50000000000000
+ .xword 0x8940000000000000, 0x88f0000000000000
+ .xword 0x8a20000000000000, 0x8b90000000000000
+ .xword 0x9900000000000000, 0x98b0000000000000
+ .xword 0x9a60000000000000, 0x9bd0000000000000
+ .xword 0x9fc0000000000000, 0x9e70000000000000
+ .xword 0x9ca0000000000000, 0x9d10000000000000
+ .xword 0x9480000000000000, 0x9530000000000000
+ .xword 0x97e0000000000000, 0x9650000000000000
+ .xword 0x9240000000000000, 0x93f0000000000000
+ .xword 0x9120000000000000, 0x9090000000000000
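
For reference, the 256-entry crc64_tab above is the standard byte-at-a-time lookup table for the reflected CRC-64/ISO polynomial (the bit-reversed form of 0x000000000000001B is 0xD800000000000000). A minimal C generator sketch — names are illustrative, not part of isa-l — reproduces the table; note that tab[1] comes out as 0x01b0000000000000, matching the second entry above:

    #include <stdint.h>
    #include <stdio.h>

    /* Reflected CRC-64/ISO polynomial: bit-reversed 0x000000000000001B. */
    #define CRC64_ISO_REFL_POLY 0xD800000000000000ULL

    int main(void)
    {
            uint64_t tab[256];
            for (int i = 0; i < 256; i++) {
                    uint64_t crc = (uint64_t)i;
                    for (int j = 0; j < 8; j++)
                            /* shift right; xor in the polynomial when a 1 bit falls out */
                            crc = (crc >> 1) ^ ((crc & 1) ? CRC64_ISO_REFL_POLY : 0);
                    tab[i] = crc;
            }
            printf("0x%016llx\n", (unsigned long long)tab[1]); /* 0x01b0000000000000 */
            return 0;
    }
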
diff --git a/src/isa-l/crc/aarch64/crc64_jones_norm_pmull.S b/src/isa-l/crc/aarch64/crc64_jones_norm_pmull.S
new file mode 100644
index 000000000..4f298376c
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_jones_norm_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc64_jones_norm_pmull.h"
+#include "crc64_norm_common_pmull.h"
+
+crc64_norm_func crc64_jones_norm_pmull
diff --git a/src/isa-l/crc/aarch64/crc64_jones_norm_pmull.h b/src/isa-l/crc/aarch64/crc64_jones_norm_pmull.h
new file mode 100644
index 000000000..a20fa045d
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_jones_norm_pmull.h
@@ -0,0 +1,200 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, (0xd25e)
+.equ p4_low_b1, 0xca43
+.equ p4_low_b2, 0x1e58
+.equ p4_low_b3, 0x4e50
+.equ p4_high_b0, 0xf643
+.equ p4_high_b1, 0x8f27
+.equ p4_high_b2, 0x6158
+.equ p4_high_b3, 0x13c9
+
+.equ p1_low_b0, (0x7038)
+.equ p1_low_b1, 0x5001
+.equ p1_low_b2, 0xed27
+.equ p1_low_b3, 0x4445
+.equ p1_high_b0, 0xd736
+.equ p1_high_b1, 0x7cfb
+.equ p1_high_b2, 0x7415
+.equ p1_high_b3, 0x698b
+
+.equ p0_low_b0, (0x7038)
+.equ p0_low_b1, 0x5001
+.equ p0_low_b2, 0xed27
+.equ p0_low_b3, 0x4445
+.equ p0_high_b0, 0x0000
+.equ p0_high_b1, 0x0000
+.equ p0_high_b2, 0x0000
+.equ p0_high_b3, 0x0000
+
+.equ br_low_b0, (0x6cf8)
+.equ br_low_b1, 0x98be
+.equ br_low_b2, 0xeeb2
+.equ br_low_b3, 0xddf3
+.equ br_high_b0, 0x35a9
+.equ br_high_b1, 0x94c9
+.equ br_high_b2, 0xd235
+.equ br_high_b3, 0xad93
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc64_tab, %object
+ .size crc64_tab, 2048
+crc64_tab:
+ .xword 0x0000000000000000, 0xad93d23594c935a9
+ .xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52
+ .xword 0x40fb3e88ee7f885f, 0xed68ecbd7ab6bdf6
+ .xword 0xb64f48d65324d6a4, 0x1bdc9ae3c7ede30d
+ .xword 0x81f67d11dcff10be, 0x2c65af2448362517
+ .xword 0x77420b4f61a44e45, 0xdad1d97af56d7bec
+ .xword 0xc10d4399328098e1, 0x6c9e91aca649ad48
+ .xword 0x37b935c78fdbc61a, 0x9a2ae7f21b12f3b3
+ .xword 0xae7f28162d3714d5, 0x03ecfa23b9fe217c
+ .xword 0x58cb5e48906c4a2e, 0xf5588c7d04a57f87
+ .xword 0xee84169ec3489c8a, 0x4317c4ab5781a923
+ .xword 0x183060c07e13c271, 0xb5a3b2f5eadaf7d8
+ .xword 0x2f895507f1c8046b, 0x821a8732650131c2
+ .xword 0xd93d23594c935a90, 0x74aef16cd85a6f39
+ .xword 0x6f726b8f1fb78c34, 0xc2e1b9ba8b7eb99d
+ .xword 0x99c61dd1a2ecd2cf, 0x3455cfe43625e766
+ .xword 0xf16d8219cea71c03, 0x5cfe502c5a6e29aa
+ .xword 0x07d9f44773fc42f8, 0xaa4a2672e7357751
+ .xword 0xb196bc9120d8945c, 0x1c056ea4b411a1f5
+ .xword 0x4722cacf9d83caa7, 0xeab118fa094aff0e
+ .xword 0x709bff0812580cbd, 0xdd082d3d86913914
+ .xword 0x862f8956af035246, 0x2bbc5b633bca67ef
+ .xword 0x3060c180fc2784e2, 0x9df313b568eeb14b
+ .xword 0xc6d4b7de417cda19, 0x6b4765ebd5b5efb0
+ .xword 0x5f12aa0fe39008d6, 0xf281783a77593d7f
+ .xword 0xa9a6dc515ecb562d, 0x04350e64ca026384
+ .xword 0x1fe994870def8089, 0xb27a46b29926b520
+ .xword 0xe95de2d9b0b4de72, 0x44ce30ec247debdb
+ .xword 0xdee4d71e3f6f1868, 0x7377052baba62dc1
+ .xword 0x2850a14082344693, 0x85c3737516fd733a
+ .xword 0x9e1fe996d1109037, 0x338c3ba345d9a59e
+ .xword 0x68ab9fc86c4bcecc, 0xc5384dfdf882fb65
+ .xword 0x4f48d60609870daf, 0xe2db04339d4e3806
+ .xword 0xb9fca058b4dc5354, 0x146f726d201566fd
+ .xword 0x0fb3e88ee7f885f0, 0xa2203abb7331b059
+ .xword 0xf9079ed05aa3db0b, 0x54944ce5ce6aeea2
+ .xword 0xcebeab17d5781d11, 0x632d792241b128b8
+ .xword 0x380add49682343ea, 0x95990f7cfcea7643
+ .xword 0x8e45959f3b07954e, 0x23d647aaafcea0e7
+ .xword 0x78f1e3c1865ccbb5, 0xd56231f41295fe1c
+ .xword 0xe137fe1024b0197a, 0x4ca42c25b0792cd3
+ .xword 0x1783884e99eb4781, 0xba105a7b0d227228
+ .xword 0xa1ccc098cacf9125, 0x0c5f12ad5e06a48c
+ .xword 0x5778b6c67794cfde, 0xfaeb64f3e35dfa77
+ .xword 0x60c18301f84f09c4, 0xcd5251346c863c6d
+ .xword 0x9675f55f4514573f, 0x3be6276ad1dd6296
+ .xword 0x203abd891630819b, 0x8da96fbc82f9b432
+ .xword 0xd68ecbd7ab6bdf60, 0x7b1d19e23fa2eac9
+ .xword 0xbe25541fc72011ac, 0x13b6862a53e92405
+ .xword 0x489122417a7b4f57, 0xe502f074eeb27afe
+ .xword 0xfede6a97295f99f3, 0x534db8a2bd96ac5a
+ .xword 0x086a1cc99404c708, 0xa5f9cefc00cdf2a1
+ .xword 0x3fd3290e1bdf0112, 0x9240fb3b8f1634bb
+ .xword 0xc9675f50a6845fe9, 0x64f48d65324d6a40
+ .xword 0x7f281786f5a0894d, 0xd2bbc5b36169bce4
+ .xword 0x899c61d848fbd7b6, 0x240fb3eddc32e21f
+ .xword 0x105a7c09ea170579, 0xbdc9ae3c7ede30d0
+ .xword 0xe6ee0a57574c5b82, 0x4b7dd862c3856e2b
+ .xword 0x50a1428104688d26, 0xfd3290b490a1b88f
+ .xword 0xa61534dfb933d3dd, 0x0b86e6ea2dfae674
+ .xword 0x91ac011836e815c7, 0x3c3fd32da221206e
+ .xword 0x671877468bb34b3c, 0xca8ba5731f7a7e95
+ .xword 0xd1573f90d8979d98, 0x7cc4eda54c5ea831
+ .xword 0x27e349ce65ccc363, 0x8a709bfbf105f6ca
+ .xword 0x9e91ac0c130e1b5e, 0x33027e3987c72ef7
+ .xword 0x6825da52ae5545a5, 0xc5b608673a9c700c
+ .xword 0xde6a9284fd719301, 0x73f940b169b8a6a8
+ .xword 0x28dee4da402acdfa, 0x854d36efd4e3f853
+ .xword 0x1f67d11dcff10be0, 0xb2f403285b383e49
+ .xword 0xe9d3a74372aa551b, 0x44407576e66360b2
+ .xword 0x5f9cef95218e83bf, 0xf20f3da0b547b616
+ .xword 0xa92899cb9cd5dd44, 0x04bb4bfe081ce8ed
+ .xword 0x30ee841a3e390f8b, 0x9d7d562faaf03a22
+ .xword 0xc65af24483625170, 0x6bc9207117ab64d9
+ .xword 0x7015ba92d04687d4, 0xdd8668a7448fb27d
+ .xword 0x86a1cccc6d1dd92f, 0x2b321ef9f9d4ec86
+ .xword 0xb118f90be2c61f35, 0x1c8b2b3e760f2a9c
+ .xword 0x47ac8f555f9d41ce, 0xea3f5d60cb547467
+ .xword 0xf1e3c7830cb9976a, 0x5c7015b69870a2c3
+ .xword 0x0757b1ddb1e2c991, 0xaac463e8252bfc38
+ .xword 0x6ffc2e15dda9075d, 0xc26ffc20496032f4
+ .xword 0x9948584b60f259a6, 0x34db8a7ef43b6c0f
+ .xword 0x2f07109d33d68f02, 0x8294c2a8a71fbaab
+ .xword 0xd9b366c38e8dd1f9, 0x7420b4f61a44e450
+ .xword 0xee0a5304015617e3, 0x43998131959f224a
+ .xword 0x18be255abc0d4918, 0xb52df76f28c47cb1
+ .xword 0xaef16d8cef299fbc, 0x0362bfb97be0aa15
+ .xword 0x58451bd25272c147, 0xf5d6c9e7c6bbf4ee
+ .xword 0xc1830603f09e1388, 0x6c10d43664572621
+ .xword 0x3737705d4dc54d73, 0x9aa4a268d90c78da
+ .xword 0x8178388b1ee19bd7, 0x2cebeabe8a28ae7e
+ .xword 0x77cc4ed5a3bac52c, 0xda5f9ce03773f085
+ .xword 0x40757b122c610336, 0xede6a927b8a8369f
+ .xword 0xb6c10d4c913a5dcd, 0x1b52df7905f36864
+ .xword 0x008e459ac21e8b69, 0xad1d97af56d7bec0
+ .xword 0xf63a33c47f45d592, 0x5ba9e1f1eb8ce03b
+ .xword 0xd1d97a0a1a8916f1, 0x7c4aa83f8e402358
+ .xword 0x276d0c54a7d2480a, 0x8afede61331b7da3
+ .xword 0x91224482f4f69eae, 0x3cb196b7603fab07
+ .xword 0x679632dc49adc055, 0xca05e0e9dd64f5fc
+ .xword 0x502f071bc676064f, 0xfdbcd52e52bf33e6
+ .xword 0xa69b71457b2d58b4, 0x0b08a370efe46d1d
+ .xword 0x10d4399328098e10, 0xbd47eba6bcc0bbb9
+ .xword 0xe6604fcd9552d0eb, 0x4bf39df8019be542
+ .xword 0x7fa6521c37be0224, 0xd2358029a377378d
+ .xword 0x891224428ae55cdf, 0x2481f6771e2c6976
+ .xword 0x3f5d6c94d9c18a7b, 0x92cebea14d08bfd2
+ .xword 0xc9e91aca649ad480, 0x647ac8fff053e129
+ .xword 0xfe502f0deb41129a, 0x53c3fd387f882733
+ .xword 0x08e45953561a4c61, 0xa5778b66c2d379c8
+ .xword 0xbeab1185053e9ac5, 0x1338c3b091f7af6c
+ .xword 0x481f67dbb865c43e, 0xe58cb5ee2cacf197
+ .xword 0x20b4f813d42e0af2, 0x8d272a2640e73f5b
+ .xword 0xd6008e4d69755409, 0x7b935c78fdbc61a0
+ .xword 0x604fc69b3a5182ad, 0xcddc14aeae98b704
+ .xword 0x96fbb0c5870adc56, 0x3b6862f013c3e9ff
+ .xword 0xa142850208d11a4c, 0x0cd157379c182fe5
+ .xword 0x57f6f35cb58a44b7, 0xfa6521692143711e
+ .xword 0xe1b9bb8ae6ae9213, 0x4c2a69bf7267a7ba
+ .xword 0x170dcdd45bf5cce8, 0xba9e1fe1cf3cf941
+ .xword 0x8ecbd005f9191e27, 0x235802306dd02b8e
+ .xword 0x787fa65b444240dc, 0xd5ec746ed08b7575
+ .xword 0xce30ee8d17669678, 0x63a33cb883afa3d1
+ .xword 0x388498d3aa3dc883, 0x95174ae63ef4fd2a
+ .xword 0x0f3dad1425e60e99, 0xa2ae7f21b12f3b30
+ .xword 0xf989db4a98bd5062, 0x541a097f0c7465cb
+ .xword 0x4fc6939ccb9986c6, 0xe25541a95f50b36f
+ .xword 0xb972e5c276c2d83d, 0x14e137f7e20bed94
diff --git a/src/isa-l/crc/aarch64/crc64_jones_refl_pmull.S b/src/isa-l/crc/aarch64/crc64_jones_refl_pmull.S
new file mode 100644
index 000000000..177092f9f
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_jones_refl_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc64_jones_refl_pmull.h"
+#include "crc64_refl_common_pmull.h"
+
+crc64_refl_func crc64_jones_refl_pmull
diff --git a/src/isa-l/crc/aarch64/crc64_jones_refl_pmull.h b/src/isa-l/crc/aarch64/crc64_jones_refl_pmull.h
new file mode 100644
index 000000000..5bf98f73e
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_jones_refl_pmull.h
@@ -0,0 +1,196 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.equ p4_low_b0, 0xb4fb
+.equ p4_low_b1, 0x6d9a
+.equ p4_low_b2, 0xefb1
+.equ p4_low_b3, 0xaf86
+.equ p4_high_b0, 0x14e4
+.equ p4_high_b1, 0x34f0
+.equ p4_high_b2, 0x84a6
+.equ p4_high_b3, 0xf497
+
+.equ p1_low_b0, 0xa32c
+.equ p1_low_b1, 0x505d
+.equ p1_low_b2, 0xbe7d
+.equ p1_low_b3, 0xd9d7
+.equ p1_high_b0, 0x4444
+.equ p1_high_b1, 0xc96f
+.equ p1_high_b2, 0x0015
+.equ p1_high_b3, 0x381d
+
+.equ p0_low_b0, 0x4444
+.equ p0_low_b1, 0xc96f
+.equ p0_low_b2, 0x0015
+.equ p0_low_b3, 0x381d
+
+.equ br_low_b0, 0x9f77
+.equ br_low_b1, 0x9aef
+.equ br_low_b2, 0xfa32
+.equ br_low_b3, 0x3e6c
+.equ br_high_b0, 0x936b
+.equ br_high_b1, 0x5897
+.equ br_high_b2, 0x2653
+.equ br_high_b3, 0x2b59
+
+ .text
+ .section .rodata
+ .align 4
+ .set .lanchor_crc_tab,. + 0
+ .type crc64_tab, %object
+ .size crc64_tab, 2048
+crc64_tab:
+ .xword 0x0000000000000000, 0x7ad870c830358979
+ .xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b
+ .xword 0xc038e5739841b68f, 0xbae095bba8743ff6
+ .xword 0x358804e3f82aa47d, 0x4f50742bc81f2d04
+ .xword 0xab28ecb46814fe75, 0xd1f09c7c5821770c
+ .xword 0x5e980d24087fec87, 0x24407dec384a65fe
+ .xword 0x6b1009c7f05548fa, 0x11c8790fc060c183
+ .xword 0x9ea0e857903e5a08, 0xe478989fa00bd371
+ .xword 0x7d08ff3b88be6f81, 0x07d08ff3b88be6f8
+ .xword 0x88b81eabe8d57d73, 0xf2606e63d8e0f40a
+ .xword 0xbd301a4810ffd90e, 0xc7e86a8020ca5077
+ .xword 0x4880fbd87094cbfc, 0x32588b1040a14285
+ .xword 0xd620138fe0aa91f4, 0xacf86347d09f188d
+ .xword 0x2390f21f80c18306, 0x594882d7b0f40a7f
+ .xword 0x1618f6fc78eb277b, 0x6cc0863448deae02
+ .xword 0xe3a8176c18803589, 0x997067a428b5bcf0
+ .xword 0xfa11fe77117cdf02, 0x80c98ebf2149567b
+ .xword 0x0fa11fe77117cdf0, 0x75796f2f41224489
+ .xword 0x3a291b04893d698d, 0x40f16bccb908e0f4
+ .xword 0xcf99fa94e9567b7f, 0xb5418a5cd963f206
+ .xword 0x513912c379682177, 0x2be1620b495da80e
+ .xword 0xa489f35319033385, 0xde51839b2936bafc
+ .xword 0x9101f7b0e12997f8, 0xebd98778d11c1e81
+ .xword 0x64b116208142850a, 0x1e6966e8b1770c73
+ .xword 0x8719014c99c2b083, 0xfdc17184a9f739fa
+ .xword 0x72a9e0dcf9a9a271, 0x08719014c99c2b08
+ .xword 0x4721e43f0183060c, 0x3df994f731b68f75
+ .xword 0xb29105af61e814fe, 0xc849756751dd9d87
+ .xword 0x2c31edf8f1d64ef6, 0x56e99d30c1e3c78f
+ .xword 0xd9810c6891bd5c04, 0xa3597ca0a188d57d
+ .xword 0xec09088b6997f879, 0x96d1784359a27100
+ .xword 0x19b9e91b09fcea8b, 0x636199d339c963f2
+ .xword 0xdf7adabd7a6e2d6f, 0xa5a2aa754a5ba416
+ .xword 0x2aca3b2d1a053f9d, 0x50124be52a30b6e4
+ .xword 0x1f423fcee22f9be0, 0x659a4f06d21a1299
+ .xword 0xeaf2de5e82448912, 0x902aae96b271006b
+ .xword 0x74523609127ad31a, 0x0e8a46c1224f5a63
+ .xword 0x81e2d7997211c1e8, 0xfb3aa75142244891
+ .xword 0xb46ad37a8a3b6595, 0xceb2a3b2ba0eecec
+ .xword 0x41da32eaea507767, 0x3b024222da65fe1e
+ .xword 0xa2722586f2d042ee, 0xd8aa554ec2e5cb97
+ .xword 0x57c2c41692bb501c, 0x2d1ab4dea28ed965
+ .xword 0x624ac0f56a91f461, 0x1892b03d5aa47d18
+ .xword 0x97fa21650afae693, 0xed2251ad3acf6fea
+ .xword 0x095ac9329ac4bc9b, 0x7382b9faaaf135e2
+ .xword 0xfcea28a2faafae69, 0x8632586aca9a2710
+ .xword 0xc9622c4102850a14, 0xb3ba5c8932b0836d
+ .xword 0x3cd2cdd162ee18e6, 0x460abd1952db919f
+ .xword 0x256b24ca6b12f26d, 0x5fb354025b277b14
+ .xword 0xd0dbc55a0b79e09f, 0xaa03b5923b4c69e6
+ .xword 0xe553c1b9f35344e2, 0x9f8bb171c366cd9b
+ .xword 0x10e3202993385610, 0x6a3b50e1a30ddf69
+ .xword 0x8e43c87e03060c18, 0xf49bb8b633338561
+ .xword 0x7bf329ee636d1eea, 0x012b592653589793
+ .xword 0x4e7b2d0d9b47ba97, 0x34a35dc5ab7233ee
+ .xword 0xbbcbcc9dfb2ca865, 0xc113bc55cb19211c
+ .xword 0x5863dbf1e3ac9dec, 0x22bbab39d3991495
+ .xword 0xadd33a6183c78f1e, 0xd70b4aa9b3f20667
+ .xword 0x985b3e827bed2b63, 0xe2834e4a4bd8a21a
+ .xword 0x6debdf121b863991, 0x1733afda2bb3b0e8
+ .xword 0xf34b37458bb86399, 0x8993478dbb8deae0
+ .xword 0x06fbd6d5ebd3716b, 0x7c23a61ddbe6f812
+ .xword 0x3373d23613f9d516, 0x49aba2fe23cc5c6f
+ .xword 0xc6c333a67392c7e4, 0xbc1b436e43a74e9d
+ .xword 0x95ac9329ac4bc9b5, 0xef74e3e19c7e40cc
+ .xword 0x601c72b9cc20db47, 0x1ac40271fc15523e
+ .xword 0x5594765a340a7f3a, 0x2f4c0692043ff643
+ .xword 0xa02497ca54616dc8, 0xdafce7026454e4b1
+ .xword 0x3e847f9dc45f37c0, 0x445c0f55f46abeb9
+ .xword 0xcb349e0da4342532, 0xb1eceec59401ac4b
+ .xword 0xfebc9aee5c1e814f, 0x8464ea266c2b0836
+ .xword 0x0b0c7b7e3c7593bd, 0x71d40bb60c401ac4
+ .xword 0xe8a46c1224f5a634, 0x927c1cda14c02f4d
+ .xword 0x1d148d82449eb4c6, 0x67ccfd4a74ab3dbf
+ .xword 0x289c8961bcb410bb, 0x5244f9a98c8199c2
+ .xword 0xdd2c68f1dcdf0249, 0xa7f41839ecea8b30
+ .xword 0x438c80a64ce15841, 0x3954f06e7cd4d138
+ .xword 0xb63c61362c8a4ab3, 0xcce411fe1cbfc3ca
+ .xword 0x83b465d5d4a0eece, 0xf96c151de49567b7
+ .xword 0x76048445b4cbfc3c, 0x0cdcf48d84fe7545
+ .xword 0x6fbd6d5ebd3716b7, 0x15651d968d029fce
+ .xword 0x9a0d8ccedd5c0445, 0xe0d5fc06ed698d3c
+ .xword 0xaf85882d2576a038, 0xd55df8e515432941
+ .xword 0x5a3569bd451db2ca, 0x20ed197575283bb3
+ .xword 0xc49581ead523e8c2, 0xbe4df122e51661bb
+ .xword 0x3125607ab548fa30, 0x4bfd10b2857d7349
+ .xword 0x04ad64994d625e4d, 0x7e7514517d57d734
+ .xword 0xf11d85092d094cbf, 0x8bc5f5c11d3cc5c6
+ .xword 0x12b5926535897936, 0x686de2ad05bcf04f
+ .xword 0xe70573f555e26bc4, 0x9ddd033d65d7e2bd
+ .xword 0xd28d7716adc8cfb9, 0xa85507de9dfd46c0
+ .xword 0x273d9686cda3dd4b, 0x5de5e64efd965432
+ .xword 0xb99d7ed15d9d8743, 0xc3450e196da80e3a
+ .xword 0x4c2d9f413df695b1, 0x36f5ef890dc31cc8
+ .xword 0x79a59ba2c5dc31cc, 0x037deb6af5e9b8b5
+ .xword 0x8c157a32a5b7233e, 0xf6cd0afa9582aa47
+ .xword 0x4ad64994d625e4da, 0x300e395ce6106da3
+ .xword 0xbf66a804b64ef628, 0xc5bed8cc867b7f51
+ .xword 0x8aeeace74e645255, 0xf036dc2f7e51db2c
+ .xword 0x7f5e4d772e0f40a7, 0x05863dbf1e3ac9de
+ .xword 0xe1fea520be311aaf, 0x9b26d5e88e0493d6
+ .xword 0x144e44b0de5a085d, 0x6e963478ee6f8124
+ .xword 0x21c640532670ac20, 0x5b1e309b16452559
+ .xword 0xd476a1c3461bbed2, 0xaeaed10b762e37ab
+ .xword 0x37deb6af5e9b8b5b, 0x4d06c6676eae0222
+ .xword 0xc26e573f3ef099a9, 0xb8b627f70ec510d0
+ .xword 0xf7e653dcc6da3dd4, 0x8d3e2314f6efb4ad
+ .xword 0x0256b24ca6b12f26, 0x788ec2849684a65f
+ .xword 0x9cf65a1b368f752e, 0xe62e2ad306bafc57
+ .xword 0x6946bb8b56e467dc, 0x139ecb4366d1eea5
+ .xword 0x5ccebf68aecec3a1, 0x2616cfa09efb4ad8
+ .xword 0xa97e5ef8cea5d153, 0xd3a62e30fe90582a
+ .xword 0xb0c7b7e3c7593bd8, 0xca1fc72bf76cb2a1
+ .xword 0x45775673a732292a, 0x3faf26bb9707a053
+ .xword 0x70ff52905f188d57, 0x0a2722586f2d042e
+ .xword 0x854fb3003f739fa5, 0xff97c3c80f4616dc
+ .xword 0x1bef5b57af4dc5ad, 0x61372b9f9f784cd4
+ .xword 0xee5fbac7cf26d75f, 0x9487ca0fff135e26
+ .xword 0xdbd7be24370c7322, 0xa10fceec0739fa5b
+ .xword 0x2e675fb4576761d0, 0x54bf2f7c6752e8a9
+ .xword 0xcdcf48d84fe75459, 0xb71738107fd2dd20
+ .xword 0x387fa9482f8c46ab, 0x42a7d9801fb9cfd2
+ .xword 0x0df7adabd7a6e2d6, 0x772fdd63e7936baf
+ .xword 0xf8474c3bb7cdf024, 0x829f3cf387f8795d
+ .xword 0x66e7a46c27f3aa2c, 0x1c3fd4a417c62355
+ .xword 0x935745fc4798b8de, 0xe98f353477ad31a7
+ .xword 0xa6df411fbfb21ca3, 0xdc0731d78f8795da
+ .xword 0x536fa08fdfd90e51, 0x29b7d047efec8728
diff --git a/src/isa-l/crc/aarch64/crc64_norm_common_pmull.h b/src/isa-l/crc/aarch64/crc64_norm_common_pmull.h
new file mode 100644
index 000000000..1bdfc26b5
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_norm_common_pmull.h
@@ -0,0 +1,129 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc_common_pmull.h"
+
+.macro crc64_norm_func name:req
+ .arch armv8-a+crypto
+ .text
+ .align 3
+ .global \name
+ .type \name, %function
+
+/* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
+
+\name\():
+ mvn x_seed, x_seed
+ mov x_counter, 0
+ cmp x_len, (FOLD_SIZE-1)
+ bhi .crc_clmul_pre
+
+.crc_tab_pre:
+ cmp x_len, x_counter
+ bls .done
+
+ adrp x_tmp, .lanchor_crc_tab
+ add x_buf_iter, x_buf, x_counter
+ add x_buf, x_buf, x_len
+ add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
+
+ .align 3
+.loop_crc_tab:
+ ldrb w_tmp, [x_buf_iter], 1
+ cmp x_buf, x_buf_iter
+ eor x_tmp, x_tmp, x_seed, lsr 56
+ ldr x_tmp, [x_crc_tab_addr, x_tmp, lsl 3]
+ eor x_seed, x_tmp, x_seed, lsl 8
+ bne .loop_crc_tab
+
+.done:
+ mvn x_crc_ret, x_seed
+ ret
+
+ .align 2
+.crc_clmul_pre:
+ movi v_x0.2s, 0
+ fmov v_x0.d[1], x_seed // save crc to v_x0
+
+ crc_norm_load_first_block
+
+ bls .clmul_loop_end
+
+ crc64_load_p4
+
+// 1024bit --> 512bit loop
+// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t)
+ crc_norm_loop
+
+.clmul_loop_end:
+// folding 512bit --> 128bit
+ crc64_fold_512b_to_128b
+
+// folding 128bit --> 64bit
+ mov x_tmp, p0_low_b0
+ movk x_tmp, p0_low_b1, lsl 16
+ movk x_tmp, p0_low_b2, lsl 32
+ movk x_tmp, p0_low_b3, lsl 48
+ fmov d_p0_high, x_tmp
+
+ pmull2 v_tmp_high.1q, v_x3.2d, v_p0.2d
+ movi v_tmp_low.2s, 0
+ ext v_tmp_low.16b, v_tmp_low.16b, v_x3.16b, #8
+
+ eor v_x3.16b, v_tmp_high.16b, v_tmp_low.16b
+
+// barrett reduction
+ mov x_tmp, br_low_b0
+ movk x_tmp, br_low_b1, lsl 16
+ movk x_tmp, br_low_b2, lsl 32
+ movk x_tmp, br_low_b3, lsl 48
+ fmov d_br_low2, x_tmp
+
+ mov x_tmp2, br_high_b0
+ movk x_tmp2, br_high_b1, lsl 16
+ movk x_tmp2, br_high_b2, lsl 32
+ movk x_tmp2, br_high_b3, lsl 48
+ fmov d_br_high2, x_tmp2
+
+ pmull2 v_tmp_low.1q, v_x3.2d, v_br_low.2d
+ eor v_tmp_low.16b, v_x3.16b, v_tmp_low.16b
+ pmull2 v_tmp_low.1q, v_tmp_low.2d, v_br_high.2d
+ eor v_x3.8b, v_x3.8b, v_tmp_low.8b
+ umov x_seed, v_x3.d[0]
+
+ b .crc_tab_pre
+
+ .size \name, .-\name
+
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+.shuffle_data:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8
+ .byte 7, 6, 5, 4, 3, 2, 1, 0
+.endm
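
The short-buffer path of this macro (.crc_tab_pre/.loop_crc_tab above) is the classic MSB-first, byte-at-a-time table update, with the seed inverted on entry and exit (the two mvn instructions). A C sketch of that same loop, with a hypothetical function name:

    #include <stddef.h>
    #include <stdint.h>

    /* Mirrors .loop_crc_tab: consume the top byte of the running CRC,
     * look it up, then shift the CRC left by 8 bits. */
    static uint64_t crc64_norm_by_table(uint64_t seed, const uint8_t *buf,
                                        size_t len, const uint64_t tab[256])
    {
            uint64_t crc = ~seed;
            for (size_t i = 0; i < len; i++)
                    crc = tab[(crc >> 56) ^ buf[i]] ^ (crc << 8);
            return ~crc;
    }
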
diff --git a/src/isa-l/crc/aarch64/crc64_refl_common_pmull.h b/src/isa-l/crc/aarch64/crc64_refl_common_pmull.h
new file mode 100644
index 000000000..a45501300
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc64_refl_common_pmull.h
@@ -0,0 +1,126 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc_common_pmull.h"
+
+.macro crc64_refl_func name:req
+ .arch armv8-a+crypto
+ .text
+ .align 3
+ .global \name
+ .type \name, %function
+
+/* uint64_t crc64_refl_func(uint64_t seed, const uint8_t * buf, uint64_t len) */
+
+\name\():
+ mvn x_seed, x_seed
+ mov x_counter, 0
+ cmp x_len, (FOLD_SIZE-1)
+ bhi .crc_clmul_pre
+
+.crc_tab_pre:
+ cmp x_len, x_counter
+ bls .done
+
+ adrp x_tmp, .lanchor_crc_tab
+ add x_buf_iter, x_buf, x_counter
+ add x_buf, x_buf, x_len
+ add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab
+
+ .align 3
+.loop_crc_tab:
+ ldrb w_tmp, [x_buf_iter], 1
+ eor w_tmp, w_tmp, w0
+ cmp x_buf, x_buf_iter
+ and x_tmp, x_tmp, 255
+ ldr x_tmp, [x_crc_tab_addr, x_tmp, lsl 3]
+ eor x_seed, x_tmp, x_seed, lsr 8
+ bne .loop_crc_tab
+
+.done:
+ mvn x_crc_ret, x_seed
+ ret
+
+ .align 2
+.crc_clmul_pre:
+ fmov d_x0, x_seed // save crc to d_x0
+
+ crc_refl_load_first_block
+
+ bls .clmul_loop_end
+
+ crc64_load_p4
+
+// 1024bit --> 512bit loop
+// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t)
+ crc_refl_loop
+
+.clmul_loop_end:
+// folding 512bit --> 128bit
+ crc64_fold_512b_to_128b
+
+// folding 128bit --> 64bit
+ mov x_tmp, p0_low_b0
+ movk x_tmp, p0_low_b1, lsl 16
+ movk x_tmp, p0_low_b2, lsl 32
+ movk x_tmp, p0_low_b3, lsl 48
+ fmov d_p0_low, x_tmp
+
+ pmull v_tmp_low.1q, v_x3.1d, v_p0.1d
+
+ mov d_tmp_high, v_x3.d[1]
+
+ eor v_x3.16b, v_tmp_high.16b, v_tmp_low.16b
+
+// barrett reduction
+ mov x_tmp, br_low_b0
+ movk x_tmp, br_low_b1, lsl 16
+ movk x_tmp, br_low_b2, lsl 32
+ movk x_tmp, br_low_b3, lsl 48
+ fmov d_br_low, x_tmp
+
+ mov x_tmp2, br_high_b0
+ movk x_tmp2, br_high_b1, lsl 16
+ movk x_tmp2, br_high_b2, lsl 32
+ movk x_tmp2, br_high_b3, lsl 48
+ fmov d_br_high, x_tmp2
+
+ pmull v_tmp_low.1q, v_x3.1d, v_br_low.1d
+ pmull v_tmp_high.1q, v_tmp_low.1d, v_br_high.1d
+
+ ext v_tmp_low.16b, v_br_low.16b, v_tmp_low.16b, #8
+
+ eor v_tmp_low.16b, v_tmp_low.16b, v_tmp_high.16b
+ eor v_tmp_low.16b, v_tmp_low.16b, v_x3.16b
+ umov x_crc_ret, v_tmp_low.d[1]
+
+ b .crc_tab_pre
+
+ .size \name, .-\name
+.endm
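
The reflected variant's table loop differs only in shift direction: the next byte is xored into the low byte of the CRC and the CRC shifts right. As a C sketch (hypothetical name):

    #include <stddef.h>
    #include <stdint.h>

    /* Mirrors the reflected .loop_crc_tab: xor the next byte into the low
     * byte of the CRC, look it up, then shift the CRC right by 8 bits. */
    static uint64_t crc64_refl_by_table(uint64_t seed, const uint8_t *buf,
                                        size_t len, const uint64_t tab[256])
    {
            uint64_t crc = ~seed;
            for (size_t i = 0; i < len; i++)
                    crc = tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
            return ~crc;
    }
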
diff --git a/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c
new file mode 100644
index 000000000..22ea72b14
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc_aarch64_dispatcher.c
@@ -0,0 +1,166 @@
+/**********************************************************************
+ Copyright(c) 2019-2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(crc16_t10dif)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc16_t10dif_pmull);
+
+ return PROVIDER_BASIC(crc16_t10dif);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc16_t10dif_copy_pmull);
+
+ return PROVIDER_BASIC(crc16_t10dif_copy);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc32_ieee)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL) {
+ return PROVIDER_INFO(crc32_ieee_norm_pmull);
+ }
+
+ return PROVIDER_BASIC(crc32_ieee);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32) {
+ switch (get_micro_arch_id()) {
+ case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
+ case MICRO_ARCH_ID(ARM, CORTEX_A57):
+ case MICRO_ARCH_ID(ARM, CORTEX_A72):
+ return PROVIDER_INFO(crc32_iscsi_crc_ext);
+ }
+ }
+ if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
+ return PROVIDER_INFO(crc32_iscsi_3crc_fold);
+ }
+
+ if (auxval & HWCAP_PMULL) {
+ return PROVIDER_INFO(crc32_iscsi_refl_pmull);
+ }
+ return PROVIDER_BASIC(crc32_iscsi);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+
+ if (auxval & HWCAP_CRC32) {
+ switch (get_micro_arch_id()) {
+ case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
+ case MICRO_ARCH_ID(ARM, CORTEX_A57):
+ case MICRO_ARCH_ID(ARM, CORTEX_A72):
+ return PROVIDER_INFO(crc32_gzip_refl_crc_ext);
+ }
+ }
+ if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
+ return PROVIDER_INFO(crc32_gzip_refl_3crc_fold);
+ }
+
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc32_gzip_refl_pmull);
+
+ return PROVIDER_BASIC(crc32_gzip_refl);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc64_ecma_refl_pmull);
+
+ return PROVIDER_BASIC(crc64_ecma_refl);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc64_ecma_norm_pmull);
+
+ return PROVIDER_BASIC(crc64_ecma_norm);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc64_iso_refl_pmull);
+
+ return PROVIDER_BASIC(crc64_iso_refl);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc64_iso_norm_pmull);
+
+ return PROVIDER_BASIC(crc64_iso_norm);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc64_jones_refl_pmull);
+
+ return PROVIDER_BASIC(crc64_jones_refl);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_PMULL)
+ return PROVIDER_INFO(crc64_jones_norm_pmull);
+
+ return PROVIDER_BASIC(crc64_jones_norm);
+
+}
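
Every dispatcher above follows the same shape: read the hwcap bits once via getauxval(AT_HWCAP), return the most specialized kernel the CPU advertises, and otherwise fall back to the portable base implementation. A standalone sketch of the pattern — the two kernel names are hypothetical; only the auxv calls are real Linux API:

    #include <stdint.h>
    #include <sys/auxv.h>     /* getauxval, AT_HWCAP */
    #include <asm/hwcap.h>    /* HWCAP_PMULL on aarch64 */

    typedef uint64_t (*crc64_fn)(uint64_t, const uint8_t *, uint64_t);

    uint64_t crc64_pmull_kernel(uint64_t, const uint8_t *, uint64_t); /* PMULL path */
    uint64_t crc64_base(uint64_t, const uint8_t *, uint64_t);         /* portable path */

    static crc64_fn select_crc64(void)
    {
            unsigned long hwcap = getauxval(AT_HWCAP);
            /* use the carry-less-multiply kernel only when the CPU advertises it */
            if (hwcap & HWCAP_PMULL)
                    return crc64_pmull_kernel;
            return crc64_base;
    }
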
diff --git a/src/isa-l/crc/aarch64/crc_common_pmull.h b/src/isa-l/crc/aarch64/crc_common_pmull.h
new file mode 100644
index 000000000..20a71b913
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc_common_pmull.h
@@ -0,0 +1,302 @@
+########################################################################
+# Copyright (c) 2019 Microsoft Corporation.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Microsoft Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+// parameters
+#define w_seed w0
+#define x_seed x0
+#define x_buf x1
+#define w_len w2
+#define x_len x2
+
+// return
+#define w_crc_ret w0
+#define x_crc_ret x0
+
+// constant
+#define FOLD_SIZE 64
+
+// global variables
+#define x_buf_end x3
+#define w_counter w4
+#define x_counter x4
+#define x_buf_iter x5
+#define x_crc_tab_addr x6
+#define x_tmp2 x6
+#define w_tmp w7
+#define x_tmp x7
+
+#define v_x0 v0
+#define d_x0 d0
+#define s_x0 s0
+
+#define q_x1 q1
+#define v_x1 v1
+
+#define q_x2 q2
+#define v_x2 v2
+
+#define q_x3 q3
+#define v_x3 v3
+#define d_x3 d3
+#define s_x3 s3
+
+#define q_y0 q4
+#define v_y0 v4
+#define v_tmp_high v4
+#define d_tmp_high d4
+
+#define q_y1 q5
+#define v_y1 v5
+#define v_tmp_low v5
+
+#define q_y2 q6
+#define v_y2 v6
+
+#define q_y3 q7
+#define v_y3 v7
+
+#define q_x0_tmp q30
+#define v_x0_tmp v30
+#define d_p4_high v30.d[1]
+#define d_p4_low d30
+#define v_p4 v30
+#define d_p1_high v30.d[1]
+#define d_p1_low d30
+#define v_p1 v30
+#define d_p0_high v30.d[1]
+#define d_p0_low d30
+#define v_p0 v30
+#define d_br_low d30
+#define d_br_low2 v30.d[1]
+#define v_br_low v30
+
+#define q_shuffle q31
+#define v_shuffle v31
+#define d_br_high d31
+#define d_br_high2 v31.d[1]
+#define v_br_high v31
+#define d_p0_low2 d31
+#define d_p0_high2 v31.d[1]
+#define v_p02 v31
+
+#define v_x0_high v16
+#define v_x1_high v17
+#define v_x2_high v18
+#define v_x3_high v19
+
+.macro crc_refl_load_first_block
+ ldr q_x0_tmp, [x_buf]
+ ldr q_x1, [x_buf, 16]
+ ldr q_x2, [x_buf, 32]
+ ldr q_x3, [x_buf, 48]
+
+ and x_counter, x_len, -64
+ sub x_tmp, x_counter, #64
+ cmp x_tmp, 63
+
+ add x_buf_iter, x_buf, 64
+
+ eor v_x0.16b, v_x0.16b, v_x0_tmp.16b
+.endm
+
+.macro crc_norm_load_first_block
+ adrp x_tmp, .shuffle_data
+ ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data]
+
+ ldr q_x0_tmp, [x_buf]
+ ldr q_x1, [x_buf, 16]
+ ldr q_x2, [x_buf, 32]
+ ldr q_x3, [x_buf, 48]
+
+ and x_counter, x_len, -64
+ sub x_tmp, x_counter, #64
+ cmp x_tmp, 63
+
+ add x_buf_iter, x_buf, 64
+
+ tbl v_x0_tmp.16b, {v_x0_tmp.16b}, v_shuffle.16b
+ tbl v_x1.16b, {v_x1.16b}, v_shuffle.16b
+ tbl v_x2.16b, {v_x2.16b}, v_shuffle.16b
+ tbl v_x3.16b, {v_x3.16b}, v_shuffle.16b
+
+ eor v_x0.16b, v_x0.16b, v_x0_tmp.16b
+.endm
+
+.macro crc32_load_p4
+ add x_buf_end, x_buf_iter, x_tmp
+
+ mov x_tmp, p4_low_b0
+ movk x_tmp, p4_low_b1, lsl 16
+ fmov d_p4_low, x_tmp
+
+ mov x_tmp2, p4_high_b0
+ movk x_tmp2, p4_high_b1, lsl 16
+ fmov d_p4_high, x_tmp2
+.endm
+
+.macro crc64_load_p4
+ add x_buf_end, x_buf_iter, x_tmp
+
+ mov x_tmp, p4_low_b0
+ movk x_tmp, p4_low_b1, lsl 16
+ movk x_tmp, p4_low_b2, lsl 32
+ movk x_tmp, p4_low_b3, lsl 48
+ fmov d_p4_low, x_tmp
+
+ mov x_tmp2, p4_high_b0
+ movk x_tmp2, p4_high_b1, lsl 16
+ movk x_tmp2, p4_high_b2, lsl 32
+ movk x_tmp2, p4_high_b3, lsl 48
+ fmov d_p4_high, x_tmp2
+.endm
+
+.macro crc_refl_loop
+ .align 3
+.clmul_loop:
+	// interleave ldr and pmull(2) for archs that can only issue a quadword load
+	// every other cycle (e.g. Cortex-A55)
+ ldr q_y0, [x_buf_iter]
+ pmull2 v_x0_high.1q, v_x0.2d, v_p4.2d
+ ldr q_y1, [x_buf_iter, 16]
+ pmull2 v_x1_high.1q, v_x1.2d, v_p4.2d
+ ldr q_y2, [x_buf_iter, 32]
+ pmull2 v_x2_high.1q, v_x2.2d, v_p4.2d
+ ldr q_y3, [x_buf_iter, 48]
+ pmull2 v_x3_high.1q, v_x3.2d, v_p4.2d
+
+ pmull v_x0.1q, v_x0.1d, v_p4.1d
+ add x_buf_iter, x_buf_iter, 64
+ pmull v_x1.1q, v_x1.1d, v_p4.1d
+ cmp x_buf_iter, x_buf_end
+ pmull v_x2.1q, v_x2.1d, v_p4.1d
+ pmull v_x3.1q, v_x3.1d, v_p4.1d
+
+ eor v_x0.16b, v_x0.16b, v_x0_high.16b
+ eor v_x1.16b, v_x1.16b, v_x1_high.16b
+ eor v_x2.16b, v_x2.16b, v_x2_high.16b
+ eor v_x3.16b, v_x3.16b, v_x3_high.16b
+
+ eor v_x0.16b, v_x0.16b, v_y0.16b
+ eor v_x1.16b, v_x1.16b, v_y1.16b
+ eor v_x2.16b, v_x2.16b, v_y2.16b
+ eor v_x3.16b, v_x3.16b, v_y3.16b
+ bne .clmul_loop
+.endm
+
+.macro crc_norm_loop
+ .align 3
+.clmul_loop:
+	// interleave ldr and pmull(2) for archs that can only issue a quadword load
+	// every other cycle (e.g. Cortex-A55)
+ ldr q_y0, [x_buf_iter]
+ pmull2 v_x0_high.1q, v_x0.2d, v_p4.2d
+ ldr q_y1, [x_buf_iter, 16]
+ pmull2 v_x1_high.1q, v_x1.2d, v_p4.2d
+ ldr q_y2, [x_buf_iter, 32]
+ pmull2 v_x2_high.1q, v_x2.2d, v_p4.2d
+ ldr q_y3, [x_buf_iter, 48]
+ pmull2 v_x3_high.1q, v_x3.2d, v_p4.2d
+
+ pmull v_x0.1q, v_x0.1d, v_p4.1d
+ add x_buf_iter, x_buf_iter, 64
+ pmull v_x1.1q, v_x1.1d, v_p4.1d
+ cmp x_buf_iter, x_buf_end
+ pmull v_x2.1q, v_x2.1d, v_p4.1d
+ pmull v_x3.1q, v_x3.1d, v_p4.1d
+
+ tbl v_y0.16b, {v_y0.16b}, v_shuffle.16b
+ tbl v_y1.16b, {v_y1.16b}, v_shuffle.16b
+ tbl v_y2.16b, {v_y2.16b}, v_shuffle.16b
+ tbl v_y3.16b, {v_y3.16b}, v_shuffle.16b
+
+ eor v_x0.16b, v_x0.16b, v_x0_high.16b
+ eor v_x1.16b, v_x1.16b, v_x1_high.16b
+ eor v_x2.16b, v_x2.16b, v_x2_high.16b
+ eor v_x3.16b, v_x3.16b, v_x3_high.16b
+
+ eor v_x0.16b, v_x0.16b, v_y0.16b
+ eor v_x1.16b, v_x1.16b, v_y1.16b
+ eor v_x2.16b, v_x2.16b, v_y2.16b
+ eor v_x3.16b, v_x3.16b, v_y3.16b
+ bne .clmul_loop
+.endm
+
+.macro crc32_fold_512b_to_128b
+ mov x_tmp, p1_low_b0
+ movk x_tmp, p1_low_b1, lsl 16
+ fmov d_p1_low, x_tmp
+
+ mov x_tmp2, p1_high_b0
+ movk x_tmp2, p1_high_b1, lsl 16
+ fmov d_p1_high, x_tmp2
+
+ pmull2 v_tmp_high.1q, v_x0.2d, v_p1.2d
+ pmull v_tmp_low.1q, v_x0.1d, v_p1.1d
+ eor v_x1.16b, v_x1.16b, v_tmp_high.16b
+ eor v_x1.16b, v_x1.16b, v_tmp_low.16b
+
+ pmull2 v_tmp_high.1q, v_x1.2d, v_p1.2d
+ pmull v_tmp_low.1q, v_x1.1d, v_p1.1d
+ eor v_x2.16b, v_x2.16b, v_tmp_high.16b
+ eor v_x2.16b, v_x2.16b, v_tmp_low.16b
+
+ pmull2 v_tmp_high.1q, v_x2.2d, v_p1.2d
+ pmull v_tmp_low.1q, v_x2.1d, v_p1.1d
+ eor v_x3.16b, v_x3.16b, v_tmp_high.16b
+ eor v_x3.16b, v_x3.16b, v_tmp_low.16b
+.endm
+
+.macro crc64_fold_512b_to_128b
+ mov x_tmp, p1_low_b0
+ movk x_tmp, p1_low_b1, lsl 16
+ movk x_tmp, p1_low_b2, lsl 32
+ movk x_tmp, p1_low_b3, lsl 48
+ fmov d_p1_low, x_tmp
+
+ mov x_tmp2, p1_high_b0
+ movk x_tmp2, p1_high_b1, lsl 16
+ movk x_tmp2, p1_high_b2, lsl 32
+ movk x_tmp2, p1_high_b3, lsl 48
+ fmov d_p1_high, x_tmp2
+
+ pmull2 v_tmp_high.1q, v_x0.2d, v_p1.2d
+ pmull v_tmp_low.1q, v_x0.1d, v_p1.1d
+ eor v_x1.16b, v_x1.16b, v_tmp_high.16b
+ eor v_x1.16b, v_x1.16b, v_tmp_low.16b
+
+ pmull2 v_tmp_high.1q, v_x1.2d, v_p1.2d
+ pmull v_tmp_low.1q, v_x1.1d, v_p1.1d
+ eor v_x2.16b, v_x2.16b, v_tmp_high.16b
+ eor v_x2.16b, v_x2.16b, v_tmp_low.16b
+
+ pmull2 v_tmp_high.1q, v_x2.2d, v_p1.2d
+ pmull v_tmp_low.1q, v_x2.1d, v_p1.1d
+ eor v_x3.16b, v_x3.16b, v_tmp_high.16b
+ eor v_x3.16b, v_x3.16b, v_tmp_low.16b
+.endm
\ No newline at end of file
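
The hot loop in the macros above performs, per 128-bit lane, two 64x64->128 carry-less multiplies (pmull on the low doublewords, pmull2 on the high), then xors in the previous fold result and the newly loaded data. A NEON-intrinsics sketch of one lane's fold step, assuming a compiler targeting armv8-a+crypto:

    #include <arm_neon.h>

    /* x' = clmul(x.lo, k.lo) ^ clmul(x.hi, k.hi) ^ y, matching the
     * pmull/pmull2/eor triple of one lane in crc_refl_loop above. */
    static inline uint64x2_t fold128(uint64x2_t x, uint64x2_t y, poly64x2_t k)
    {
            poly128_t lo = vmull_p64((poly64_t)vgetq_lane_u64(x, 0),
                                     (poly64_t)vgetq_lane_p64(k, 0));
            poly128_t hi = vmull_high_p64(vreinterpretq_p64_u64(x), k);
            uint64x2_t r = veorq_u64(vreinterpretq_u64_p128(lo),
                                     vreinterpretq_u64_p128(hi));
            return veorq_u64(r, y);
    }
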
diff --git a/src/isa-l/crc/aarch64/crc_multibinary_arm.S b/src/isa-l/crc/aarch64/crc_multibinary_arm.S
new file mode 100644
index 000000000..76f957164
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc_multibinary_arm.S
@@ -0,0 +1,42 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+#include <aarch64_multibinary.h>
+
+
+mbin_interface crc32_iscsi
+mbin_interface crc16_t10dif
+mbin_interface crc16_t10dif_copy
+mbin_interface crc32_ieee
+mbin_interface crc32_gzip_refl
+mbin_interface crc64_ecma_refl
+mbin_interface crc64_ecma_norm
+mbin_interface crc64_iso_refl
+mbin_interface crc64_iso_norm
+mbin_interface crc64_jones_refl
+mbin_interface crc64_jones_norm
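
Each mbin_interface above emits a dispatching stub, so callers simply use the plain function names and get the best kernel for the running CPU on first call. A hedged usage sketch — the header name and the crc64_ecma_refl signature follow the usual isa-l conventions but are assumed here, not taken from this diff:

    #include <stdint.h>
    #include <stdio.h>
    #include "crc64.h"   /* isa-l public header; include path assumed */

    int main(void)
    {
            const unsigned char msg[] = "123456789";
            /* seed 0; the multibinary stub resolves the kernel on first use */
            uint64_t crc = crc64_ecma_refl(0, msg, sizeof(msg) - 1);
            printf("crc64(ecma, refl) = 0x%016llx\n", (unsigned long long)crc);
            return 0;
    }
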
diff --git a/src/isa-l/crc/crc16_t10dif_01.asm b/src/isa-l/crc/crc16_t10dif_01.asm
new file mode 100644
index 000000000..536b6f38d
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_01.asm
@@ -0,0 +1,666 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function API:
+; UINT16 crc16_t10dif_01(
+; UINT16 init_crc, //initial CRC value, 16 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
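+;
+; For illustration, a C caller of the prototype above could look like the
+; sketch below (applications normally go through the crc16_t10dif
+; multibinary entry point rather than calling this routine directly;
+; the wrapper name dif_crc is illustrative only):
+;
+;     #include <stdint.h>
+;
+;     extern uint16_t crc16_t10dif_01(uint16_t init_crc,
+;                                     const unsigned char *buf, uint64_t len);
+;
+;     uint16_t dif_crc(const unsigned char *blk, uint64_t n)
+;     {
+;         return crc16_t10dif_01(0, blk, n);   /* T10-DIF seed is 0 */
+;     }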
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global crc16_t10dif_01, function
+crc16_t10dif_01:
+ endbranch
+
+ ; adjust the 16-bit initial_crc value, scale it to 32 bits
+ shl arg1_low32, 16
+
+	; After this point, the code flow is exactly the same as for a 32-bit CRC.
+	; The only difference is that before returning eax, we shift it right by 16 bits to scale back to 16 bits.
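+	; (e.g. an init_crc of 0x1234 becomes 0x12340000 here, matching the
+	; polynomial, which is scaled the same way: 0x8bb7 -> 0x8bb70000; see
+	; the rk constant comments near the bottom of this file)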
+
+ sub rsp, VARIABLE_OFFSET
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the callee-saved xmm registers on the stack (win64 ABI)
+ movdqa [rsp+16*2],xmm6
+ movdqa [rsp+16*3],xmm7
+ movdqa [rsp+16*4],xmm8
+ movdqa [rsp+16*5],xmm9
+ movdqa [rsp+16*6],xmm10
+ movdqa [rsp+16*7],xmm11
+ movdqa [rsp+16*8],xmm12
+ movdqa [rsp+16*9],xmm13
+%endif
+
+ ; check if smaller than 256
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movd xmm10, arg1_low32 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then align with the initial crc in the correct place.
+ pslldq xmm10, 12
+
+ movdqa xmm11, [SHUF_MASK]
+ ; receive the initial 128B data, xor the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ pshufb xmm0, xmm11
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ pshufb xmm1, xmm11
+ pshufb xmm2, xmm11
+ pshufb xmm3, xmm11
+ pshufb xmm4, xmm11
+ pshufb xmm5, xmm11
+ pshufb xmm6, xmm11
+ pshufb xmm7, xmm11
+
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0 <= y < 128) bytes of buffer. The _fold_128_B_loop
+	; below will fold 128B at a time until only 128+y bytes of buffer remain
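+	; (per pass, each 128-bit lane X with incoming data D is updated
+	; carry-lessly as X' = clmul(X_lo, rk3) xor clmul(X_hi, rk4) xor D)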
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128 ; buf += 128;
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+ pclmulqdq xmm0, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm1, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+ pclmulqdq xmm2, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm3, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+ pclmulqdq xmm4, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm5, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+ pclmulqdq xmm6, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm7, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+
+ movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pshufb xmm0, xmm11
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here we handle a tail of less than 16 bytes.
+	; since we know there was data before the pointer, we can back the input pointer up to load exactly 16 bytes;
+	; the registers then need to be adjusted to discard the extra data.
+_get_last_two_xmms:
+ movdqa xmm2, xmm7
+
+ movdqu xmm1, [arg2 - 16 + arg3]
+ pshufb xmm1, xmm11
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ movdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ pshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ pxor xmm0, [mask1]
+ pshufb xmm7, xmm0
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ ; fold 16 Bytes
+ movdqa xmm2, xmm1
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0x1
+ pslldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;32b fold
+ movdqa xmm0, xmm7
+
+ pand xmm0, [mask2]
+
+ psrldq xmm7, 12
+ pclmulqdq xmm7, xmm10, 0x10
+ pxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ movdqa xmm0, xmm7
+ pclmulqdq xmm7, xmm10, 0x01
+ pslldq xmm7, 4
+ pclmulqdq xmm7, xmm10, 0x11
+
+ pslldq xmm7, 4
+ pxor xmm7, xmm0
+ pextrd eax, xmm7,1
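+	; (Barrett reduction, following the paper cited above: rk7 =
+	; floor(x^64/Q) gives a quotient estimate, which is then multiplied
+	; by Q (rk8) and xored with the saved copy in xmm0 to leave the
+	; 32-bit remainder in dword 1 of xmm7, extracted by the pextrd)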
+
+_cleanup:
+ ; scale the result back to 16 bits
+ shr eax, 16
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp+16*2]
+ movdqa xmm7, [rsp+16*3]
+ movdqa xmm8, [rsp+16*4]
+ movdqa xmm9, [rsp+16*5]
+ movdqa xmm10, [rsp+16*6]
+ movdqa xmm11, [rsp+16*7]
+ movdqa xmm12, [rsp+16*8]
+ movdqa xmm13, [rsp+16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ movdqa xmm11, [SHUF_MASK]
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je _cleanup
+
+ movdqa xmm11, [SHUF_MASK]
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+	; use stack space to load data of less than 16 bytes; zero out the 16B on the stack first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask1]
+
+ pshufb xmm7, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg3, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ psrldq xmm7, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg3, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ psrldq xmm7, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ psrldq xmm7, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits)
+align 16
+; Q = 0x18BB70000
+; rk1 = 2^(32*3) mod Q << 32
+; rk2 = 2^(32*5) mod Q << 32
+; rk3 = 2^(32*15) mod Q << 32
+; rk4 = 2^(32*17) mod Q << 32
+; rk5 = 2^(32*3) mod Q << 32
+; rk6 = 2^(32*2) mod Q << 32
+; rk7 = floor(2^64/Q)
+; rk8 = Q
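+;
+; Note: "mod Q" above is carry-less GF(2) polynomial arithmetic, not integer
+; arithmetic. A minimal C sketch that should reproduce these constants
+; (the helper name xn_mod_q is illustrative only):
+;
+;     #include <stdint.h>
+;
+;     /* remainder of x^n modulo the degree-32 polynomial q (bit 32 set) */
+;     static uint32_t xn_mod_q(unsigned n, uint64_t q)
+;     {
+;         uint64_t r = 1;                     /* start from x^0 */
+;         while (n--) {
+;             r <<= 1;                        /* multiply by x */
+;             if (r & (1ULL << 32))
+;                 r ^= q;                     /* reduce when degree 32 is hit */
+;         }
+;         return (uint32_t)r;
+;     }
+;
+;     /* e.g. rk1 == (uint64_t)xn_mod_q(32*3, 0x18BB70000ULL) << 32 */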
+rk1:
+DQ 0x2d56000000000000
+rk2:
+DQ 0x06df000000000000
+rk3:
+DQ 0x9d9d000000000000
+rk4:
+DQ 0x7cf5000000000000
+rk5:
+DQ 0x2d56000000000000
+rk6:
+DQ 0x1368000000000000
+rk7:
+DQ 0x00000001f65a57f8
+rk8:
+DQ 0x000000018bb70000
+
+rk9:
+DQ 0xceae000000000000
+rk10:
+DQ 0xbfd6000000000000
+rk11:
+DQ 0x1e16000000000000
+rk12:
+DQ 0x713c000000000000
+rk13:
+DQ 0xf7f9000000000000
+rk14:
+DQ 0x80a6000000000000
+rk15:
+DQ 0x044c000000000000
+rk16:
+DQ 0xe658000000000000
+rk17:
+DQ 0xad18000000000000
+rk18:
+DQ 0xa497000000000000
+rk19:
+DQ 0x6ee3000000000000
+rk20:
+DQ 0xe7b5000000000000
+
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+;;; func core, ver, snum
+slversion crc16_t10dif_01, 01, 06, 0010
+
diff --git a/src/isa-l/crc/crc16_t10dif_02.asm b/src/isa-l/crc/crc16_t10dif_02.asm
new file mode 100644
index 000000000..0e392afb1
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_02.asm
@@ -0,0 +1,654 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function API:
+; UINT16 crc16_t10dif_02(
+; UINT16 init_crc, //initial CRC value, 16 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
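+;
+; Note: this routine follows the same folding algorithm as crc16_t10dif_01,
+; using the VEX-encoded (AVX) forms of the same instructions; constants and
+; control flow are otherwise identical.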
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global crc16_t10dif_02, function
+crc16_t10dif_02:
+ endbranch
+
+ ; adjust the 16-bit initial_crc value, scale it to 32 bits
+ shl arg1_low32, 16
+
+	; After this point, the code flow is exactly the same as for a 32-bit CRC.
+	; The only difference is that before returning eax, we shift it right by 16 bits to scale back to 16 bits.
+
+ sub rsp, VARIABLE_OFFSET
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the callee-saved xmm registers on the stack (win64 ABI)
+ vmovdqa [rsp+16*2],xmm6
+ vmovdqa [rsp+16*3],xmm7
+ vmovdqa [rsp+16*4],xmm8
+ vmovdqa [rsp+16*5],xmm9
+ vmovdqa [rsp+16*6],xmm10
+ vmovdqa [rsp+16*7],xmm11
+ vmovdqa [rsp+16*8],xmm12
+ vmovdqa [rsp+16*9],xmm13
+%endif
+
+ ; check if smaller than 256
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ vmovd xmm10, arg1_low32 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then align with the initial crc in the correct place.
+ vpslldq xmm10, 12
+
+ vmovdqa xmm11, [SHUF_MASK]
+ ; receive the initial 128B data, xor the initial crc value
+ vmovdqu xmm0, [arg2+16*0]
+ vmovdqu xmm1, [arg2+16*1]
+ vmovdqu xmm2, [arg2+16*2]
+ vmovdqu xmm3, [arg2+16*3]
+ vmovdqu xmm4, [arg2+16*4]
+ vmovdqu xmm5, [arg2+16*5]
+ vmovdqu xmm6, [arg2+16*6]
+ vmovdqu xmm7, [arg2+16*7]
+
+ vpshufb xmm0, xmm11
+ ; XOR the initial_crc value
+ vpxor xmm0, xmm10
+ vpshufb xmm1, xmm11
+ vpshufb xmm2, xmm11
+ vpshufb xmm3, xmm11
+ vpshufb xmm4, xmm11
+ vpshufb xmm5, xmm11
+ vpshufb xmm6, xmm11
+ vpshufb xmm7, xmm11
+
+ vmovdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0 <= y < 128) bytes of buffer. The _fold_128_B_loop
+	; below will fold 128B at a time until only 128+y bytes of buffer remain
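+	; (e.g. for len = 600: 128B are loaded up front and the counter drops
+	; to 600-256 = 344; the loop then runs three passes (344 -> 216 -> 88
+	; -> -40), consuming 128B each, and exits with y = 88 bytes left in
+	; memory plus the 128B already folded in the registers)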
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128 ; buf += 128;
+
+ prefetchnta [arg2+fetch_dist+0]
+ vmovdqu xmm9, [arg2+16*0]
+ vmovdqu xmm12, [arg2+16*1]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm0
+ vmovdqa xmm13, xmm1
+ vpclmulqdq xmm0, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm1, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm0, xmm9
+ vxorps xmm0, xmm8
+ vpxor xmm1, xmm12
+ vxorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ vmovdqu xmm9, [arg2+16*2]
+ vmovdqu xmm12, [arg2+16*3]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm2
+ vmovdqa xmm13, xmm3
+ vpclmulqdq xmm2, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm3, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm2, xmm9
+ vxorps xmm2, xmm8
+ vpxor xmm3, xmm12
+ vxorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ vmovdqu xmm9, [arg2+16*4]
+ vmovdqu xmm12, [arg2+16*5]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm4
+ vmovdqa xmm13, xmm5
+ vpclmulqdq xmm4, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm5, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm4, xmm9
+ vxorps xmm4, xmm8
+ vpxor xmm5, xmm12
+ vxorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ vmovdqu xmm9, [arg2+16*6]
+ vmovdqu xmm12, [arg2+16*7]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm6
+ vmovdqa xmm13, xmm7
+ vpclmulqdq xmm6, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm7, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm6, xmm9
+ vxorps xmm6, xmm8
+ vpxor xmm7, xmm12
+ vxorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+
+ vmovdqa xmm10, [rk9]
+ vmovdqa xmm8, xmm0
+ vpclmulqdq xmm0, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm0
+
+ vmovdqa xmm10, [rk11]
+ vmovdqa xmm8, xmm1
+ vpclmulqdq xmm1, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm1
+
+ vmovdqa xmm10, [rk13]
+ vmovdqa xmm8, xmm2
+ vpclmulqdq xmm2, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+ vmovdqa xmm10, [rk15]
+ vmovdqa xmm8, xmm3
+ vpclmulqdq xmm3, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm3
+
+ vmovdqa xmm10, [rk17]
+ vmovdqa xmm8, xmm4
+ vpclmulqdq xmm4, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm4
+
+ vmovdqa xmm10, [rk19]
+ vmovdqa xmm8, xmm5
+ vpclmulqdq xmm5, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm5
+
+ vmovdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ vmovdqa xmm8, xmm6
+ vpclmulqdq xmm6, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm6
+
+
+ ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpshufb xmm0, xmm11
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here we handle a tail of less than 16 bytes.
+	; since we know there was data before the pointer, we can back the input pointer up to load exactly 16 bytes;
+	; the registers then need to be adjusted to discard the extra data.
+_get_last_two_xmms:
+ vmovdqa xmm2, xmm7
+
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+ vpshufb xmm1, xmm11
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ vmovdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ vpshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ vpxor xmm0, [mask1]
+ vpshufb xmm7, xmm0
+ vpblendvb xmm1, xmm1, xmm2, xmm0
+
+ ; fold 16 Bytes
+ vmovdqa xmm2, xmm1
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0x1
+ vpslldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm7
+
+ vpand xmm0, [mask2]
+
+ vpsrldq xmm7, 12
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
+ vmovdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ vmovdqa xmm0, xmm7
+ vpclmulqdq xmm7, xmm10, 0x01
+ vpslldq xmm7, 4
+ vpclmulqdq xmm7, xmm10, 0x11
+
+ vpslldq xmm7, 4
+ vpxor xmm7, xmm0
+ vpextrd eax, xmm7,1
+
+_cleanup:
+ ; scale the result back to 16 bits
+ shr eax, 16
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp+16*2]
+ vmovdqa xmm7, [rsp+16*3]
+ vmovdqa xmm8, [rsp+16*4]
+ vmovdqa xmm9, [rsp+16*5]
+ vmovdqa xmm10, [rsp+16*6]
+ vmovdqa xmm11, [rsp+16*7]
+ vmovdqa xmm12, [rsp+16*8]
+ vmovdqa xmm13, [rsp+16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ vmovdqa xmm11, [SHUF_MASK]
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm11 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je _cleanup
+
+ vmovdqa xmm11, [SHUF_MASK]
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm11 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+	; use stack space to load data of less than 16 bytes; zero out the 16B on the stack first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+ vmovdqu xmm0, [rax]
+ vpxor xmm0, [mask1]
+
+ vpshufb xmm7, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg3, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg3, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits)
+align 16
+; Q = 0x18BB70000
+; rk1 = 2^(32*3) mod Q << 32
+; rk2 = 2^(32*5) mod Q << 32
+; rk3 = 2^(32*15) mod Q << 32
+; rk4 = 2^(32*17) mod Q << 32
+; rk5 = 2^(32*3) mod Q << 32
+; rk6 = 2^(32*2) mod Q << 32
+; rk7 = floor(2^64/Q)
+; rk8 = Q
+rk1:
+DQ 0x2d56000000000000
+rk2:
+DQ 0x06df000000000000
+rk3:
+DQ 0x9d9d000000000000
+rk4:
+DQ 0x7cf5000000000000
+rk5:
+DQ 0x2d56000000000000
+rk6:
+DQ 0x1368000000000000
+rk7:
+DQ 0x00000001f65a57f8
+rk8:
+DQ 0x000000018bb70000
+
+rk9:
+DQ 0xceae000000000000
+rk10:
+DQ 0xbfd6000000000000
+rk11:
+DQ 0x1e16000000000000
+rk12:
+DQ 0x713c000000000000
+rk13:
+DQ 0xf7f9000000000000
+rk14:
+DQ 0x80a6000000000000
+rk15:
+DQ 0x044c000000000000
+rk16:
+DQ 0xe658000000000000
+rk17:
+DQ 0xad18000000000000
+rk18:
+DQ 0xa497000000000000
+rk19:
+DQ 0x6ee3000000000000
+rk20:
+DQ 0xe7b5000000000000
+
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
diff --git a/src/isa-l/crc/crc16_t10dif_by16_10.asm b/src/isa-l/crc/crc16_t10dif_by16_10.asm
new file mode 100644
index 000000000..27a2e02a0
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_by16_10.asm
@@ -0,0 +1,591 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+;       UINT16 crc16_t10dif_by16_10(
+; UINT16 init_crc, //initial CRC value, 16 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+;
+;
+
+%include "reg_sizes.asm"
+
+%ifndef FUNCTION_NAME
+%define FUNCTION_NAME crc16_t10dif_by16_10
+%endif
+
+%if (AS_FEATURE_LEVEL) >= 10
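+; AS_FEATURE_LEVEL >= 10 means the assembler can encode the AVX-512 and
+; VPCLMULQDQ opcodes used below; with an older assembler only the empty
+; fallback symbol at the bottom of this file is emitted.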
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*12+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global FUNCTION_NAME, function
+FUNCTION_NAME:
+ endbranch
+
+ ; adjust the 16-bit initial_crc value, scale it to 32 bits
+ shl arg1_low32, 16
+
+	; After this point, the code flow is exactly the same as for a 32-bit CRC.
+	; The only difference is that before returning eax, we shift it right by 16 bits to scale back to 16 bits.
+
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the callee-saved xmm registers on the stack (win64 ABI)
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+ vmovdqa [rsp + XMM_SAVE + 16*8], xmm14
+ vmovdqa [rsp + XMM_SAVE + 16*9], xmm15
+%endif
+
+ vbroadcasti32x4 zmm18, [SHUF_MASK]
+ cmp arg3, 256
+ jl .less_than_256
+
+ ; load the initial crc value
+ vmovd xmm10, arg1_low32 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then align with the initial crc in the correct place.
+ vpslldq xmm10, 12
+
+	; load the initial 128B of data into zmm0 and zmm4, xor the initial crc value
+ vmovdqu8 zmm0, [arg2+16*0]
+ vmovdqu8 zmm4, [arg2+16*4]
+ vpshufb zmm0, zmm0, zmm18
+ vpshufb zmm4, zmm4, zmm18
+ vpxorq zmm0, zmm10
+ vbroadcasti32x4 zmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+
+ sub arg3, 256
+ cmp arg3, 256
+ jl .fold_128_B_loop
+
+ vmovdqu8 zmm7, [arg2+16*8]
+ vmovdqu8 zmm8, [arg2+16*12]
+ vpshufb zmm7, zmm7, zmm18
+ vpshufb zmm8, zmm8, zmm18
+	vbroadcasti32x4 zmm16, [rk_1]	;zmm16 has rk_1 and rk_2
+ sub arg3, 256
+
+.fold_256_B_loop:
+ add arg2, 256
+ vmovdqu8 zmm3, [arg2+16*0]
+ vpshufb zmm3, zmm3, zmm18
+ vpclmulqdq zmm1, zmm0, zmm16, 0x00
+ vpclmulqdq zmm2, zmm0, zmm16, 0x11
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm3
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpshufb zmm9, zmm9, zmm18
+ vpclmulqdq zmm5, zmm4, zmm16, 0x00
+ vpclmulqdq zmm6, zmm4, zmm16, 0x11
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ vmovdqu8 zmm11, [arg2+16*8]
+ vpshufb zmm11, zmm11, zmm18
+ vpclmulqdq zmm12, zmm7, zmm16, 0x00
+ vpclmulqdq zmm13, zmm7, zmm16, 0x11
+ vpxorq zmm7, zmm12, zmm13
+ vpxorq zmm7, zmm7, zmm11
+
+ vmovdqu8 zmm17, [arg2+16*12]
+ vpshufb zmm17, zmm17, zmm18
+ vpclmulqdq zmm14, zmm8, zmm16, 0x00
+ vpclmulqdq zmm15, zmm8, zmm16, 0x11
+ vpxorq zmm8, zmm14, zmm15
+ vpxorq zmm8, zmm8, zmm17
+
+ sub arg3, 256
+ jge .fold_256_B_loop
+
+ ;; Fold 256 into 128
+ add arg2, 256
+ vpclmulqdq zmm1, zmm0, zmm10, 0x00
+ vpclmulqdq zmm2, zmm0, zmm10, 0x11
+ vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC
+
+ vpclmulqdq zmm5, zmm4, zmm10, 0x00
+ vpclmulqdq zmm6, zmm4, zmm10, 0x11
+ vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC
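+	; (imm8 0x96 is the truth table of A xor B xor C, so each vpternlogq
+	; above performs a three-way xor in a single instruction)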
+
+ vmovdqa32 zmm0, zmm7
+ vmovdqa32 zmm4, zmm8
+
+ add arg3, 128
+ jmp .fold_128_B_register
+
+
+
+	; at this point there are 128*x+y (0 <= y < 128) bytes of buffer. The .fold_128_B_loop
+	; below will fold 128B at a time until only 128+y bytes of buffer remain
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+.fold_128_B_loop:
+ add arg2, 128
+ vmovdqu8 zmm8, [arg2+16*0]
+ vpshufb zmm8, zmm8, zmm18
+ vpclmulqdq zmm2, zmm0, zmm10, 0x00
+ vpclmulqdq zmm1, zmm0, zmm10, 0x11
+ vpxorq zmm0, zmm2, zmm1
+ vpxorq zmm0, zmm0, zmm8
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpshufb zmm9, zmm9, zmm18
+ vpclmulqdq zmm5, zmm4, zmm10, 0x00
+ vpclmulqdq zmm6, zmm4, zmm10, 0x11
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ sub arg3, 128
+ jge .fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+	; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+	; the 128B of folded data is held in two zmm registers: zmm0 and zmm4
+
+.fold_128_B_register:
+ ; fold the 8 128b parts into 1 xmm register with different constants
+ vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16
+ vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0
+ vpclmulqdq zmm1, zmm0, zmm16, 0x00
+ vpclmulqdq zmm2, zmm0, zmm16, 0x11
+ vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand
+
+ vpclmulqdq zmm5, zmm4, zmm11, 0x00
+ vpclmulqdq zmm6, zmm4, zmm11, 0x11
+ vmovdqa xmm10, [rk1] ; Needed later in reduction loop
+ vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC
+ vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC
+
+ vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10
+ vpxorq ymm8, ymm8, ymm1
+ vextracti64x2 xmm5, ymm8, 1
+ vpxorq xmm7, xmm5, xmm8
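+	; the four lines above xor the four 128-bit lanes of zmm1 down to one
+	; xmm: swap the 256-bit halves and xor, then fold the remaining two
+	; 128-bit lanes into xmm7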
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl .final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+.16B_reduction_loop:
+ vpclmulqdq xmm8, xmm7, xmm10, 0x11
+ vpclmulqdq xmm7, xmm7, xmm10, 0x00
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpshufb xmm0, xmm0, xmm18
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge .16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm7 register
+
+
+.final_reduction_for_128:
+ add arg3, 16
+ je .128_done
+
+	; here we handle a tail of less than 16 bytes.
+	; since we know there was data before the pointer, we can back the
+	; input pointer up to load exactly 16 bytes;
+	; the registers then need to be adjusted to discard the extra data.
+.get_last_two_xmms:
+
+ vmovdqa xmm2, xmm7
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+ vpshufb xmm1, xmm18
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ vmovdqu xmm0, [rax]
+
+ vpshufb xmm2, xmm0
+ vpxor xmm0, [mask1]
+ vpshufb xmm7, xmm0
+ vpblendvb xmm1, xmm1, xmm2, xmm0
+
+ vpclmulqdq xmm8, xmm7, xmm10, 0x11
+ vpclmulqdq xmm7, xmm7, xmm10, 0x00
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm1
+
+.128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5]
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0x01 ; H*L
+ vpslldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm7
+ vpand xmm0, [mask2]
+ vpsrldq xmm7, 12
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm0
+
+ ;barrett reduction
+.barrett:
+ vmovdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ vmovdqa xmm0, xmm7
+ vpclmulqdq xmm7, xmm10, 0x01
+ vpslldq xmm7, 4
+ vpclmulqdq xmm7, xmm10, 0x11
+
+ vpslldq xmm7, 4
+ vpxor xmm7, xmm0
+ vpextrd eax, xmm7, 1
+
+.cleanup:
+ ; scale the result back to 16 bits
+ shr eax, 16
+
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+ vmovdqa xmm14, [rsp + XMM_SAVE + 16*8]
+ vmovdqa xmm15, [rsp + XMM_SAVE + 16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+.less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl .less_than_32
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm18 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp .16B_reduction_loop
+
+
+align 16
+.less_than_32:
+	; move the initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je .cleanup
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg3, 16
+ je .exact_16_left
+ jl .less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp .get_last_two_xmms
+
+align 16
+.less_than_16_left:
+	; use stack space to load data of less than 16 bytes; zero out the 16B on the stack first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl .only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl .less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+.less_than_8_left:
+
+ cmp arg3, 4
+ jl .less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+.less_than_4_left:
+
+ cmp arg3, 2
+ jl .less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+.less_than_2_left:
+ cmp arg3, 1
+ jl .zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+.zero_left:
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+ vmovdqu xmm0, [rax]
+ vpxor xmm0, [mask1]
+
+	vpshufb xmm7, xmm0
+ jmp .128_done
+
+align 16
+.exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ jmp .128_done
+
+.only_less_than_4:
+ cmp arg3, 3
+ jl .only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 5
+ jmp .barrett
+
+.only_less_than_3:
+ cmp arg3, 2
+ jl .only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 6
+ jmp .barrett
+
+.only_less_than_2:
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 7
+ jmp .barrett
+
+section .data
+align 32
+
+%ifndef USE_CONSTS
+; precomputed constants
+
+rk_1: dq 0xdccf000000000000
+rk_2: dq 0x4b0b000000000000
+rk1: dq 0x2d56000000000000
+rk2: dq 0x06df000000000000
+rk3: dq 0x9d9d000000000000
+rk4: dq 0x7cf5000000000000
+rk5: dq 0x2d56000000000000
+rk6: dq 0x1368000000000000
+rk7: dq 0x00000001f65a57f8
+rk8: dq 0x000000018bb70000
+rk9: dq 0xceae000000000000
+rk10: dq 0xbfd6000000000000
+rk11: dq 0x1e16000000000000
+rk12: dq 0x713c000000000000
+rk13: dq 0xf7f9000000000000
+rk14: dq 0x80a6000000000000
+rk15: dq 0x044c000000000000
+rk16: dq 0xe658000000000000
+rk17: dq 0xad18000000000000
+rk18: dq 0xa497000000000000
+rk19: dq 0x6ee3000000000000
+rk20: dq 0xe7b5000000000000
+
+rk_1b: dq 0x2d56000000000000
+rk_2b: dq 0x06df000000000000
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%else
+INCLUDE_CONSTS
+%endif
+
+mask1: dq 0x8080808080808080, 0x8080808080808080
+mask2: dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK: dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x8080808080808080
+
+%else ; The assembler doesn't understand these opcodes. Define an empty symbol for Windows.
+%ifidn __OUTPUT_FORMAT__, win64
+global no_ %+ FUNCTION_NAME
+no_ %+ FUNCTION_NAME %+ :
+%endif
+%endif ; (AS_FEATURE_LEVEL) >= 10
diff --git a/src/isa-l/crc/crc16_t10dif_by4.asm b/src/isa-l/crc/crc16_t10dif_by4.asm
new file mode 100644
index 000000000..1326eb2f5
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_by4.asm
@@ -0,0 +1,563 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Function API:
+; UINT16 crc16_t10dif_by4(
+; UINT16 init_crc, //initial CRC value, 16 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://download.intel.com/design/intarch/papers/323102.pdf
+;
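+; Note: this is the narrowest variant in the family; it folds 64B (four
+; xmm registers) per loop pass instead of 128B, and since it only touches
+; xmm0-xmm7 the win64 prologue below needs to save just xmm6 and xmm7.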
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+align 16
+mk_global crc16_t10dif_by4, function
+crc16_t10dif_by4:
+ endbranch
+
+ ; adjust the 16-bit initial_crc value, scale it to 32 bits
+ shl arg1_low32, 16
+
+	; After this point, the code flow is exactly the same as for a
+	; 32-bit CRC. The only difference is that before returning eax,
+	; we shift it right by 16 bits, to scale back to 16 bits.
+
+ sub rsp,16*4+8
+
+	; save the callee-saved xmm registers on the stack (win64 ABI)
+ movdqa [rsp+16*2],xmm6
+ movdqa [rsp+16*3],xmm7
+
+ ; check if smaller than 128B
+ cmp arg3, 128
+
+ ; for sizes less than 128, we can't fold 64B at a time...
+ jl _less_than_128
+
+
+ ; load the initial crc value
+ movd xmm6, arg1_low32 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need
+	; to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then align with
+	; the initial crc in the correct place.
+ pslldq xmm6, 12
+
+ movdqa xmm7, [SHUF_MASK]
+ ; receive the initial 64B data, xor the initial crc value
+ movdqu xmm0, [arg2]
+ movdqu xmm1, [arg2+16]
+ movdqu xmm2, [arg2+32]
+ movdqu xmm3, [arg2+48]
+
+ pshufb xmm0, xmm7
+ ; XOR the initial_crc value
+ pxor xmm0, xmm6
+ pshufb xmm1, xmm7
+ pshufb xmm2, xmm7
+ pshufb xmm3, xmm7
+
+ movdqa xmm6, [rk3] ;xmm6 has rk3 and rk4
+ ;imm value of pclmulqdq instruction
+ ;will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 128 instead of 64 to save one instruction from the loop
+ sub arg3, 128
+
+	; at this point there are 64*x+y (0 <= y < 64) bytes of
+	; buffer. The _fold_64_B_loop
+	; below will fold 64B at a time until only 64+y bytes of buffer remain
+
+
+ ; fold 64B at a time. This section of the code folds 4 xmm
+ ; registers in parallel
+_fold_64_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 64 ; buf += 64;
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm4, xmm0
+ movdqu xmm5, xmm1
+
+ pclmulqdq xmm0, xmm6 , 0x11
+ pclmulqdq xmm1, xmm6 , 0x11
+
+ pclmulqdq xmm4, xmm6, 0x0
+ pclmulqdq xmm5, xmm6, 0x0
+
+ pxor xmm0, xmm4
+ pxor xmm1, xmm5
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm4, xmm2
+ movdqu xmm5, xmm3
+
+ pclmulqdq xmm2, xmm6, 0x11
+ pclmulqdq xmm3, xmm6, 0x11
+
+ pclmulqdq xmm4, xmm6, 0x0
+ pclmulqdq xmm5, xmm6, 0x0
+
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+
+ movdqu xmm4, [arg2]
+ movdqu xmm5, [arg2+16]
+ pshufb xmm4, xmm7
+ pshufb xmm5, xmm7
+ pxor xmm0, xmm4
+ pxor xmm1, xmm5
+
+ movdqu xmm4, [arg2+32]
+ movdqu xmm5, [arg2+48]
+ pshufb xmm4, xmm7
+ pshufb xmm5, xmm7
+
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+
+ sub arg3, 64
+
+ ; check if there is another 64B in the buffer to be able to fold
+ jge _fold_64_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg2, 64
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+ ; the 64B of folded data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3
+
+
+ ; fold the 4 xmm registers to 1 xmm register with different constants
+
+ movdqa xmm6, [rk1] ;xmm6 has rk1 and rk2
+ ;imm value of pclmulqdq instruction will
+ ;determine which constant to use
+
+ movdqa xmm4, xmm0
+ pclmulqdq xmm0, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm1, xmm4
+ pxor xmm1, xmm0
+
+ movdqa xmm4, xmm1
+ pclmulqdq xmm1, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm2, xmm4
+ pxor xmm2, xmm1
+
+ movdqa xmm4, xmm2
+ pclmulqdq xmm2, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+
+
+ ; instead of 64, we add 48 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 64-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes
+ ; is in register xmm3 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm4, xmm3
+ pclmulqdq xmm3, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ movdqu xmm0, [arg2]
+ pshufb xmm0, xmm7
+ pxor xmm3, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm3 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here we handle a tail of less than 16 bytes.
+	; since we know there was data before the pointer,
+	; we can back the input pointer up
+	; to load exactly 16 bytes;
+	; the registers then need to be adjusted to discard the extra data.
+_get_last_two_xmms:
+ movdqa xmm2, xmm3
+
+ movdqu xmm1, [arg2 - 16 + arg3]
+ pshufb xmm1, xmm7
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ movdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ pshufb xmm2, xmm0
+
+ ; shift xmm3 to the right by 16-arg3 bytes
+ pxor xmm0, [mask1]
+ pshufb xmm3, xmm0
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ ; fold 16 Bytes
+ movdqa xmm2, xmm1
+ movdqa xmm4, xmm3
+ pclmulqdq xmm3, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm6, [rk5] ; rk5 and rk6 in xmm6
+ movdqa xmm0, xmm3
+
+ ;64b fold
+ pclmulqdq xmm3, xmm6, 0x1
+ pslldq xmm0, 8
+ pxor xmm3, xmm0
+
+ ;32b fold
+ movdqa xmm0, xmm3
+
+ pand xmm0, [mask2]
+
+ psrldq xmm3, 12
+ pclmulqdq xmm3, xmm6, 0x10
+ pxor xmm3, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm6, [rk7] ; rk7 and rk8 in xmm6
+ movdqa xmm0, xmm3
+ pclmulqdq xmm3, xmm6, 0x01
+ pslldq xmm3, 4
+ pclmulqdq xmm3, xmm6, 0x11
+
+ pslldq xmm3, 4
+ pxor xmm3, xmm0
+ pextrd eax, xmm3,1
+
+_cleanup:
+ ; scale the result back to 16 bits
+ shr eax, 16
+ movdqa xmm6, [rsp+16*2]
+ movdqa xmm7, [rsp+16*3]
+ add rsp,16*4+8
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_128:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ movdqa xmm7, [SHUF_MASK]
+
+ ; if there is, load the constants
+ movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+ movdqu xmm3, [arg2] ; load the plaintext
+ pshufb xmm3, xmm7 ; byte-reflect the plaintext
+ pxor xmm3, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je _cleanup
+
+ movdqa xmm7, [SHUF_MASK]
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm3, [arg2] ; load the plaintext
+ pshufb xmm3, xmm7 ; byte-reflect the plaintext
+ pxor xmm3, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ ; shl r9, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask1]
+
+ pshufb xmm3, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ movdqu xmm3, [arg2]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg3, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ psrldq xmm3, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg3, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ psrldq xmm3, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ psrldq xmm3, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits)
+align 16
+; Q = 0x18BB70000
+; rk1 = 2^(32*3) mod Q << 32
+; rk2 = 2^(32*5) mod Q << 32
+; rk3 = 2^(32*15) mod Q << 32
+; rk4 = 2^(32*17) mod Q << 32
+; rk5 = 2^(32*3) mod Q << 32
+; rk6 = 2^(32*2) mod Q << 32
+; rk7 = floor(2^64/Q)
+; rk8 = Q
+rk1:
+DQ 0x2d56000000000000
+rk2:
+DQ 0x06df000000000000
+rk3:
+DQ 0x044c000000000000
+rk4:
+DQ 0xe658000000000000
+rk5:
+DQ 0x2d56000000000000
+rk6:
+DQ 0x1368000000000000
+rk7:
+DQ 0x00000001f65a57f8
+rk8:
+DQ 0x000000018bb70000
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+;;; func core, ver, snum
+slversion crc16_t10dif_by4, 05, 02, 0016
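
The rk* constants in the .data section above are generated offline from the
formulas given in the comments. As a hedged sketch (not isa-l code; the
function name and standalone form are illustrative), the following C computes
x^n mod Q over GF(2), which is what "2^(32*k) mod Q" denotes; shifting the
32-bit remainder left by 32 bits yields the stored rk value, e.g.
rk1 = (x^(32*3) mod Q) << 32 = 0x2d56000000000000.

    #include <stdint.h>

    /* x^n mod Q over GF(2); Q = 0x18BB70000 is 0x8bb7 scaled to 32 bits */
    static uint64_t gf2_xn_mod_q(unsigned n)
    {
            const uint64_t Q = 0x18BB70000ULL;  /* degree-32 polynomial */
            uint64_t r = 1;                     /* start from x^0 */
            while (n--) {
                    r <<= 1;                    /* multiply by x */
                    if (r & (1ULL << 32))       /* reduce when degree hits 32 */
                            r ^= Q;
            }
            return r;                           /* 32-bit remainder */
    }
    /* e.g. rk1 = gf2_xn_mod_q(32*3) << 32, rk6 = gf2_xn_mod_q(32*2) << 32 */
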
diff --git a/src/isa-l/crc/crc16_t10dif_copy_by4.asm b/src/isa-l/crc/crc16_t10dif_copy_by4.asm
new file mode 100644
index 000000000..b8a6838b4
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_copy_by4.asm
@@ -0,0 +1,599 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Function API:
+; UINT16 crc16_t10dif_copy_by4(
+; UINT16 init_crc, //initial CRC value, 16 bits
+; unsigned char *dst, //buffer pointer destination for copy
+; const unsigned char *src, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://download.intel.com/design/intarch/papers/323102.pdf
+;
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+ %xdefine arg4 r9
+ %xdefine tmp1 r10
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+ %xdefine arg4 rcx
+ %xdefine tmp1 r10
+ %xdefine arg1_low32 edi
+%endif
+
+align 16
+mk_global crc16_t10dif_copy_by4, function
+crc16_t10dif_copy_by4:
+ endbranch
+
+ ; adjust the 16-bit initial_crc value, scale it to 32 bits
+ shl arg1_low32, 16
+
+	; After this point, code flow is exactly the same as for a 32-bit CRC.
+	; The only difference is that, before returning eax, we shift
+	; it right by 16 bits to scale back to 16 bits.
+
+ sub rsp,16*4+8
+
+	; push the xmm registers onto the stack since they must be preserved
+ movdqa [rsp+16*2],xmm6
+ movdqa [rsp+16*3],xmm7
+
+ ; check if smaller than 128B
+ cmp arg4, 128
+
+ ; for sizes less than 128, we can't fold 64B at a time...
+ jl _less_than_128
+
+
+ ; load the initial crc value
+ movd xmm6, arg1_low32 ; initial crc
+
+	; crc value does not need to be byte-reflected, but it needs to
+	; be moved to the high part of the register,
+	; because the data will be byte-reflected and will then align
+	; with the initial crc in the correct place.
+ pslldq xmm6, 12
+
+ movdqa xmm7, [SHUF_MASK]
+ ; receive the initial 64B data, xor the initial crc value
+ movdqu xmm0, [arg3]
+ movdqu xmm1, [arg3+16]
+ movdqu xmm2, [arg3+32]
+ movdqu xmm3, [arg3+48]
+
+ ; copy initial data
+ movdqu [arg2], xmm0
+ movdqu [arg2+16], xmm1
+ movdqu [arg2+32], xmm2
+ movdqu [arg2+48], xmm3
+
+ pshufb xmm0, xmm7
+ ; XOR the initial_crc value
+ pxor xmm0, xmm6
+ pshufb xmm1, xmm7
+ pshufb xmm2, xmm7
+ pshufb xmm3, xmm7
+
+ movdqa xmm6, [rk3] ;xmm6 has rk3 and rk4
+ ;imm value of pclmulqdq instruction
+ ;will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 128 instead of 64 to save one instruction from the loop
+ sub arg4, 128
+
+	; at this section of the code, there are 64*x+y (0<=y<64) bytes of
+	; buffer. The _fold_64_B_loop
+	; will fold 64B at a time until we have 64+y bytes of buffer
+
+
+ ; fold 64B at a time. This section of the code folds 4 xmm
+ ; registers in parallel
+_fold_64_B_loop:
+
+ ; update the buffer pointer
+ add arg3, 64 ; buf += 64;
+ add arg2, 64
+
+ prefetchnta [arg3+fetch_dist+0]
+ movdqu xmm4, xmm0
+ movdqu xmm5, xmm1
+
+	pclmulqdq	xmm0, xmm6, 0x11
+	pclmulqdq	xmm1, xmm6, 0x11
+
+ pclmulqdq xmm4, xmm6, 0x0
+ pclmulqdq xmm5, xmm6, 0x0
+
+ pxor xmm0, xmm4
+ pxor xmm1, xmm5
+
+ prefetchnta [arg3+fetch_dist+32]
+ movdqu xmm4, xmm2
+ movdqu xmm5, xmm3
+
+ pclmulqdq xmm2, xmm6, 0x11
+ pclmulqdq xmm3, xmm6, 0x11
+
+ pclmulqdq xmm4, xmm6, 0x0
+ pclmulqdq xmm5, xmm6, 0x0
+
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+
+ movdqu xmm4, [arg3]
+ movdqu xmm5, [arg3+16]
+ movdqu [arg2], xmm4
+ movdqu [arg2+16], xmm5
+ pshufb xmm4, xmm7
+ pshufb xmm5, xmm7
+ pxor xmm0, xmm4
+ pxor xmm1, xmm5
+
+ movdqu xmm4, [arg3+32]
+ movdqu xmm5, [arg3+48]
+ movdqu [arg2+32], xmm4
+ movdqu [arg2+48], xmm5
+ pshufb xmm4, xmm7
+ pshufb xmm5, xmm7
+
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+
+ sub arg4, 64
+
+ ; check if there is another 64B in the buffer to be able to fold
+ jge _fold_64_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg3, 64
+ add arg2, 64
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+ ; the 64B of folded data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3
+
+
+ ; fold the 4 xmm registers to 1 xmm register with different constants
+
+ movdqa xmm6, [rk1] ;xmm6 has rk1 and rk2
+ ;imm value of pclmulqdq instruction will
+ ;determine which constant to use
+
+ movdqa xmm4, xmm0
+ pclmulqdq xmm0, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm1, xmm4
+ pxor xmm1, xmm0
+
+ movdqa xmm4, xmm1
+ pclmulqdq xmm1, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm2, xmm4
+ pxor xmm2, xmm1
+
+ movdqa xmm4, xmm2
+ pclmulqdq xmm2, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+
+
+ ; instead of 64, we add 48 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg4, 64-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce: 16 bytes
+	; are in register xmm3 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm4, xmm3
+ pclmulqdq xmm3, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ movdqu xmm0, [arg3]
+ movdqu [arg2], xmm0
+ pshufb xmm0, xmm7
+ pxor xmm3, xmm0
+ add arg3, 16
+ add arg2, 16
+ sub arg4, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg4, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm3 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg4, 16
+ je _128_done
+
+	; here we are handling a tail of less than 16 bytes.
+	; since we know that there was valid data before the current
+	; pointer, we can move the input pointer back so that exactly
+	; 16 bytes are loaded.
+	; after that the registers need to be adjusted.
+_get_last_two_xmms:
+ movdqa xmm2, xmm3
+
+ movdqu xmm1, [arg3 - 16 + arg4]
+ movdqu [arg2 - 16 + arg4], xmm1
+ pshufb xmm1, xmm7
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg4
+ movdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg4 bytes
+ pshufb xmm2, xmm0
+
+ ; shift xmm3 to the right by 16-arg4 bytes
+ pxor xmm0, [mask1]
+ pshufb xmm3, xmm0
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ ; fold 16 Bytes
+ movdqa xmm2, xmm1
+ movdqa xmm4, xmm3
+ pclmulqdq xmm3, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm6, [rk5] ; rk5 and rk6 in xmm6
+ movdqa xmm0, xmm3
+
+ ;64b fold
+ pclmulqdq xmm3, xmm6, 0x1
+ pslldq xmm0, 8
+ pxor xmm3, xmm0
+
+ ;32b fold
+ movdqa xmm0, xmm3
+
+ pand xmm0, [mask2]
+
+ psrldq xmm3, 12
+ pclmulqdq xmm3, xmm6, 0x10
+ pxor xmm3, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm6, [rk7] ; rk7 and rk8 in xmm6
+ movdqa xmm0, xmm3
+ pclmulqdq xmm3, xmm6, 0x01
+ pslldq xmm3, 4
+ pclmulqdq xmm3, xmm6, 0x11
+
+ pslldq xmm3, 4
+ pxor xmm3, xmm0
+ pextrd eax, xmm3,1
+
+_cleanup:
+ ; scale the result back to 16 bits
+ shr eax, 16
+ movdqa xmm6, [rsp+16*2]
+ movdqa xmm7, [rsp+16*3]
+ add rsp,16*4+8
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_128:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg4, 32
+ jl _less_than_32
+ movdqa xmm7, [SHUF_MASK]
+
+ ; if there is, load the constants
+ movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+ movdqu xmm3, [arg3] ; load the plaintext
+ movdqu [arg2], xmm3 ; store copy
+ pshufb xmm3, xmm7 ; byte-reflect the plaintext
+ pxor xmm3, xmm0
+
+
+ ; update the buffer pointer
+ add arg3, 16
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg4, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg4, arg4
+ je _cleanup
+
+ movdqa xmm7, [SHUF_MASK]
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg4, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm3, [arg3] ; load the plaintext
+ movdqu [arg2], xmm3 ; store the copy
+ pshufb xmm3, xmm7 ; byte-reflect the plaintext
+ pxor xmm3, xmm0 ; xor the initial crc value
+ add arg3, 16
+ add arg2, 16
+ sub arg4, 16
+ movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ cmp arg4, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov tmp1, arg4
+ cmp arg4, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg3]
+ mov [arg2], rax
+ mov [r11], rax
+ add r11, 8
+ sub arg4, 8
+ add arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg4, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg3]
+ mov [arg2], eax
+ mov [r11], eax
+ add r11, 4
+ sub arg4, 4
+ add arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg4, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg3]
+ mov [arg2], ax
+ mov [r11], ax
+ add r11, 2
+ sub arg4, 2
+ add arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg4, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg3]
+ mov [arg2], al
+ mov [r11], al
+_zero_left:
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ ; shl tmp1, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, tmp1
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask1]
+
+ pshufb xmm3, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ movdqu xmm3, [arg3]
+ movdqu [arg2], xmm3
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg4, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg3]
+ mov [arg2], al
+ mov [r11], al
+
+ mov al, [arg3+1]
+ mov [arg2+1], al
+ mov [r11+1], al
+
+ mov al, [arg3+2]
+ mov [arg2+2], al
+ mov [r11+2], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ psrldq xmm3, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg4, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg3]
+ mov [arg2], al
+ mov [r11], al
+
+ mov al, [arg3+1]
+ mov [arg2+1], al
+ mov [r11+1], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ psrldq xmm3, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg3]
+ mov [arg2],al
+ mov [r11], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0 ; xor the initial crc value
+
+ psrldq xmm3, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits)
+align 16
+; Q = 0x18BB70000
+; rk1 = 2^(32*3) mod Q << 32
+; rk2 = 2^(32*5) mod Q << 32
+; rk3 = 2^(32*15) mod Q << 32
+; rk4 = 2^(32*17) mod Q << 32
+; rk5 = 2^(32*3) mod Q << 32
+; rk6 = 2^(32*2) mod Q << 32
+; rk7 = floor(2^64/Q)
+; rk8 = Q
+rk1:
+DQ 0x2d56000000000000
+rk2:
+DQ 0x06df000000000000
+rk3:
+DQ 0x044c000000000000
+rk4:
+DQ 0xe658000000000000
+rk5:
+DQ 0x2d56000000000000
+rk6:
+DQ 0x1368000000000000
+rk7:
+DQ 0x00000001f65a57f8
+rk8:
+DQ 0x000000018bb70000
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+;;; func core, ver, snum
+slversion crc16_t10dif_copy_by4, 05, 02, 0000
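
For callers, the entry point documented in the "Function API" comment at the
top of this file is normally reached through the isa-l multibinary dispatcher
declared in the public crc.h header. A minimal usage sketch, assuming that
prototype and a 512-byte sector (the helper name is hypothetical, not part of
this patch):

    #include <stdint.h>
    #include "crc.h"    /* isa-l public header; declares crc16_t10dif_copy() */

    /* Copy one 512-byte sector and compute its T10-DIF guard CRC in the
     * same pass, as a DIF-insert I/O path would.  Seed 0 is the usual
     * T10-DIF initial value. */
    uint16_t dif_copy_sector(unsigned char *dst, unsigned char *src)
    {
            return crc16_t10dif_copy(0, dst, src, 512);
    }
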
diff --git a/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm b/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm
new file mode 100644
index 000000000..254a18711
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_copy_by4_02.asm
@@ -0,0 +1,596 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Function API:
+; UINT16 crc16_t10dif_copy_by4_02(
+; UINT16 init_crc, //initial CRC value, 16 bits
+; unsigned char *dst, //buffer pointer destination for copy
+; const unsigned char *src, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://download.intel.com/design/intarch/papers/323102.pdf
+;
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+ %xdefine arg4 r9
+ %xdefine tmp1 r10
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+ %xdefine arg4 rcx
+ %xdefine tmp1 r10
+ %xdefine arg1_low32 edi
+%endif
+
+align 16
+mk_global crc16_t10dif_copy_by4_02, function
+crc16_t10dif_copy_by4_02:
+ endbranch
+
+ ; adjust the 16-bit initial_crc value, scale it to 32 bits
+ shl arg1_low32, 16
+
+	; After this point, code flow is exactly the same as for a 32-bit CRC.
+	; The only difference is that, before returning eax, we shift
+	; it right by 16 bits to scale back to 16 bits.
+
+ sub rsp,16*4+8
+
+	; push the xmm registers onto the stack since they must be preserved
+ movdqa [rsp+16*2],xmm6
+ movdqa [rsp+16*3],xmm7
+
+ ; check if smaller than 128B
+ cmp arg4, 128
+
+ ; for sizes less than 128, we can't fold 64B at a time...
+ jl _less_than_128
+
+
+ ; load the initial crc value
+ vmovd xmm6, arg1_low32 ; initial crc
+
+	; crc value does not need to be byte-reflected, but it needs to
+	; be moved to the high part of the register,
+	; because the data will be byte-reflected and will then align
+	; with the initial crc in the correct place.
+ vpslldq xmm6, 12
+
+ vmovdqa xmm7, [SHUF_MASK]
+ ; receive the initial 64B data, xor the initial crc value
+ vmovdqu xmm0, [arg3]
+ vmovdqu xmm1, [arg3+16]
+ vmovdqu xmm2, [arg3+32]
+ vmovdqu xmm3, [arg3+48]
+
+ ; copy initial data
+ vmovdqu [arg2], xmm0
+ vmovdqu [arg2+16], xmm1
+ vmovdqu [arg2+32], xmm2
+ vmovdqu [arg2+48], xmm3
+
+ vpshufb xmm0, xmm7
+ ; XOR the initial_crc value
+ vpxor xmm0, xmm6
+ vpshufb xmm1, xmm7
+ vpshufb xmm2, xmm7
+ vpshufb xmm3, xmm7
+
+ vmovdqa xmm6, [rk3] ;xmm6 has rk3 and rk4
+ ;imm value of pclmulqdq instruction
+ ;will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 128 instead of 64 to save one instruction from the loop
+ sub arg4, 128
+
+	; at this section of the code, there are 64*x+y (0<=y<64) bytes of
+	; buffer. The _fold_64_B_loop
+	; will fold 64B at a time until we have 64+y bytes of buffer
+
+
+ ; fold 64B at a time. This section of the code folds 4 xmm
+ ; registers in parallel
+_fold_64_B_loop:
+
+ ; update the buffer pointer
+ add arg3, 64 ; buf += 64;
+ add arg2, 64
+
+ prefetchnta [arg3+fetch_dist+0]
+ vmovdqu xmm4, xmm0
+ vmovdqu xmm5, xmm1
+
+	vpclmulqdq	xmm0, xmm6, 0x11
+	vpclmulqdq	xmm1, xmm6, 0x11
+
+ vpclmulqdq xmm4, xmm6, 0x0
+ vpclmulqdq xmm5, xmm6, 0x0
+
+ vpxor xmm0, xmm4
+ vpxor xmm1, xmm5
+
+ prefetchnta [arg3+fetch_dist+32]
+ vmovdqu xmm4, xmm2
+ vmovdqu xmm5, xmm3
+
+ vpclmulqdq xmm2, xmm6, 0x11
+ vpclmulqdq xmm3, xmm6, 0x11
+
+ vpclmulqdq xmm4, xmm6, 0x0
+ vpclmulqdq xmm5, xmm6, 0x0
+
+ vpxor xmm2, xmm4
+ vpxor xmm3, xmm5
+
+ vmovdqu xmm4, [arg3]
+ vmovdqu xmm5, [arg3+16]
+ vmovdqu [arg2], xmm4
+ vmovdqu [arg2+16], xmm5
+ vpshufb xmm4, xmm7
+ vpshufb xmm5, xmm7
+ vpxor xmm0, xmm4
+ vpxor xmm1, xmm5
+
+ vmovdqu xmm4, [arg3+32]
+ vmovdqu xmm5, [arg3+48]
+ vmovdqu [arg2+32], xmm4
+ vmovdqu [arg2+48], xmm5
+ vpshufb xmm4, xmm7
+ vpshufb xmm5, xmm7
+
+ vpxor xmm2, xmm4
+ vpxor xmm3, xmm5
+
+ sub arg4, 64
+
+ ; check if there is another 64B in the buffer to be able to fold
+ jge _fold_64_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg3, 64
+ add arg2, 64
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+ ; the 64B of folded data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3
+
+
+ ; fold the 4 xmm registers to 1 xmm register with different constants
+
+ vmovdqa xmm6, [rk1] ;xmm6 has rk1 and rk2
+ ;imm value of pclmulqdq instruction will
+ ;determine which constant to use
+
+ vmovdqa xmm4, xmm0
+ vpclmulqdq xmm0, xmm6, 0x11
+ vpclmulqdq xmm4, xmm6, 0x0
+ vpxor xmm1, xmm4
+ vpxor xmm1, xmm0
+
+ vmovdqa xmm4, xmm1
+ vpclmulqdq xmm1, xmm6, 0x11
+ vpclmulqdq xmm4, xmm6, 0x0
+ vpxor xmm2, xmm4
+ vpxor xmm2, xmm1
+
+ vmovdqa xmm4, xmm2
+ vpclmulqdq xmm2, xmm6, 0x11
+ vpclmulqdq xmm4, xmm6, 0x0
+ vpxor xmm3, xmm4
+ vpxor xmm3, xmm2
+
+
+ ; instead of 64, we add 48 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg4, 64-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce: 16 bytes
+	; are in register xmm3 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ vmovdqa xmm4, xmm3
+ vpclmulqdq xmm3, xmm6, 0x11
+ vpclmulqdq xmm4, xmm6, 0x0
+ vpxor xmm3, xmm4
+ vmovdqu xmm0, [arg3]
+ vmovdqu [arg2], xmm0
+ vpshufb xmm0, xmm7
+ vpxor xmm3, xmm0
+ add arg3, 16
+ add arg2, 16
+ sub arg4, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg4, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm3 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg4, 16
+ je _128_done
+
+	; here we are handling a tail of less than 16 bytes.
+	; since we know that there was valid data before the current
+	; pointer, we can move the input pointer back so that exactly
+	; 16 bytes are loaded.
+	; after that the registers need to be adjusted.
+_get_last_two_xmms:
+ vmovdqa xmm2, xmm3
+
+ vmovdqu xmm1, [arg3 - 16 + arg4]
+ vmovdqu [arg2 - 16 + arg4], xmm1
+ vpshufb xmm1, xmm7
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg4
+ vmovdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg4 bytes
+ vpshufb xmm2, xmm0
+
+ ; shift xmm3 to the right by 16-arg4 bytes
+ vpxor xmm0, [mask1]
+ vpshufb xmm3, xmm0
+ vpblendvb xmm1, xmm1, xmm2, xmm0
+
+ ; fold 16 Bytes
+ vmovdqa xmm2, xmm1
+ vmovdqa xmm4, xmm3
+ vpclmulqdq xmm3, xmm6, 0x11
+ vpclmulqdq xmm4, xmm6, 0x0
+ vpxor xmm3, xmm4
+ vpxor xmm3, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm6, [rk5] ; rk5 and rk6 in xmm6
+ vmovdqa xmm0, xmm3
+
+ ;64b fold
+ vpclmulqdq xmm3, xmm6, 0x1
+ vpslldq xmm0, 8
+ vpxor xmm3, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm3
+
+ vpand xmm0, [mask2]
+
+ vpsrldq xmm3, 12
+ vpclmulqdq xmm3, xmm6, 0x10
+ vpxor xmm3, xmm0
+
+ ;barrett reduction
+_barrett:
+ vmovdqa xmm6, [rk7] ; rk7 and rk8 in xmm6
+ vmovdqa xmm0, xmm3
+ vpclmulqdq xmm3, xmm6, 0x01
+ vpslldq xmm3, 4
+ vpclmulqdq xmm3, xmm6, 0x11
+
+ vpslldq xmm3, 4
+ vpxor xmm3, xmm0
+ vpextrd eax, xmm3,1
+
+_cleanup:
+ ; scale the result back to 16 bits
+ shr eax, 16
+ vmovdqa xmm6, [rsp+16*2]
+ vmovdqa xmm7, [rsp+16*3]
+ add rsp,16*4+8
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_128:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg4, 32
+ jl _less_than_32
+ vmovdqa xmm7, [SHUF_MASK]
+
+ ; if there is, load the constants
+ vmovdqa xmm6, [rk1] ; rk1 and rk2 in xmm6
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+ vmovdqu xmm3, [arg3] ; load the plaintext
+ vmovdqu [arg2], xmm3 ; store copy
+ vpshufb xmm3, xmm7 ; byte-reflect the plaintext
+ vpxor xmm3, xmm0
+
+
+ ; update the buffer pointer
+ add arg3, 16
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg4, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg4, arg4
+ je _cleanup
+
+ vmovdqa xmm7, [SHUF_MASK]
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg4, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ vmovdqu xmm3, [arg3] ; load the plaintext
+ vmovdqu [arg2], xmm3 ; store the copy
+ vpshufb xmm3, xmm7 ; byte-reflect the plaintext
+ vpxor xmm3, xmm0 ; xor the initial crc value
+ add arg3, 16
+ add arg2, 16
+ sub arg4, 16
+ vmovdqa xmm6, [rk1] ; rk1 and rk2 in xmm6
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg4, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov tmp1, arg4
+ cmp arg4, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg3]
+ mov [arg2], rax
+ mov [r11], rax
+ add r11, 8
+ sub arg4, 8
+ add arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg4, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg3]
+ mov [arg2], eax
+ mov [r11], eax
+ add r11, 4
+ sub arg4, 4
+ add arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg4, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg3]
+ mov [arg2], ax
+ mov [r11], ax
+ add r11, 2
+ sub arg4, 2
+ add arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg4, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg3]
+ mov [arg2], al
+ mov [r11], al
+_zero_left:
+ vmovdqa xmm3, [rsp]
+ vpshufb xmm3, xmm7
+ vpxor xmm3, xmm0 ; xor the initial crc value
+
+ ; shl tmp1, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, tmp1
+ vmovdqu xmm0, [rax]
+ vpxor xmm0, [mask1]
+
+ vpshufb xmm3, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ vmovdqu xmm3, [arg3]
+ vmovdqu [arg2], xmm3
+ vpshufb xmm3, xmm7
+ vpxor xmm3, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg4, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg3]
+ mov [arg2], al
+ mov [r11], al
+
+ mov al, [arg3+1]
+ mov [arg2+1], al
+ mov [r11+1], al
+
+ mov al, [arg3+2]
+ mov [arg2+2], al
+ mov [r11+2], al
+
+ vmovdqa xmm3, [rsp]
+ vpshufb xmm3, xmm7
+ vpxor xmm3, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm3, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg4, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg3]
+ mov [arg2], al
+ mov [r11], al
+
+ mov al, [arg3+1]
+ mov [arg2+1], al
+ mov [r11+1], al
+
+ vmovdqa xmm3, [rsp]
+ vpshufb xmm3, xmm7
+ vpxor xmm3, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm3, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg3]
+ mov [arg2],al
+ mov [r11], al
+
+ vmovdqa xmm3, [rsp]
+ vpshufb xmm3, xmm7
+ vpxor xmm3, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm3, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits)
+align 16
+; Q = 0x18BB70000
+; rk1 = 2^(32*3) mod Q << 32
+; rk2 = 2^(32*5) mod Q << 32
+; rk3 = 2^(32*15) mod Q << 32
+; rk4 = 2^(32*17) mod Q << 32
+; rk5 = 2^(32*3) mod Q << 32
+; rk6 = 2^(32*2) mod Q << 32
+; rk7 = floor(2^64/Q)
+; rk8 = Q
+rk1:
+DQ 0x2d56000000000000
+rk2:
+DQ 0x06df000000000000
+rk3:
+DQ 0x044c000000000000
+rk4:
+DQ 0xe658000000000000
+rk5:
+DQ 0x2d56000000000000
+rk6:
+DQ 0x1368000000000000
+rk7:
+DQ 0x00000001f65a57f8
+rk8:
+DQ 0x000000018bb70000
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
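
The 64-byte folding loop above performs, per accumulator, the same pair of
carry-less multiplies plus xors that the 16-byte reduction loop does. For
readers more at home with intrinsics, here is a hedged C rendering of one
16-byte fold step (a sketch, not isa-l code; function name and standalone
form are illustrative; compile with -mpclmul -mssse3):

    #include <stdint.h>
    #include <immintrin.h>

    /* One iteration of the 16-byte reduction loop: fold the running
     * 128-bit remainder 'acc' into the next 16 input bytes.  'rk12'
     * holds rk1 in its low qword and rk2 in its high qword, exactly
     * as xmm6 does in the assembly above. */
    static __m128i fold_16b(__m128i acc, const uint8_t *buf,
                            __m128i rk12, __m128i shuf_mask)
    {
            __m128i hi  = _mm_clmulepi64_si128(acc, rk12, 0x11); /* acc.hi x rk2 */
            __m128i lo  = _mm_clmulepi64_si128(acc, rk12, 0x00); /* acc.lo x rk1 */
            __m128i dat = _mm_loadu_si128((const __m128i *)buf);
            dat = _mm_shuffle_epi8(dat, shuf_mask);              /* byte-reflect */
            return _mm_xor_si128(_mm_xor_si128(hi, lo), dat);
    }
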
diff --git a/src/isa-l/crc/crc16_t10dif_copy_perf.c b/src/isa-l/crc/crc16_t10dif_copy_perf.c
new file mode 100644
index 000000000..17cba6bc0
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_copy_perf.c
@@ -0,0 +1,84 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include "crc.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (2 * GT_L3_CACHE)
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define TEST_MEM TEST_LEN
+
+int main(int argc, char *argv[])
+{
+ void *src, *dst;
+ uint16_t crc;
+ struct perf start;
+
+ printf("crc16_t10dif_copy_perf:\n");
+
+ if (posix_memalign(&src, 1024, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ if (posix_memalign(&dst, 1024, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ memset(src, 0, TEST_LEN);
+ BENCHMARK(&start, BENCHMARK_TIME, crc =
+ crc16_t10dif_copy(TEST_SEED, dst, src, TEST_LEN));
+ printf("crc16_t10dif_copy" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ printf("finish 0x%x\n", crc);
+ return 0;
+}
diff --git a/src/isa-l/crc/crc16_t10dif_copy_test.c b/src/isa-l/crc/crc16_t10dif_copy_test.c
new file mode 100644
index 000000000..4c398c429
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_copy_test.c
@@ -0,0 +1,175 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "crc.h"
+#include "crc_ref.h"
+
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define MAX_BUF 2345
+#define TEST_SIZE 217
+#define TEST_LEN (8 * 1024)
+
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+// bitwise crc version
+uint16_t crc16_t10dif_copy_ref(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len);
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+int memtst(unsigned char *buf, unsigned char c, int len)
+{
+ int i;
+ for (i = 0; i < len; i++)
+ if (*buf++ != c)
+ return 1;
+
+ return 0;
+}
+
+int crc_copy_check(const char *description, u8 * dst, u8 * src, u8 dst_fill_val, int len,
+ int tot)
+{
+ u16 seed;
+ int rem;
+
+ assert(tot >= len);
+ seed = rand();
+ rem = tot - len;
+ memset(dst, dst_fill_val, tot);
+
+ // multi-binary crc version
+ u16 crc_dut = crc16_t10dif_copy(seed, dst, src, len);
+ u16 crc_ref = crc16_t10dif(seed, src, len);
+ if (crc_dut != crc_ref) {
+ printf("%s, crc gen fail: 0x%4x 0x%4x len=%d\n", description, crc_dut,
+ crc_ref, len);
+ return 1;
+ } else if (memcmp(dst, src, len)) {
+ printf("%s, copy fail: len=%d\n", description, len);
+ return 1;
+ } else if (memtst(&dst[len], dst_fill_val, rem)) {
+ printf("%s, writeover fail: len=%d\n", description, len);
+ return 1;
+ }
+ // bitwise crc version
+ crc_dut = crc16_t10dif_copy_ref(seed, dst, src, len);
+ crc_ref = crc16_t10dif_ref(seed, src, len);
+ if (crc_dut != crc_ref) {
+		printf("%s, crc gen fail (bitwise ref): 0x%4x 0x%4x len=%d\n", description,
+		       crc_dut, crc_ref, len);
+		return 1;
+	} else if (memcmp(dst, src, len)) {
+		printf("%s, copy fail (bitwise ref): len=%d\n", description, len);
+		return 1;
+	} else if (memtst(&dst[len], dst_fill_val, rem)) {
+		printf("%s, writeover fail (bitwise ref): len=%d\n", description, len);
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int r = 0;
+ int i;
+ int len, tot;
+ u8 *src_raw, *dst_raw;
+ u8 *src, *dst;
+
+ printf("Test crc16_t10dif_copy_test:\n");
+ src_raw = (u8 *) malloc(TEST_LEN);
+ dst_raw = (u8 *) malloc(TEST_LEN);
+ if (NULL == src_raw || NULL == dst_raw) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ src = src_raw;
+ dst = dst_raw;
+
+ srand(TEST_SEED);
+
+ // Test of all zeros
+ memset(src, 0, TEST_LEN);
+ r |= crc_copy_check("zero tst", dst, src, 0x5e, MAX_BUF, TEST_LEN);
+
+ // Another simple test pattern
+ memset(src, 0xff, TEST_LEN);
+ r |= crc_copy_check("simp tst", dst, src, 0x5e, MAX_BUF, TEST_LEN);
+
+ // Do a few short len random data tests
+ rand_buffer(src, TEST_LEN);
+ rand_buffer(dst, TEST_LEN);
+ for (i = 0; i < MAX_BUF; i++) {
+ r |= crc_copy_check("short len", dst, src, rand(), i, MAX_BUF);
+ }
+ printf(".");
+
+ // Do a few longer tests, random data
+ for (i = TEST_LEN; i >= (TEST_LEN - TEST_SIZE); i--) {
+ r |= crc_copy_check("long len", dst, src, rand(), i, TEST_LEN);
+ }
+ printf(".");
+
+ // Do random size, random data
+ for (i = 0; i < RANDOMS; i++) {
+ len = rand() % TEST_LEN;
+ r |= crc_copy_check("rand len", dst, src, rand(), len, TEST_LEN);
+ }
+ printf(".");
+
+ // Run tests at end of buffer
+ for (i = 0; i < RANDOMS; i++) {
+ len = rand() % TEST_LEN;
+ src = &src_raw[TEST_LEN - len - 1];
+ dst = &dst_raw[TEST_LEN - len - 1];
+ tot = len;
+ r |= crc_copy_check("end of buffer", dst, src, rand(), len, tot);
+ }
+ printf(".");
+
+ printf("Test done: %s\n", r ? "Fail" : "Pass");
+ return r;
+}
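
The crc16_t10dif_copy_ref / crc16_t10dif_ref functions this test links
against are isa-l's slow bitwise references. For orientation, a
bit-at-a-time T10-DIF CRC over polynomial 0x8bb7 can be sketched as follows
(a hypothetical stand-in, not the actual reference source):

    #include <stdint.h>

    /* MSB-first, bit-at-a-time CRC16 with the T10-DIF polynomial 0x8bb7 */
    static uint16_t crc16_t10dif_bitwise(uint16_t crc, const uint8_t *buf,
                                         uint64_t len)
    {
            while (len--) {
                    crc ^= (uint16_t)(*buf++) << 8;      /* inject next byte */
                    for (int i = 0; i < 8; i++)          /* one bit at a time */
                            crc = (crc & 0x8000)
                                ? (uint16_t)((crc << 1) ^ 0x8bb7)
                                : (uint16_t)(crc << 1);
            }
            return crc;
    }
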
diff --git a/src/isa-l/crc/crc16_t10dif_op_perf.c b/src/isa-l/crc/crc16_t10dif_op_perf.c
new file mode 100644
index 000000000..9b91ef39d
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_op_perf.c
@@ -0,0 +1,116 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "crc.h"
+#include "test.h"
+
+#define BLKSIZE (512)
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define NBLOCKS 100
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (2 * GT_L3_CACHE)
+# define NBLOCKS (TEST_LEN / BLKSIZE)
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+struct blk {
+ uint8_t data[BLKSIZE];
+};
+
+struct blk_ext {
+ uint8_t data[BLKSIZE];
+ uint32_t tag;
+ uint16_t meta;
+ uint16_t crc;
+};
+
+void crc16_t10dif_copy_perf(struct blk *blks, struct blk *blkp, struct blk_ext *blks_ext,
+ struct blk_ext *blkp_ext, uint16_t * crc)
+{
+ int i;
+ for (i = 0, blkp = blks, blkp_ext = blks_ext; i < NBLOCKS; i++) {
+ *crc = crc16_t10dif_copy(TEST_SEED, blkp_ext->data, blkp->data,
+ sizeof(blks->data));
+ blkp_ext->crc = *crc;
+ blkp++;
+ blkp_ext++;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ uint16_t crc;
+ struct blk *blks, *blkp;
+ struct blk_ext *blks_ext, *blkp_ext;
+ struct perf start;
+
+ printf("crc16_t10dif_streaming_insert_perf:\n");
+
+ if (posix_memalign((void *)&blks, 1024, NBLOCKS * sizeof(*blks))) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ if (posix_memalign((void *)&blks_ext, 1024, NBLOCKS * sizeof(*blks_ext))) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+
+ printf(" size blk: %ld, blk_ext: %ld, blk data: %ld, stream: %ld\n",
+ sizeof(*blks), sizeof(*blks_ext), sizeof(blks->data),
+ NBLOCKS * sizeof(blks->data));
+ memset(blks, 0xe5, NBLOCKS * sizeof(*blks));
+ memset(blks_ext, 0xe5, NBLOCKS * sizeof(*blks_ext));
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ // Copy and insert test
+ BENCHMARK(&start, BENCHMARK_TIME,
+ crc16_t10dif_copy_perf(blks, blkp, blks_ext, blkp_ext, &crc));
+
+ printf("crc16_t10pi_op_copy_insert" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)sizeof(blks->data) * NBLOCKS);
+
+ printf("finish 0x%x\n", crc);
+ return 0;
+}
diff --git a/src/isa-l/crc/crc16_t10dif_perf.c b/src/isa-l/crc/crc16_t10dif_perf.c
new file mode 100644
index 000000000..7b7c0bcd9
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_perf.c
@@ -0,0 +1,79 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include "crc.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (2 * GT_L3_CACHE)
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define TEST_MEM TEST_LEN
+
+int main(int argc, char *argv[])
+{
+ void *buf;
+ uint16_t crc;
+ struct perf start;
+
+ printf("crc16_t10dif_perf:\n");
+
+ if (posix_memalign(&buf, 1024, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ memset(buf, 0, TEST_LEN);
+ BENCHMARK(&start, BENCHMARK_TIME, crc = crc16_t10dif(TEST_SEED, buf, TEST_LEN));
+ printf("crc16_t10dif" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ printf("finish 0x%x\n", crc);
+ return 0;
+}
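
All the assembly variants above handle sub-16-byte tails with the
pshufb_shf_table trick: loading a mask at pshufb_shf_table + 16 - n and
applying pshufb shifts a register left by n bytes, and xoring that mask with
mask1 (0x80 in every byte) selects the complementary right shift by 16-n,
because pshufb zeroes any lane whose mask byte has bit 7 set. A hedged
intrinsics sketch of that trick (names illustrative, not isa-l code; the
table bytes are the same 32 bytes as in the assembly; compile with -mssse3):

    #include <stdint.h>
    #include <immintrin.h>

    static const uint8_t shf_table[32] = {
            0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
            0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x00
    };

    /* shift v left by n bytes, 1 <= n <= 15 (as _get_last_two_xmms does) */
    static __m128i shl_n_bytes(__m128i v, unsigned n)
    {
            __m128i m = _mm_loadu_si128((const __m128i *)(shf_table + 16 - n));
            return _mm_shuffle_epi8(v, m);
    }

    /* complementary right shift by 16-n bytes: flipping bit 7 of every
     * mask byte makes pshufb zero the lanes the left shift kept */
    static __m128i shr_16_minus_n_bytes(__m128i v, unsigned n)
    {
            __m128i m = _mm_loadu_si128((const __m128i *)(shf_table + 16 - n));
            return _mm_shuffle_epi8(v, _mm_xor_si128(m, _mm_set1_epi8((char)0x80)));
    }
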
diff --git a/src/isa-l/crc/crc16_t10dif_test.c b/src/isa-l/crc/crc16_t10dif_test.c
new file mode 100644
index 000000000..ceb9aab45
--- /dev/null
+++ b/src/isa-l/crc/crc16_t10dif_test.c
@@ -0,0 +1,179 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "crc.h"
+#include "types.h"
+#include "crc_ref.h"
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define MAX_BUF 4096
+#define TEST_SIZE 20
+
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+uint16_t crc16_t10dif_ref(uint16_t seed, uint8_t * buf, uint64_t len);
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0;
+ u32 r = 0;
+ int verbose = argc - 1;
+ int i, s;
+ void *buf_raw;
+ unsigned char *buf;
+
+ printf("Test crc16_t10dif_test ");
+ if (posix_memalign(&buf_raw, 32, MAX_BUF * TEST_SIZE)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ buf = (unsigned char *)buf_raw;
+
+ srand(TEST_SEED);
+
+ // Test of all zeros
+ memset(buf, 0, MAX_BUF * 10);
+ u16 crc_ref = crc16_t10dif_ref(TEST_SEED, buf, MAX_BUF);
+ u16 crc_base = crc16_t10dif_base(TEST_SEED, buf, MAX_BUF);
+ u16 crc = crc16_t10dif(TEST_SEED, buf, MAX_BUF);
+ if ((crc_base != crc_ref) || (crc != crc_ref)) {
+ fail++;
+ printf("\n opt ref\n");
+ printf(" ------ ------\n");
+ printf("crc zero = 0x%4x 0x%4x 0x%4x \n", crc_ref, crc_base, crc);
+ } else
+ printf(".");
+
+ // Another simple test pattern
+ memset(buf, 0x8a, MAX_BUF);
+ crc_ref = crc16_t10dif_ref(TEST_SEED, buf, MAX_BUF);
+ crc_base = crc16_t10dif_base(TEST_SEED, buf, MAX_BUF);
+ crc = crc16_t10dif(TEST_SEED, buf, MAX_BUF);
+ if ((crc_base != crc_ref) || (crc != crc_ref)) {
+ fail++;
+ printf("crc all 8a = 0x%4x 0x%4x 0x%4x\n", crc_ref, crc_base, crc);
+ } else
+ printf(".");
+
+ // Do a few random tests
+
+ rand_buffer(buf, MAX_BUF * TEST_SIZE);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc_ref = crc16_t10dif_ref(TEST_SEED, buf, MAX_BUF);
+ crc_base = crc16_t10dif_base(TEST_SEED, buf, MAX_BUF);
+ crc = crc16_t10dif(TEST_SEED, buf, MAX_BUF);
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc rand%3d = 0x%4x 0x%4x 0x%4x\n", i, crc_ref, crc_base, crc);
+ else if (i % (TEST_SIZE / 8) == 0)
+ printf(".");
+ buf += MAX_BUF;
+ }
+
+ // Do a few random sizes
+ buf = (unsigned char *)buf_raw; //reset buf
+ r = rand();
+
+ for (i = MAX_BUF; i >= 0; i--) {
+ crc_ref = crc16_t10dif_ref(r, buf, i);
+ crc_base = crc16_t10dif_base(r, buf, i);
+ crc = crc16_t10dif(r, buf, i);
+ if ((crc_base != crc_ref) || (crc != crc_ref)) {
+ fail++;
+ printf("fail random size%i 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base,
+ crc);
+ } else if (i % (MAX_BUF / 8) == 0)
+ printf(".");
+ }
+
+ // Try different seeds
+ for (s = 0; s < 20; s++) {
+ buf = (unsigned char *)buf_raw; //reset buf
+
+ r = rand(); // just to get a new seed
+ rand_buffer(buf, MAX_BUF * TEST_SIZE); // new pseudo-rand data
+
+ if (verbose)
+ printf("seed = 0x%x\n", r);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc_ref = crc16_t10dif_ref(r, buf, MAX_BUF);
+ crc_base = crc16_t10dif_base(r, buf, MAX_BUF);
+ crc = crc16_t10dif(r, buf, MAX_BUF);
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc rand%3d = 0x%4x 0x%4x 0x%4x\n", i, crc_ref,
+ crc_base, crc);
+ else if (i % (TEST_SIZE * 20 / 8) == 0)
+ printf(".");
+ buf += MAX_BUF;
+ }
+ }
+
+ // Run tests at end of buffer
+	buf = (unsigned char *)buf_raw;	// reset buf
+	buf = buf + ((MAX_BUF - 1) * TEST_SIZE);	// line up so TEST_SIZE bytes remain to the end of the buffer
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc_ref = crc16_t10dif_ref(TEST_SEED, buf + i, TEST_SIZE - i);
+ crc_base = crc16_t10dif_base(TEST_SEED, buf + i, TEST_SIZE - i);
+ crc = crc16_t10dif(TEST_SEED, buf + i, TEST_SIZE - i);
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc eob rand%3d = 0x%4x 0x%4x 0x%4x\n", i, crc_ref, crc_base,
+ crc);
+ else
+ printf(".");
+ }
+
+ printf("Test done: %s\n", fail ? "Fail" : "Pass");
+ if (fail)
+ printf("\nFailed %d tests\n", fail);
+
+ return fail;
+}
diff --git a/src/isa-l/crc/crc32_funcs_test.c b/src/isa-l/crc/crc32_funcs_test.c
new file mode 100644
index 000000000..e28da4018
--- /dev/null
+++ b/src/isa-l/crc/crc32_funcs_test.c
@@ -0,0 +1,324 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "crc.h"
+#include "types.h"
+#include "crc_ref.h"
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define MAX_BUF 4096
+#define TEST_SIZE 32
+
+typedef uint32_t(*crc32_func_t) (uint32_t, const uint8_t *, uint64_t);
+typedef uint32_t(*crc32_func_t_base) (uint32_t, uint8_t *, uint64_t);
+typedef uint32_t(*crc32_func_t_ref) (uint32_t, uint8_t *, uint64_t);
+
+typedef struct func_case {
+ char *note;
+ crc32_func_t crc32_func_call;
+ crc32_func_t_base crc32_base_call;
+ crc32_func_t_ref crc32_ref_call;
+} func_case_t;
+
+uint32_t crc32_iscsi_wrap(uint32_t seed, const uint8_t * buf, uint64_t len)
+{
+ return crc32_iscsi((uint8_t *) buf, len, seed);
+}
+
+uint32_t crc32_iscsi_base_wrap(uint32_t seed, uint8_t * buf, uint64_t len)
+{
+ return crc32_iscsi_base(buf, len, seed);
+}
+
+uint32_t crc32_iscsi_ref_wrap(uint32_t seed, uint8_t * buf, uint64_t len)
+{
+ return crc32_iscsi_ref(buf, len, seed);
+}
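+
+// These wrappers adapt crc32_iscsi's (buf, len, seed) argument order to
+// the common (seed, buf, len) signature used by the function table below.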
+
+func_case_t test_funcs[] = {
+	{"crc32_ieee", crc32_ieee, crc32_ieee_base, crc32_ieee_ref},
+	{"crc32_gzip_refl", crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_ref},
+	{"crc32_iscsi", crc32_iscsi_wrap, crc32_iscsi_base_wrap, crc32_iscsi_ref_wrap}
+};
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+// Test cases
+int zeros_test(func_case_t * test_func);
+int simple_pattern_test(func_case_t * test_func);
+int seeds_sizes_test(func_case_t * test_func);
+int eob_test(func_case_t * test_func);
+int update_test(func_case_t * test_func);
+
+int verbose = 0;
+void *buf_alloc = NULL;
+
+int main(int argc, char *argv[])
+{
+ int fail = 0, fail_case;
+ int i, ret;
+ func_case_t *test_func;
+
+ verbose = argc - 1;
+
+ // Align to TEST_SIZE boundary
+ ret = posix_memalign(&buf_alloc, TEST_SIZE, MAX_BUF * TEST_SIZE);
+ if (ret) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ srand(TEST_SEED);
+ printf("CRC32 Tests\n");
+
+ for (i = 0; i < sizeof(test_funcs) / sizeof(test_funcs[0]); i++) {
+ fail_case = 0;
+ test_func = &test_funcs[i];
+
+ printf("Test %s\t", test_func->note);
+ fail_case += zeros_test(test_func);
+ fail_case += simple_pattern_test(test_func);
+ fail_case += seeds_sizes_test(test_func);
+ fail_case += eob_test(test_func);
+ fail_case += update_test(test_func);
+ printf(" done: %s\n", fail_case ? "Fail" : "Pass");
+
+ if (fail_case) {
+ printf("\n%s Failed %d tests\n", test_func->note, fail_case);
+ fail++;
+ }
+ }
+
+ printf("CRC32 Tests all done: %s\n", fail ? "Fail" : "Pass");
+
+ return fail;
+}
+
+// Test of all zeros
+int zeros_test(func_case_t * test_func)
+{
+ uint32_t crc_ref, crc_base, crc;
+ int fail = 0;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc;
+ memset(buf, 0, MAX_BUF * 10);
+ crc_ref = test_func->crc32_ref_call(TEST_SEED, buf, MAX_BUF * 10);
+ crc_base = test_func->crc32_base_call(TEST_SEED, buf, MAX_BUF * 10);
+ crc = test_func->crc32_func_call(TEST_SEED, buf, MAX_BUF * 10);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref)) {
+ fail++;
+		printf("\n		ref	base	opt\n");
+		printf("		------	------	------\n");
+ printf("crc zero = 0x%8x 0x%8x 0x%8x\n", crc_ref, crc_base, crc);
+ } else
+ printf(".");
+
+ return fail;
+}
+
+// Another simple test pattern
+int simple_pattern_test(func_case_t * test_func)
+{
+ uint32_t crc_ref, crc_base, crc;
+ int fail = 0;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc;
+ memset(buf, 0x8a, MAX_BUF);
+ crc_ref = test_func->crc32_ref_call(TEST_SEED, buf, MAX_BUF);
+ crc_base = test_func->crc32_base_call(TEST_SEED, buf, MAX_BUF);
+ crc = test_func->crc32_func_call(TEST_SEED, buf, MAX_BUF);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc all 8a = 0x%8x 0x%8x 0x%8x\n", crc_ref, crc_base, crc);
+ else
+ printf(".");
+
+ return fail;
+}
+
+int seeds_sizes_test(func_case_t * test_func)
+{
+ uint32_t crc_ref, crc_base, crc;
+ int fail = 0;
+ int i;
+ uint64_t r, s;
+ unsigned char *buf = NULL;
+
+ // Do a few random tests
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+ rand_buffer(buf, MAX_BUF * TEST_SIZE);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc_ref = test_func->crc32_ref_call(r, buf, MAX_BUF);
+ crc_base = test_func->crc32_base_call(r, buf, MAX_BUF);
+ crc = test_func->crc32_func_call(r, buf, MAX_BUF);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc rand%3d = 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base, crc);
+ else if (i % (TEST_SIZE / 8) == 0)
+ printf(".");
+ buf += MAX_BUF;
+ }
+
+ // Do a few random sizes
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+
+ for (i = MAX_BUF; i >= 0; i--) {
+ crc_ref = test_func->crc32_ref_call(r, buf, i);
+ crc_base = test_func->crc32_base_call(r, buf, i);
+ crc = test_func->crc32_func_call(r, buf, i);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref)) {
+ fail++;
+			printf("fail random size %i 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base,
+			       crc);
+ } else if (i % (MAX_BUF / 8) == 0)
+ printf(".");
+ }
+
+ // Try different seeds
+ for (s = 0; s < 20; s++) {
+ buf = (unsigned char *)buf_alloc; //reset buf
+
+ r = rand(); // just to get a new seed
+ rand_buffer(buf, MAX_BUF * TEST_SIZE); // new pseudo-rand data
+
+ if (verbose)
+ printf("seed = 0x%lx\n", r);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc_ref = test_func->crc32_ref_call(r, buf, MAX_BUF);
+ crc_base = test_func->crc32_base_call(r, buf, MAX_BUF);
+ crc = test_func->crc32_func_call(r, buf, MAX_BUF);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc rand%3d = 0x%8x 0x%8x 0x%8x\n", i, crc_ref,
+ crc_base, crc);
+ else if (i % (TEST_SIZE * 20 / 8) == 0)
+ printf(".");
+ buf += MAX_BUF;
+ }
+ }
+
+ return fail;
+}
+
+// Run tests at end of buffer
+int eob_test(func_case_t * test_func)
+{
+ uint32_t crc_ref, crc_base, crc;
+ int fail = 0;
+ int i;
+ unsigned char *buf = NULL;
+
+ // Null test
+ if (0 != test_func->crc32_func_call(0, NULL, 0)) {
+ fail++;
+ printf("crc null test fail\n");
+ }
+
+	buf = (unsigned char *)buf_alloc;	// reset buf
+	buf = buf + ((MAX_BUF - 1) * TEST_SIZE);	// line up so TEST_SIZE bytes remain to the end of the buffer
+ for (i = 0; i <= TEST_SIZE; i++) {
+ crc_ref = test_func->crc32_ref_call(TEST_SEED, buf + i, TEST_SIZE - i);
+ crc_base = test_func->crc32_base_call(TEST_SEED, buf + i, TEST_SIZE - i);
+ crc = test_func->crc32_func_call(TEST_SEED, buf + i, TEST_SIZE - i);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc eob rand%3d = 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base,
+ crc);
+ else if (i % (TEST_SIZE / 8) == 0)
+ printf(".");
+ }
+
+ return fail;
+}
+
+int update_test(func_case_t * test_func)
+{
+ uint32_t crc_ref, crc_base, crc;
+ int fail = 0;
+ int i;
+ uint64_t r;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+	// Process the whole buffer with a single call to the reference and base functions.
+ crc_ref = test_func->crc32_ref_call(r, buf, MAX_BUF * TEST_SIZE);
+ crc_base = test_func->crc32_base_call(r, buf, MAX_BUF * TEST_SIZE);
+	// Process the buffer in MAX_BUF-sized chunks using the update (chained-seed) method.
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc = test_func->crc32_func_call(r, buf, MAX_BUF);
+ // Update crc seeds and buf pointer.
+ r = crc;
+ buf += MAX_BUF;
+ }
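+	// Chaining each call on the previous result must match the single-call
+	// reference and base CRCs computed above.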
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+		printf("crc update = 0x%8x 0x%8x 0x%8x\n", crc_ref, crc_base, crc);
+ else
+ printf(".");
+
+ return fail;
+}
diff --git a/src/isa-l/crc/crc32_gzip_refl_by16_10.asm b/src/isa-l/crc/crc32_gzip_refl_by16_10.asm
new file mode 100644
index 000000000..15280b8cf
--- /dev/null
+++ b/src/isa-l/crc/crc32_gzip_refl_by16_10.asm
@@ -0,0 +1,569 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; UINT32 crc32_gzip_refl_by16_10(
+; UINT32 init_crc, //initial CRC value, 32 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://download.intel.com/design/intarch/papers/323102.pdf
+;
+;
+; sample yasm command line:
+; yasm -f elf64 -X gnu -g dwarf2 crc32_gzip_refl_by16_10
+;
+; As explained here:
+; http://docs.oracle.com/javase/7/docs/api/java/util/zip/package-summary.html
+; CRC-32 checksum is described in RFC 1952
+; Implementing RFC 1952 CRC:
+; http://www.ietf.org/rfc/rfc1952.txt
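+;
+; Minimal caller sketch (hypothetical, for illustration; applications
+; normally reach this routine through the crc32_gzip_refl() dispatcher
+; rather than this arch-specific entry point):
+;	uint32_t crc = crc32_gzip_refl_by16_10(0, buf, len);
+; The result matches a zlib/RFC 1952 CRC-32 of buf[0..len-1].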
+
+%include "reg_sizes.asm"
+
+%ifndef FUNCTION_NAME
+%define FUNCTION_NAME crc32_gzip_refl_by16_10
+%endif
+
+%if (AS_FEATURE_LEVEL) >= 10
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*12+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global FUNCTION_NAME, function
+FUNCTION_NAME:
+ endbranch
+
+ not arg1_low32
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+ vmovdqa [rsp + XMM_SAVE + 16*8], xmm14
+ vmovdqa [rsp + XMM_SAVE + 16*9], xmm15
+%endif
+
+ ; check if smaller than 256B
+ cmp arg3, 256
+ jl .less_than_256
+
+ ; load the initial crc value
+ vmovd xmm10, arg1_low32 ; initial crc
+
+	; load the initial 128B of data and xor in the initial crc value
+ vmovdqu8 zmm0, [arg2+16*0]
+ vmovdqu8 zmm4, [arg2+16*4]
+ vpxorq zmm0, zmm10
+	vbroadcasti32x4 zmm10, [rk3]	;zmm10 has rk3 and rk4 in every 128-bit lane
+ ;imm value of pclmulqdq instruction will determine which constant to use
+
+ sub arg3, 256
+ cmp arg3, 256
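+	; buffers smaller than 512B skip the 256B loop and fold 128B at a time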
+ jl .fold_128_B_loop
+
+ vmovdqu8 zmm7, [arg2+16*8]
+ vmovdqu8 zmm8, [arg2+16*12]
+	vbroadcasti32x4 zmm16, [rk_1]	;zmm16 has rk_1 and rk_2 in every 128-bit lane
+ sub arg3, 256
+
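+	; each iteration folds the four 512-bit accumulators forward over the
+	; next 256B of input: a carry-less multiply by the broadcast constants
+	; in zmm16 (rk_1:rk_2), then an xor with the newly loaded data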
+.fold_256_B_loop:
+ add arg2, 256
+ vmovdqu8 zmm3, [arg2+16*0]
+ vpclmulqdq zmm1, zmm0, zmm16, 0x10
+ vpclmulqdq zmm2, zmm0, zmm16, 0x01
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm3
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpclmulqdq zmm5, zmm4, zmm16, 0x10
+ vpclmulqdq zmm6, zmm4, zmm16, 0x01
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ vmovdqu8 zmm11, [arg2+16*8]
+ vpclmulqdq zmm12, zmm7, zmm16, 0x10
+ vpclmulqdq zmm13, zmm7, zmm16, 0x01
+ vpxorq zmm7, zmm12, zmm13
+ vpxorq zmm7, zmm7, zmm11
+
+ vmovdqu8 zmm17, [arg2+16*12]
+ vpclmulqdq zmm14, zmm8, zmm16, 0x10
+ vpclmulqdq zmm15, zmm8, zmm16, 0x01
+ vpxorq zmm8, zmm14, zmm15
+ vpxorq zmm8, zmm8, zmm17
+
+ sub arg3, 256
+ jge .fold_256_B_loop
+
+ ;; Fold 256 into 128
+ add arg2, 256
+ vpclmulqdq zmm1, zmm0, zmm10, 0x01
+ vpclmulqdq zmm2, zmm0, zmm10, 0x10
+ vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC
+
+ vpclmulqdq zmm5, zmm4, zmm10, 0x01
+ vpclmulqdq zmm6, zmm4, zmm10, 0x10
+ vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC
+
+ vmovdqa32 zmm0, zmm7
+ vmovdqa32 zmm4, zmm8
+
+ add arg3, 128
+ jmp .fold_128_B_register
+
+
+
+	; at this point there are 128*x+y (0<=y<128) bytes of buffer left. The
+	; fold_128_B_loop below folds 128B at a time until only 128+y bytes remain
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+.fold_128_B_loop:
+ add arg2, 128
+ vmovdqu8 zmm8, [arg2+16*0]
+ vpclmulqdq zmm2, zmm0, zmm10, 0x10
+ vpclmulqdq zmm1, zmm0, zmm10, 0x01
+ vpxorq zmm0, zmm2, zmm1
+ vpxorq zmm0, zmm0, zmm8
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpclmulqdq zmm5, zmm4, zmm10, 0x10
+ vpclmulqdq zmm6, zmm4, zmm10, 0x01
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ sub arg3, 128
+ jge .fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+	; the 128B of folded data is in zmm0 and zmm4
+
+.fold_128_B_register:
+ ; fold the 8 128b parts into 1 xmm register with different constants
+ vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16
+ vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0
+ vpclmulqdq zmm1, zmm0, zmm16, 0x01
+ vpclmulqdq zmm2, zmm0, zmm16, 0x10
+	vextracti64x2 xmm7, zmm4, 3	; save the last 128-bit lane, which has no fold multiplicand
+
+ vpclmulqdq zmm5, zmm4, zmm11, 0x01
+ vpclmulqdq zmm6, zmm4, zmm11, 0x10
+ vmovdqa xmm10, [rk1] ; Needed later in reduction loop
+ vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC
+ vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC
+
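+	; reduce 512b -> 256b -> 128b by xoring the upper and lower halves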
+ vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10
+ vpxorq ymm8, ymm8, ymm1
+ vextracti64x2 xmm5, ymm8, 1
+ vpxorq xmm7, xmm5, xmm8
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl .final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+.16B_reduction_loop:
+ vpclmulqdq xmm8, xmm7, xmm10, 0x1
+ vpclmulqdq xmm7, xmm7, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge .16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+.final_reduction_for_128:
+ add arg3, 16
+ je .128_done
+
+ ; here we are getting data that is less than 16 bytes.
+ ; since we know that there was data before the pointer, we can offset
+ ; the input pointer before the actual point, to receive exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+.get_last_two_xmms:
+
+ vmovdqa xmm2, xmm7
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ vmovdqu xmm0, [rax]
+
+ vpshufb xmm7, xmm0
+ vpxor xmm0, [mask3]
+ vpshufb xmm2, xmm0
+
+ vpblendvb xmm2, xmm2, xmm1, xmm0
+ ;;;;;;;;;;
+ vpclmulqdq xmm8, xmm7, xmm10, 0x1
+ vpclmulqdq xmm7, xmm7, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+.128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5]
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0
+ vpsrldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm7
+ vpslldq xmm7, 4
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm0
+
+
+ ;barrett reduction
+.barrett:
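+	; two carry-less multiplies by the precomputed constants in rk7/rk8
+	; (a quotient estimate for the polynomial and the polynomial itself)
+	; stand in for a full polynomial division; the 32-bit remainder is
+	; then extracted from xmm7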
+ vpand xmm7, [mask2]
+ vmovdqa xmm1, xmm7
+ vmovdqa xmm2, xmm7
+ vmovdqa xmm10, [rk7]
+
+ vpclmulqdq xmm7, xmm10, 0
+ vpxor xmm7, xmm2
+ vpand xmm7, [mask]
+ vmovdqa xmm2, xmm7
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm2
+ vpxor xmm7, xmm1
+ vpextrd eax, xmm7, 2
+
+.cleanup:
+ not eax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+ vmovdqa xmm14, [rsp + XMM_SAVE + 16*8]
+ vmovdqa xmm15, [rsp + XMM_SAVE + 16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+.less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl .less_than_32
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp .16B_reduction_loop
+
+
+align 16
+.less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je .cleanup
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+
+ cmp arg3, 16
+ je .exact_16_left
+ jl .less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp .get_last_two_xmms
+
+align 16
+.less_than_16_left:
+	; inputs shorter than 16 bytes: copy the data into a zeroed 16B scratch area on the stack first
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl .only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl .less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+.less_than_8_left:
+
+ cmp arg3, 4
+ jl .less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+.less_than_4_left:
+
+ cmp arg3, 2
+ jl .less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+.less_than_2_left:
+ cmp arg3, 1
+ jl .zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+.zero_left:
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+ vmovdqu xmm0, [rax + r9]
+ vpshufb xmm7,xmm0
+ jmp .128_done
+
+align 16
+.exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ jmp .128_done
+
+.only_less_than_4:
+ cmp arg3, 3
+ jl .only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 5
+ jmp .barrett
+
+.only_less_than_3:
+ cmp arg3, 2
+ jl .only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 6
+ jmp .barrett
+
+.only_less_than_2:
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 7
+ jmp .barrett
+
+section .data
+align 32
+
+%ifndef USE_CONSTS
+; precomputed constants
+rk_1: dq 0x00000000e95c1271
+rk_2: dq 0x00000000ce3371cb
+rk1: dq 0x00000000ccaa009e
+rk2: dq 0x00000001751997d0
+rk3: dq 0x000000014a7fe880
+rk4: dq 0x00000001e88ef372
+rk5: dq 0x00000000ccaa009e
+rk6: dq 0x0000000163cd6124
+rk7: dq 0x00000001f7011640
+rk8: dq 0x00000001db710640
+rk9: dq 0x00000001d7cfc6ac
+rk10: dq 0x00000001ea89367e
+rk11: dq 0x000000018cb44e58
+rk12: dq 0x00000000df068dc2
+rk13: dq 0x00000000ae0b5394
+rk14: dq 0x00000001c7569e54
+rk15: dq 0x00000001c6e41596
+rk16: dq 0x0000000154442bd4
+rk17: dq 0x0000000174359406
+rk18: dq 0x000000003db1ecdc
+rk19: dq 0x000000015a546366
+rk20: dq 0x00000000f1da05aa
+
+rk_1b: dq 0x00000000ccaa009e
+rk_2b: dq 0x00000001751997d0
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%else
+INCLUDE_CONSTS
+%endif
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3: dq 0x8080808080808080, 0x8080808080808080
+
+%else ; Assembler doesn't understand these opcodes. Add empty symbol for windows.
+%ifidn __OUTPUT_FORMAT__, win64
+global no_ %+ FUNCTION_NAME
+no_ %+ FUNCTION_NAME %+ :
+%endif
+%endif ; (AS_FEATURE_LEVEL) >= 10
diff --git a/src/isa-l/crc/crc32_gzip_refl_by8.asm b/src/isa-l/crc/crc32_gzip_refl_by8.asm
new file mode 100644
index 000000000..43840244a
--- /dev/null
+++ b/src/isa-l/crc/crc32_gzip_refl_by8.asm
@@ -0,0 +1,625 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; UINT32 crc32_gzip_refl_by8(
+; UINT32 init_crc, //initial CRC value, 32 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://download.intel.com/design/intarch/papers/323102.pdf
+;
+;
+; sample yasm command line:
+; yasm -f elf64 -X gnu -g dwarf2 crc32_gzip_refl_by8
+;
+; As explained here:
+; http://docs.oracle.com/javase/7/docs/api/java/util/zip/package-summary.html
+; CRC-32 checksum is described in RFC 1952
+; Implementing RFC 1952 CRC:
+; http://www.ietf.org/rfc/rfc1952.txt
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
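+; prefetchnta hints in the fold loop pull data this many bytes ahead of
+; the current buffer pointer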
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global crc32_gzip_refl_by8, function
+crc32_gzip_refl_by8:
+ endbranch
+
+ ; unsigned long c = crc ^ 0xffffffffL;
+ not arg1_low32 ;
+
+
+ sub rsp, VARIABLE_OFFSET
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+ ; check if smaller than 256B
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movd xmm10, arg1_low32 ; initial crc
+
+	; load the initial 128B of data, xor the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0<=y<128) bytes of buffer left. The
+	; _fold_128_B_loop below folds 128B at a time until only 128+y bytes remain
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+ pclmulqdq xmm0, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm1, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+ pclmulqdq xmm2, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm3, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+ pclmulqdq xmm4, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm5, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+ pclmulqdq xmm6, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm7, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
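+	; xmm0..xmm6 each hold a partial remainder at a different byte offset
+	; from the end of the folded region, so each gets its own constant
+	; pair (rk9/rk11/rk13/rk15/rk17/rk19, then rk1 for the final 16B gap)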
+
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+
+ movdqa xmm10, [rk1]
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ add arg3, 16
+ je _128_done
+
+	; here we are getting data that is less than 16 bytes.
+	; since we know that there was data before the pointer, we can offset
+	; the input pointer before the actual point, to receive exactly 16 bytes.
+	; after that the registers need to be adjusted.
+_get_last_two_xmms:
+
+
+ movdqa xmm2, xmm7
+ movdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ movdqu xmm0, [rax]
+
+
+ pshufb xmm7, xmm0
+ pxor xmm0, [mask3]
+ pshufb xmm2, xmm0
+
+ pblendvb xmm2, xmm1 ;xmm0 is implicit
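+	; xmm2 now merges the overlapping bytes of the old remainder with the
+	; last 16B of input loaded in xmm1, so the final 16B fold below
+	; consumes the tail exactly once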
+ ;;;;;;;;;;
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5]
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0
+ psrldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;32b fold
+ movdqa xmm0, xmm7
+ pslldq xmm7, 4
+ pclmulqdq xmm7, xmm10, 0x10
+
+ pxor xmm7, xmm0
+
+
+ ;barrett reduction
+_barrett:
+ pand xmm7, [mask2]
+ movdqa xmm1, xmm7
+ movdqa xmm2, xmm7
+ movdqa xmm10, [rk7]
+
+ pclmulqdq xmm7, xmm10, 0
+ pxor xmm7, xmm2
+ pand xmm7, [mask]
+ movdqa xmm2, xmm7
+ pclmulqdq xmm7, xmm10, 0x10
+ pxor xmm7, xmm2
+ pxor xmm7, xmm1
+ pextrd eax, xmm7, 2
+
+_cleanup:
+ ; return c ^ 0xffffffffL;
+ not eax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je _cleanup
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+	; inputs shorter than 16 bytes: copy the data into a zeroed 16B scratch area on the stack first
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+_zero_left:
+ movdqa xmm7, [rsp]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+ movdqu xmm0, [rax + r9]
+	pshufb	xmm7, xmm0
+
+	jmp	_128_done
+
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg3, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ movdqa xmm7, [rsp]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ pslldq xmm7, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg3, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ movdqa xmm7, [rsp]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ pslldq xmm7, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ movdqa xmm7, [rsp]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ pslldq xmm7, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+align 16
+rk1:	dq 0x00000000ccaa009e
+rk2:	dq 0x00000001751997d0
+rk3:	dq 0x000000014a7fe880
+rk4:	dq 0x00000001e88ef372
+rk5:	dq 0x00000000ccaa009e
+rk6:	dq 0x0000000163cd6124
+rk7:	dq 0x00000001f7011640
+rk8:	dq 0x00000001db710640
+rk9:	dq 0x00000001d7cfc6ac
+rk10:	dq 0x00000001ea89367e
+rk11:	dq 0x000000018cb44e58
+rk12:	dq 0x00000000df068dc2
+rk13:	dq 0x00000000ae0b5394
+rk14:	dq 0x00000001c7569e54
+rk15:	dq 0x00000001c6e41596
+rk16:	dq 0x0000000154442bd4
+rk17:	dq 0x0000000174359406
+rk18:	dq 0x000000003db1ecdc
+rk19:	dq 0x000000015a546366
+rk20:	dq 0x00000000f1da05aa
+
+mask:	dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2:	dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3:	dq 0x8080808080808080, 0x8080808080808080
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+;;; func core, ver, snum
+slversion crc32_gzip_refl_by8, 01, 00, 002c
diff --git a/src/isa-l/crc/crc32_gzip_refl_by8_02.asm b/src/isa-l/crc/crc32_gzip_refl_by8_02.asm
new file mode 100644
index 000000000..712fe87aa
--- /dev/null
+++ b/src/isa-l/crc/crc32_gzip_refl_by8_02.asm
@@ -0,0 +1,556 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; UINT32 crc32_gzip_refl_by8_02(
+; UINT32 init_crc, //initial CRC value, 32 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://download.intel.com/design/intarch/papers/323102.pdf
+;
+;
+; sample yasm command line:
+; yasm -f elf64 -X gnu -g dwarf2 crc32_gzip_refl_by8_02
+;
+; As explained here:
+; http://docs.oracle.com/javase/7/docs/api/java/util/zip/package-summary.html
+; CRC-32 checksum is described in RFC 1952
+; Implementing RFC 1952 CRC:
+; http://www.ietf.org/rfc/rfc1952.txt
+
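+; Note: this file is the VEX-encoded (AVX) variant of crc32_gzip_refl_by8.
+; The control flow is the same, but three-operand vpclmulqdq forms remove
+; the movdqa register copies that the SSE version needs.
+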
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global crc32_gzip_refl_by8_02, function
+crc32_gzip_refl_by8_02:
+ endbranch
+ not arg1_low32
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+ ; check if smaller than 256B
+ cmp arg3, 256
+ jl .less_than_256
+
+ ; load the initial crc value
+ vmovd xmm10, arg1_low32 ; initial crc
+
+	; load the initial 128B of data, xor the initial crc value
+ vmovdqu xmm0, [arg2+16*0]
+ vmovdqu xmm1, [arg2+16*1]
+ vmovdqu xmm2, [arg2+16*2]
+ vmovdqu xmm3, [arg2+16*3]
+ vmovdqu xmm4, [arg2+16*4]
+ vmovdqu xmm5, [arg2+16*5]
+ vmovdqu xmm6, [arg2+16*6]
+ vmovdqu xmm7, [arg2+16*7]
+
+ ; XOR the initial_crc value
+ vpxor xmm0, xmm10
+ vmovdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0<=y<128) bytes of buffer left. The
+	; fold_128_B_loop below folds 128B at a time until only 128+y bytes remain
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+.fold_128_B_loop:
+ add arg2, 128
+ prefetchnta [arg2+fetch_dist+0]
+ vmovdqu xmm9, [arg2+16*0]
+ vmovdqu xmm12, [arg2+16*1]
+ vpclmulqdq xmm8, xmm0, xmm10, 0x10
+ vpclmulqdq xmm0, xmm0, xmm10 , 0x1
+ vpclmulqdq xmm13, xmm1, xmm10, 0x10
+ vpclmulqdq xmm1, xmm1, xmm10 , 0x1
+ vpxor xmm0, xmm9
+ vxorps xmm0, xmm8
+ vpxor xmm1, xmm12
+ vxorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ vmovdqu xmm9, [arg2+16*2]
+ vmovdqu xmm12, [arg2+16*3]
+ vpclmulqdq xmm8, xmm2, xmm10, 0x10
+ vpclmulqdq xmm2, xmm2, xmm10 , 0x1
+ vpclmulqdq xmm13, xmm3, xmm10, 0x10
+ vpclmulqdq xmm3, xmm3, xmm10 , 0x1
+ vpxor xmm2, xmm9
+ vxorps xmm2, xmm8
+ vpxor xmm3, xmm12
+ vxorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ vmovdqu xmm9, [arg2+16*4]
+ vmovdqu xmm12, [arg2+16*5]
+ vpclmulqdq xmm8, xmm4, xmm10, 0x10
+ vpclmulqdq xmm4, xmm4, xmm10 , 0x1
+ vpclmulqdq xmm13, xmm5, xmm10, 0x10
+ vpclmulqdq xmm5, xmm5, xmm10 , 0x1
+ vpxor xmm4, xmm9
+ vxorps xmm4, xmm8
+ vpxor xmm5, xmm12
+ vxorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ vmovdqu xmm9, [arg2+16*6]
+ vmovdqu xmm12, [arg2+16*7]
+ vpclmulqdq xmm8, xmm6, xmm10, 0x10
+ vpclmulqdq xmm6, xmm6, xmm10 , 0x1
+ vpclmulqdq xmm13, xmm7, xmm10, 0x10
+ vpclmulqdq xmm7, xmm7, xmm10 , 0x1
+ vpxor xmm6, xmm9
+ vxorps xmm6, xmm8
+ vpxor xmm7, xmm12
+ vxorps xmm7, xmm13
+
+ sub arg3, 128
+ jge .fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+ vmovdqa xmm10, [rk9]
+ vpclmulqdq xmm8, xmm0, xmm10, 0x1
+ vpclmulqdq xmm0, xmm0, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm0
+
+ vmovdqa xmm10, [rk11]
+ vpclmulqdq xmm8, xmm1, xmm10, 0x1
+ vpclmulqdq xmm1, xmm1, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm1
+
+ vmovdqa xmm10, [rk13]
+ vpclmulqdq xmm8, xmm2, xmm10, 0x1
+ vpclmulqdq xmm2, xmm2, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+ vmovdqa xmm10, [rk15]
+ vpclmulqdq xmm8, xmm3, xmm10, 0x1
+ vpclmulqdq xmm3, xmm3, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm3
+
+ vmovdqa xmm10, [rk17]
+ vpclmulqdq xmm8, xmm4, xmm10, 0x1
+ vpclmulqdq xmm4, xmm4, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm4
+
+ vmovdqa xmm10, [rk19]
+ vpclmulqdq xmm8, xmm5, xmm10, 0x1
+ vpclmulqdq xmm5, xmm5, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm5
+
+ vmovdqa xmm10, [rk1]
+ vpclmulqdq xmm8, xmm6, xmm10, 0x1
+ vpclmulqdq xmm6, xmm6, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm6
+
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl .final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+.16B_reduction_loop:
+ vpclmulqdq xmm8, xmm7, xmm10, 0x1
+ vpclmulqdq xmm7, xmm7, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge .16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+.final_reduction_for_128:
+ add arg3, 16
+ je .128_done
+
+ ; here we are getting data that is less than 16 bytes.
+ ; since we know that there was data before the pointer, we can offset
+ ; the input pointer before the actual point, to receive exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+.get_last_two_xmms:
+
+ vmovdqa xmm2, xmm7
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ vmovdqu xmm0, [rax]
+
+ vpshufb xmm7, xmm0
+ vpxor xmm0, [mask3]
+ vpshufb xmm2, xmm0
+
+ vpblendvb xmm2, xmm2, xmm1, xmm0
+ ;;;;;;;;;;
+ vpclmulqdq xmm8, xmm7, xmm10, 0x1
+ vpclmulqdq xmm7, xmm7, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+.128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5]
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0
+ vpsrldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm7
+ vpslldq xmm7, 4
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm0
+
+
+ ;barrett reduction
+.barrett:
+ vpand xmm7, [mask2]
+ vmovdqa xmm1, xmm7
+ vmovdqa xmm2, xmm7
+ vmovdqa xmm10, [rk7]
+
+ vpclmulqdq xmm7, xmm10, 0
+ vpxor xmm7, xmm2
+ vpand xmm7, [mask]
+ vmovdqa xmm2, xmm7
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm2
+ vpxor xmm7, xmm1
+ vpextrd eax, xmm7, 2
+
+.cleanup:
+ not eax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+.less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl .less_than_32
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp .16B_reduction_loop
+
+
+align 16
+.less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je .cleanup
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+
+ cmp arg3, 16
+ je .exact_16_left
+ jl .less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp .get_last_two_xmms
+
+align 16
+.less_than_16_left:
+	; inputs shorter than 16 bytes: copy the data into a zeroed 16B scratch area on the stack first
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl .only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl .less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+.less_than_8_left:
+
+ cmp arg3, 4
+ jl .less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+.less_than_4_left:
+
+ cmp arg3, 2
+ jl .less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+.less_than_2_left:
+ cmp arg3, 1
+ jl .zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+.zero_left:
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+ vmovdqu xmm0, [rax + r9]
+ vpshufb xmm7,xmm0
+ jmp .128_done
+
+align 16
+.exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ jmp .128_done
+
+.only_less_than_4:
+ cmp arg3, 3
+ jl .only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 5
+ jmp .barrett
+
+.only_less_than_3:
+ cmp arg3, 2
+ jl .only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 6
+ jmp .barrett
+
+.only_less_than_2:
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 7
+ jmp .barrett
+
+section .data
+
+; precomputed constants
+align 16
+rk1: dq 0x00000000ccaa009e
+rk2: dq 0x00000001751997d0
+rk3: dq 0x000000014a7fe880
+rk4: dq 0x00000001e88ef372
+rk5: dq 0x00000000ccaa009e
+rk6: dq 0x0000000163cd6124
+rk7: dq 0x00000001f7011640
+rk8: dq 0x00000001db710640
+rk9: dq 0x00000001d7cfc6ac
+rk10: dq 0x00000001ea89367e
+rk11: dq 0x000000018cb44e58
+rk12: dq 0x00000000df068dc2
+rk13: dq 0x00000000ae0b5394
+rk14: dq 0x00000001c7569e54
+rk15: dq 0x00000001c6e41596
+rk16: dq 0x0000000154442bd4
+rk17: dq 0x0000000174359406
+rk18: dq 0x000000003db1ecdc
+rk19: dq 0x000000015a546366
+rk20: dq 0x00000000f1da05aa
+
+mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3: dq 0x8080808080808080, 0x8080808080808080
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
diff --git a/src/isa-l/crc/crc32_gzip_refl_perf.c b/src/isa-l/crc/crc32_gzip_refl_perf.c
new file mode 100644
index 000000000..ad3d86fb5
--- /dev/null
+++ b/src/isa-l/crc/crc32_gzip_refl_perf.c
@@ -0,0 +1,91 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include "crc.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (2 * GT_L3_CACHE)
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define TEST_MEM TEST_LEN
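+
+/*
+ * API sketch, per the calls exercised below: both routines take a 32-bit
+ * seed, a buffer pointer, and a byte length, and return the CRC, e.g.
+ *
+ *     uint32_t c = crc32_gzip_refl(TEST_SEED, buf, TEST_LEN);
+ *
+ * crc32_gzip_refl_base() is the generic table-driven reference that the
+ * accelerated version is measured against.
+ */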
+
+int main(int argc, char *argv[])
+{
+ void *buf;
+ uint32_t crc;
+ struct perf start;
+
+ printf("crc32_gzip_refl_perf:\n");
+
+ if (posix_memalign(&buf, 1024, TEST_LEN)) {
+		printf("alloc error: Fail\n");
+ return -1;
+ }
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ memset(buf, 0, TEST_LEN);
+ BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_gzip_refl(TEST_SEED, buf, TEST_LEN));
+ printf("crc32_gzip_refl" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ printf("finish 0x%x\n", crc);
+
+ printf("crc32_gzip_refl_base_perf:\n");
+ printf("Start timed tests\n");
+ fflush(0);
+
+ BENCHMARK(&start, BENCHMARK_TIME, crc =
+ crc32_gzip_refl_base(TEST_SEED, buf, TEST_LEN));
+ printf("crc32_gzip_refl_base" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ printf("finish 0x%x\n", crc);
+
+ return 0;
+}
diff --git a/src/isa-l/crc/crc32_ieee_01.asm b/src/isa-l/crc/crc32_ieee_01.asm
new file mode 100644
index 000000000..368261de2
--- /dev/null
+++ b/src/isa-l/crc/crc32_ieee_01.asm
@@ -0,0 +1,656 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function API:
+; UINT32 crc32_ieee_01(
+; UINT32 init_crc, //initial CRC value, 32 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+align 16
+mk_global crc32_ieee_01, function
+crc32_ieee_01:
+ endbranch
+
+ not arg1_low32 ;~init_crc
+
+ sub rsp,VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+
+ ; check if smaller than 256
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movd xmm10, arg1_low32 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then line up with the initial crc in the correct place.
+ pslldq xmm10, 12
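+	; (the 4 seed bytes now occupy the top dword of xmm10, so after the
+	; big-endian byte shuffle below they line up with the first 4 bytes
+	; of the message)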
+
+ movdqa xmm11, [SHUF_MASK]
+ ; receive the initial 128B data, xor the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ pshufb xmm0, xmm11
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ pshufb xmm1, xmm11
+ pshufb xmm2, xmm11
+ pshufb xmm3, xmm11
+ pshufb xmm4, xmm11
+ pshufb xmm5, xmm11
+ pshufb xmm6, xmm11
+ pshufb xmm7, xmm11
+
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
+	; below folds 128B at a time until only 128+y bytes of buffer remain
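+	;
+	; per 16B lane, each iteration computes
+	;   lane = clmul(lane.lo64, rk3) xor clmul(lane.hi64, rk4) xor next_data
+	; (the 0x0 / 0x11 immediates on pclmulqdq select the rk3/rk4 halves of
+	; xmm10), which carries the partial remainder forward across 128 bytes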
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128 ; buf += 128;
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+ pclmulqdq xmm0, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm1, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+ pclmulqdq xmm2, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm3, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+ pclmulqdq xmm4, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm5, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+ pclmulqdq xmm6, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm7, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+	; the 128B of folded data is in 8 of the xmm registers: xmm0 - xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+
+ movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pshufb xmm0, xmm11
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here we are handling a tail of less than 16 bytes.
+	; since we know there was data before the pointer, we can back the input pointer up
+	; so that the load ends exactly at the end of the buffer and reads exactly 16 bytes.
+	; after that, the registers need to be adjusted.
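+	; (e.g. with a 5-byte tail, the load at [arg2 - 16 + 5] re-reads 11
+	; already-folded bytes plus the 5 new ones; the pshufb masks below then
+	; splice xmm7 and that load into one clean 16B block for a final fold)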
+_get_last_two_xmms:
+ movdqa xmm2, xmm7
+
+ movdqu xmm1, [arg2 - 16 + arg3]
+ pshufb xmm1, xmm11
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ movdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ pshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ pxor xmm0, [mask1]
+ pshufb xmm7, xmm0
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ ; fold 16 Bytes
+ movdqa xmm2, xmm1
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0x1
+ pslldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;32b fold
+ movdqa xmm0, xmm7
+
+ pand xmm0, [mask2]
+
+ psrldq xmm7, 12
+ pclmulqdq xmm7, xmm10, 0x10
+ pxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
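+	; rk7 holds the Barrett constant u = floor(x^64 / P) and rk8 holds the
+	; polynomial P (0x104c11db7); conceptually: q = floor(R/x^32)*u, then
+	; crc = (R xor floor(q/x^32)*P) mod x^32, extracted from dword 1 below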
+ movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ movdqa xmm0, xmm7
+ pclmulqdq xmm7, xmm10, 0x01
+ pslldq xmm7, 4
+ pclmulqdq xmm7, xmm10, 0x11
+
+ pslldq xmm7, 4
+ pxor xmm7, xmm0
+ pextrd eax, xmm7,1
+
+_cleanup:
+ not eax
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp,VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ movdqa xmm11, [SHUF_MASK]
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je _cleanup
+
+ movdqa xmm11, [SHUF_MASK]
+
+ movd xmm0, arg1_low32 ; get the initial crc value
+ pslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ ; shl r9, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask1]
+
+ pshufb xmm7, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg3, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ psrldq xmm7, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg3, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ psrldq xmm7, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ psrldq xmm7, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+align 16
+
+rk1 :
+DQ 0xf200aa6600000000
+rk2 :
+DQ 0x17d3315d00000000
+rk3 :
+DQ 0x022ffca500000000
+rk4 :
+DQ 0x9d9ee22f00000000
+rk5 :
+DQ 0xf200aa6600000000
+rk6 :
+DQ 0x490d678d00000000
+rk7 :
+DQ 0x0000000104d101df
+rk8 :
+DQ 0x0000000104c11db7
+rk9 :
+DQ 0x6ac7e7d700000000
+rk10 :
+DQ 0xfcd922af00000000
+rk11 :
+DQ 0x34e45a6300000000
+rk12 :
+DQ 0x8762c1f600000000
+rk13 :
+DQ 0x5395a0ea00000000
+rk14 :
+DQ 0x54f2d5c700000000
+rk15 :
+DQ 0xd3504ec700000000
+rk16 :
+DQ 0x57a8445500000000
+rk17 :
+DQ 0xc053585d00000000
+rk18 :
+DQ 0x766f1b7800000000
+rk19 :
+DQ 0xcd8c54b500000000
+rk20 :
+DQ 0xab40b71e00000000
+
+
+
+
+
+
+
+
+
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+;;; func core, ver, snum
+slversion crc32_ieee_01, 01, 06, 0011
+
diff --git a/src/isa-l/crc/crc32_ieee_02.asm b/src/isa-l/crc/crc32_ieee_02.asm
new file mode 100644
index 000000000..95d53e8a3
--- /dev/null
+++ b/src/isa-l/crc/crc32_ieee_02.asm
@@ -0,0 +1,652 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function API:
+; UINT32 crc32_ieee_02(
+; UINT32 init_crc, //initial CRC value, 32 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+align 16
+mk_global crc32_ieee_02, function
+crc32_ieee_02:
+ endbranch
+
+ not arg1_low32 ;~init_crc
+
+ sub rsp,VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+
+ ; check if smaller than 256
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ vmovd xmm10, arg1_low32 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then line up with the initial crc in the correct place.
+ vpslldq xmm10, 12
+
+ vmovdqa xmm11, [SHUF_MASK]
+ ; receive the initial 128B data, xor the initial crc value
+ vmovdqu xmm0, [arg2+16*0]
+ vmovdqu xmm1, [arg2+16*1]
+ vmovdqu xmm2, [arg2+16*2]
+ vmovdqu xmm3, [arg2+16*3]
+ vmovdqu xmm4, [arg2+16*4]
+ vmovdqu xmm5, [arg2+16*5]
+ vmovdqu xmm6, [arg2+16*6]
+ vmovdqu xmm7, [arg2+16*7]
+
+ vpshufb xmm0, xmm11
+ ; XOR the initial_crc value
+ vpxor xmm0, xmm10
+ vpshufb xmm1, xmm11
+ vpshufb xmm2, xmm11
+ vpshufb xmm3, xmm11
+ vpshufb xmm4, xmm11
+ vpshufb xmm5, xmm11
+ vpshufb xmm6, xmm11
+ vpshufb xmm7, xmm11
+
+ vmovdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
+	; below folds 128B at a time until only 128+y bytes of buffer remain
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128 ; buf += 128;
+
+ prefetchnta [arg2+fetch_dist+0]
+ vmovdqu xmm9, [arg2+16*0]
+ vmovdqu xmm12, [arg2+16*1]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm0
+ vmovdqa xmm13, xmm1
+ vpclmulqdq xmm0, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm1, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm0, xmm9
+ vxorps xmm0, xmm8
+ vpxor xmm1, xmm12
+ vxorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ vmovdqu xmm9, [arg2+16*2]
+ vmovdqu xmm12, [arg2+16*3]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm2
+ vmovdqa xmm13, xmm3
+ vpclmulqdq xmm2, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm3, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm2, xmm9
+ vxorps xmm2, xmm8
+ vpxor xmm3, xmm12
+ vxorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ vmovdqu xmm9, [arg2+16*4]
+ vmovdqu xmm12, [arg2+16*5]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm4
+ vmovdqa xmm13, xmm5
+ vpclmulqdq xmm4, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm5, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm4, xmm9
+ vxorps xmm4, xmm8
+ vpxor xmm5, xmm12
+ vxorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ vmovdqu xmm9, [arg2+16*6]
+ vmovdqu xmm12, [arg2+16*7]
+ vpshufb xmm9, xmm11
+ vpshufb xmm12, xmm11
+ vmovdqa xmm8, xmm6
+ vmovdqa xmm13, xmm7
+ vpclmulqdq xmm6, xmm10, 0x0
+ vpclmulqdq xmm8, xmm10 , 0x11
+ vpclmulqdq xmm7, xmm10, 0x0
+ vpclmulqdq xmm13, xmm10 , 0x11
+ vpxor xmm6, xmm9
+ vxorps xmm6, xmm8
+ vpxor xmm7, xmm12
+ vxorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
+	; the 128B of folded data is in 8 of the xmm registers: xmm0 - xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+
+ vmovdqa xmm10, [rk9]
+ vmovdqa xmm8, xmm0
+ vpclmulqdq xmm0, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm0
+
+ vmovdqa xmm10, [rk11]
+ vmovdqa xmm8, xmm1
+ vpclmulqdq xmm1, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm1
+
+ vmovdqa xmm10, [rk13]
+ vmovdqa xmm8, xmm2
+ vpclmulqdq xmm2, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+ vmovdqa xmm10, [rk15]
+ vmovdqa xmm8, xmm3
+ vpclmulqdq xmm3, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm3
+
+ vmovdqa xmm10, [rk17]
+ vmovdqa xmm8, xmm4
+ vpclmulqdq xmm4, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm4
+
+ vmovdqa xmm10, [rk19]
+ vmovdqa xmm8, xmm5
+ vpclmulqdq xmm5, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vxorps xmm7, xmm5
+
+ vmovdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ vmovdqa xmm8, xmm6
+ vpclmulqdq xmm6, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm6
+
+
+ ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpshufb xmm0, xmm11
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here we are handling a tail of less than 16 bytes.
+	; since we know there was data before the pointer, we can back the input pointer up
+	; so that the load ends exactly at the end of the buffer and reads exactly 16 bytes.
+	; after that, the registers need to be adjusted.
+_get_last_two_xmms:
+ vmovdqa xmm2, xmm7
+
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+ vpshufb xmm1, xmm11
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ vmovdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ vpshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ vpxor xmm0, [mask1]
+ vpshufb xmm7, xmm0
+ vpblendvb xmm1, xmm1, xmm2, xmm0
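+	; (the AVX encoding takes the blend mask as an explicit fourth operand,
+	; unlike SSE4.1 pblendvb, which implicitly uses xmm0 as the mask)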
+
+ ; fold 16 Bytes
+ vmovdqa xmm2, xmm1
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0x1
+ vpslldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm7
+
+ vpand xmm0, [mask2]
+
+ vpsrldq xmm7, 12
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
+ vmovdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ vmovdqa xmm0, xmm7
+ vpclmulqdq xmm7, xmm10, 0x01
+ vpslldq xmm7, 4
+ vpclmulqdq xmm7, xmm10, 0x11
+
+ vpslldq xmm7, 4
+ vpxor xmm7, xmm0
+ vpextrd eax, xmm7,1
+
+_cleanup:
+ not eax
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp,VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ vmovdqa xmm11, [SHUF_MASK]
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm11 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je _cleanup
+
+ vmovdqa xmm11, [SHUF_MASK]
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm11 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl _only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ ; shl r9, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+ vmovdqu xmm0, [rax]
+ vpxor xmm0, [mask1]
+
+ vpshufb xmm7, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg3, 3
+ jl _only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg3, 2
+ jl _only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 6
+
+ jmp _barrett
+_only_less_than_2:
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm11
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 7
+
+ jmp _barrett
+
+section .data
+
+; precomputed constants
+align 16
+
+rk1 :
+DQ 0xf200aa6600000000
+rk2 :
+DQ 0x17d3315d00000000
+rk3 :
+DQ 0x022ffca500000000
+rk4 :
+DQ 0x9d9ee22f00000000
+rk5 :
+DQ 0xf200aa6600000000
+rk6 :
+DQ 0x490d678d00000000
+rk7 :
+DQ 0x0000000104d101df
+rk8 :
+DQ 0x0000000104c11db7
+rk9 :
+DQ 0x6ac7e7d700000000
+rk10 :
+DQ 0xfcd922af00000000
+rk11 :
+DQ 0x34e45a6300000000
+rk12 :
+DQ 0x8762c1f600000000
+rk13 :
+DQ 0x5395a0ea00000000
+rk14 :
+DQ 0x54f2d5c700000000
+rk15 :
+DQ 0xd3504ec700000000
+rk16 :
+DQ 0x57a8445500000000
+rk17 :
+DQ 0xc053585d00000000
+rk18 :
+DQ 0x766f1b7800000000
+rk19 :
+DQ 0xcd8c54b500000000
+rk20 :
+DQ 0xab40b71e00000000
+
+
+
+
+
+
+
+
+
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
diff --git a/src/isa-l/crc/crc32_ieee_by16_10.asm b/src/isa-l/crc/crc32_ieee_by16_10.asm
new file mode 100644
index 000000000..5c3f52a93
--- /dev/null
+++ b/src/isa-l/crc/crc32_ieee_by16_10.asm
@@ -0,0 +1,585 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; UINT32 crc32_ieee_by16_10(
+; UINT32 init_crc, //initial CRC value, 32 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+;
+;
+
+%include "reg_sizes.asm"
+
+%ifndef FUNCTION_NAME
+%define FUNCTION_NAME crc32_ieee_by16_10
+%endif
+
+%if (AS_FEATURE_LEVEL) >= 10
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*12+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global FUNCTION_NAME, function
+FUNCTION_NAME:
+ endbranch
+
+ not arg1_low32
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+ vmovdqa [rsp + XMM_SAVE + 16*8], xmm14
+ vmovdqa [rsp + XMM_SAVE + 16*9], xmm15
+%endif
+
+ vbroadcasti32x4 zmm18, [SHUF_MASK]
+ cmp arg3, 256
+ jl .less_than_256
+
+ ; load the initial crc value
+ vmovd xmm10, arg1_low32 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then line up with the initial crc in the correct place.
+ vpslldq xmm10, 12
+
+ ; receive the initial 64B data, xor the initial crc value
+ vmovdqu8 zmm0, [arg2+16*0]
+ vmovdqu8 zmm4, [arg2+16*4]
+ vpshufb zmm0, zmm0, zmm18
+ vpshufb zmm4, zmm4, zmm18
+ vpxorq zmm0, zmm10
+ vbroadcasti32x4 zmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+
+ sub arg3, 256
+ cmp arg3, 256
+ jl .fold_128_B_loop
+
+ vmovdqu8 zmm7, [arg2+16*8]
+ vmovdqu8 zmm8, [arg2+16*12]
+ vpshufb zmm7, zmm7, zmm18
+ vpshufb zmm8, zmm8, zmm18
+ vbroadcasti32x4 zmm16, [rk_1] ;zmm16 has rk-1 and rk-2
+ sub arg3, 256
+
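+	; four zmm accumulators (zmm0, zmm4, zmm7, zmm8) each carry four 16B
+	; lanes, so every iteration below folds 256 bytes:
+	;   lane = clmul(lane.lo64, rk_1) xor clmul(lane.hi64, rk_2) xor next_data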
+.fold_256_B_loop:
+ add arg2, 256
+ vmovdqu8 zmm3, [arg2+16*0]
+ vpshufb zmm3, zmm3, zmm18
+ vpclmulqdq zmm1, zmm0, zmm16, 0x00
+ vpclmulqdq zmm2, zmm0, zmm16, 0x11
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm3
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpshufb zmm9, zmm9, zmm18
+ vpclmulqdq zmm5, zmm4, zmm16, 0x00
+ vpclmulqdq zmm6, zmm4, zmm16, 0x11
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ vmovdqu8 zmm11, [arg2+16*8]
+ vpshufb zmm11, zmm11, zmm18
+ vpclmulqdq zmm12, zmm7, zmm16, 0x00
+ vpclmulqdq zmm13, zmm7, zmm16, 0x11
+ vpxorq zmm7, zmm12, zmm13
+ vpxorq zmm7, zmm7, zmm11
+
+ vmovdqu8 zmm17, [arg2+16*12]
+ vpshufb zmm17, zmm17, zmm18
+ vpclmulqdq zmm14, zmm8, zmm16, 0x00
+ vpclmulqdq zmm15, zmm8, zmm16, 0x11
+ vpxorq zmm8, zmm14, zmm15
+ vpxorq zmm8, zmm8, zmm17
+
+ sub arg3, 256
+ jge .fold_256_B_loop
+
+ ;; Fold 256 into 128
+ add arg2, 256
+ vpclmulqdq zmm1, zmm0, zmm10, 0x00
+ vpclmulqdq zmm2, zmm0, zmm10, 0x11
+ vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC
+
+ vpclmulqdq zmm5, zmm4, zmm10, 0x00
+ vpclmulqdq zmm6, zmm4, zmm10, 0x11
+ vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC
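+	; (vpternlogq with imm8 0x96 is a one-instruction three-way xor,
+	; merging both clmul halves into the accumulator in a single op)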
+
+ vmovdqa32 zmm0, zmm7
+ vmovdqa32 zmm4, zmm8
+
+ add arg3, 128
+ jmp .fold_128_B_register
+
+
+
+	; at this point there are 128*x+y (0<=y<128) bytes of buffer. The fold_128_B_loop
+	; below folds 128B at a time until only 128+y bytes of buffer remain
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+.fold_128_B_loop:
+ add arg2, 128
+ vmovdqu8 zmm8, [arg2+16*0]
+ vpshufb zmm8, zmm8, zmm18
+ vpclmulqdq zmm2, zmm0, zmm10, 0x00
+ vpclmulqdq zmm1, zmm0, zmm10, 0x11
+ vpxorq zmm0, zmm2, zmm1
+ vpxorq zmm0, zmm0, zmm8
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpshufb zmm9, zmm9, zmm18
+ vpclmulqdq zmm5, zmm4, zmm10, 0x00
+ vpclmulqdq zmm6, zmm4, zmm10, 0x11
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ sub arg3, 128
+ jge .fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+	; the 128B of folded data is held in two zmm registers: zmm0 and zmm4
+
+.fold_128_B_register:
+ ; fold the 8 128b parts into 1 xmm register with different constants
+ vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16
+ vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0
+ vpclmulqdq zmm1, zmm0, zmm16, 0x00
+ vpclmulqdq zmm2, zmm0, zmm16, 0x11
+ vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand
+
+ vpclmulqdq zmm5, zmm4, zmm11, 0x00
+ vpclmulqdq zmm6, zmm4, zmm11, 0x11
+ vmovdqa xmm10, [rk1] ; Needed later in reduction loop
+ vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC
+ vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC
+
+ vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10
+ vpxorq ymm8, ymm8, ymm1
+ vextracti64x2 xmm5, ymm8, 1
+ vpxorq xmm7, xmm5, xmm8
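+	; (the shuffle/extract/xor sequence above collapses the four 128b lanes
+	; of zmm1 into a single running 128b remainder in xmm7)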
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl .final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+.16B_reduction_loop:
+ vpclmulqdq xmm8, xmm7, xmm10, 0x11
+ vpclmulqdq xmm7, xmm7, xmm10, 0x00
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpshufb xmm0, xmm0, xmm18
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge .16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+.final_reduction_for_128:
+ add arg3, 16
+ je .128_done
+
+ ; here we are getting data that is less than 16 bytes.
+ ; since we know that there was data before the pointer, we can offset
+ ; the input pointer before the actual point, to receive exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+.get_last_two_xmms:
+
+ vmovdqa xmm2, xmm7
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+ vpshufb xmm1, xmm18
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ vmovdqu xmm0, [rax]
+
+ vpshufb xmm2, xmm0
+ vpxor xmm0, [mask1]
+ vpshufb xmm7, xmm0
+ vpblendvb xmm1, xmm1, xmm2, xmm0
+
+ vpclmulqdq xmm8, xmm7, xmm10, 0x11
+ vpclmulqdq xmm7, xmm7, xmm10, 0x00
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm1
+
+.128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5]
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0x01 ; H*L
+ vpslldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm7
+ vpand xmm0, [mask2]
+ vpsrldq xmm7, 12
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm0
+
+ ;barrett reduction
+.barrett:
+ vmovdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ vmovdqa xmm0, xmm7
+ vpclmulqdq xmm7, xmm10, 0x01
+ vpslldq xmm7, 4
+ vpclmulqdq xmm7, xmm10, 0x11
+
+ vpslldq xmm7, 4
+ vpxor xmm7, xmm0
+ vpextrd eax, xmm7, 1
+
+.cleanup:
+ not eax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+ vmovdqa xmm14, [rsp + XMM_SAVE + 16*8]
+ vmovdqa xmm15, [rsp + XMM_SAVE + 16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+.less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl .less_than_32
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm18 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp .16B_reduction_loop
+
+
+align 16
+.less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je .cleanup
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vpslldq xmm0, 12 ; align it to its correct place
+
+ cmp arg3, 16
+ je .exact_16_left
+ jl .less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp .get_last_two_xmms
+
+align 16
+.less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl .only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl .less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+.less_than_8_left:
+
+ cmp arg3, 4
+ jl .less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+.less_than_4_left:
+
+ cmp arg3, 2
+ jl .less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+.less_than_2_left:
+ cmp arg3, 1
+ jl .zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+.zero_left:
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+ vmovdqu xmm0, [rax]
+ vpxor xmm0, [mask1]
+
+ vpshufb xmm7,xmm0
+ jmp .128_done
+
+align 16
+.exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ jmp .128_done
+
+.only_less_than_4:
+ cmp arg3, 3
+ jl .only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 5
+ jmp .barrett
+
+.only_less_than_3:
+ cmp arg3, 2
+ jl .only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 6
+ jmp .barrett
+
+.only_less_than_2:
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpsrldq xmm7, 7
+ jmp .barrett
+
+section .data
+align 32
+
+%ifndef USE_CONSTS
+; precomputed constants
+rk_1: dq 0x1851689900000000
+rk_2: dq 0xa3dc855100000000
+rk1: dq 0xf200aa6600000000
+rk2: dq 0x17d3315d00000000
+rk3: dq 0x022ffca500000000
+rk4: dq 0x9d9ee22f00000000
+rk5: dq 0xf200aa6600000000
+rk6: dq 0x490d678d00000000
+rk7: dq 0x0000000104d101df
+rk8: dq 0x0000000104c11db7
+rk9: dq 0x6ac7e7d700000000
+rk10: dq 0xfcd922af00000000
+rk11: dq 0x34e45a6300000000
+rk12: dq 0x8762c1f600000000
+rk13: dq 0x5395a0ea00000000
+rk14: dq 0x54f2d5c700000000
+rk15: dq 0xd3504ec700000000
+rk16: dq 0x57a8445500000000
+rk17: dq 0xc053585d00000000
+rk18: dq 0x766f1b7800000000
+rk19: dq 0xcd8c54b500000000
+rk20: dq 0xab40b71e00000000
+
+rk_1b: dq 0xf200aa6600000000
+rk_2b: dq 0x17d3315d00000000
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%else
+INCLUDE_CONSTS
+%endif
+
+mask1: dq 0x8080808080808080, 0x8080808080808080
+mask2: dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+
+SHUF_MASK: dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-3) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-4) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x8080808080808080
+
+%else ; The assembler does not understand these opcodes. Add an empty symbol for Windows.
+%ifidn __OUTPUT_FORMAT__, win64
+global no_ %+ FUNCTION_NAME
+no_ %+ FUNCTION_NAME %+ :
+%endif
+%endif ; (AS_FEATURE_LEVEL) >= 10
diff --git a/src/isa-l/crc/crc32_ieee_by4.asm b/src/isa-l/crc/crc32_ieee_by4.asm
new file mode 100644
index 000000000..f43264095
--- /dev/null
+++ b/src/isa-l/crc/crc32_ieee_by4.asm
@@ -0,0 +1,566 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Function API:
+; UINT32 crc32_ieee_by4(
+; UINT32 init_crc, //initial CRC value, 32 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; UINT64 len //buffer length in bytes (64-bit data)
+; );
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://download.intel.com/design/intarch/papers/323102.pdf
+;
+
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+
+ %xdefine arg1_low32 ecx
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 edi
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*4+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global crc32_ieee_by4, function
+crc32_ieee_by4:
+ endbranch
+
+ not arg1_low32
+
+ sub rsp,VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ movdqa [rsp + XMM_SAVE + 16*0],xmm6
+ movdqa [rsp + XMM_SAVE + 16*1],xmm7
+%endif
+
+ ; check if smaller than 128B
+ cmp arg3, 128
+ jl _less_than_128
+
+
+
+ ; load the initial crc value
+ movd xmm6, arg1_low32 ; initial crc
+	; the crc value does not need to be byte-reflected, but it does need
+	; to be moved to the high part of the register, because the data will
+	; be byte-reflected and will then line up with the initial crc in the
+	; correct place.
+ pslldq xmm6, 12
+
+
+
+ movdqa xmm7, [SHUF_MASK]
+ ; receive the initial 64B data, xor the initial crc value
+ movdqu xmm0, [arg2]
+ movdqu xmm1, [arg2+16]
+ movdqu xmm2, [arg2+32]
+ movdqu xmm3, [arg2+48]
+
+
+
+ pshufb xmm0, xmm7
+ ; XOR the initial_crc value
+ pxor xmm0, xmm6
+ pshufb xmm1, xmm7
+ pshufb xmm2, xmm7
+ pshufb xmm3, xmm7
+
+ movdqa xmm6, [rk3] ; k3=2^480 mod POLY << 32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;we subtract 128 instead of 64 to save one instruction from the loop
+ sub arg3, 128
+
+	; at this point in the code there are 64*x+y (0<=y<64) bytes in the
+	; buffer. The _fold_64_B_loop will fold 64B at a time until we
+	; have 64+y bytes of buffer left
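+	;
+	; folding sketch (informal): pclmulqdq with immediate 0x11 multiplies
+	; the high qwords of the state and [rk3], and immediate 0x0 the low
+	; qwords; XORing both products into the next 64B of input advances
+	; the running remainder by 64 bytes in one step, since CRC is linear
+	; over GF(2)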
+
+
+ ; fold 64B at a time. This section of the code folds 4 xmm registers in parallel
+_fold_64_B_loop:
+
+ ;update the buffer pointer
+ add arg2, 64
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqa xmm4, xmm0
+ movdqa xmm5, xmm1
+
+ pclmulqdq xmm0, xmm6 , 0x11
+ pclmulqdq xmm1, xmm6 , 0x11
+
+ pclmulqdq xmm4, xmm6, 0x0
+ pclmulqdq xmm5, xmm6, 0x0
+
+ pxor xmm0, xmm4
+ pxor xmm1, xmm5
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqa xmm4, xmm2
+ movdqa xmm5, xmm3
+
+ pclmulqdq xmm2, xmm6, 0x11
+ pclmulqdq xmm3, xmm6, 0x11
+
+ pclmulqdq xmm4, xmm6, 0x0
+ pclmulqdq xmm5, xmm6, 0x0
+
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+
+ movdqu xmm4, [arg2]
+ movdqu xmm5, [arg2+16]
+ pshufb xmm4, xmm7
+ pshufb xmm5, xmm7
+ pxor xmm0, xmm4
+ pxor xmm1, xmm5
+
+ movdqu xmm4, [arg2+32]
+ movdqu xmm5, [arg2+48]
+ pshufb xmm4, xmm7
+ pshufb xmm5, xmm7
+
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+
+ sub arg3, 64
+
+ ; check if there is another 64B in the buffer to be able to fold
+ jge _fold_64_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+ add arg2, 64
+	;at this point, arg2 is pointing at the last y bytes of the buffer
+ ; the 64B of data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3
+
+
+ movdqa xmm6, [rk1] ;k1
+
+ ; fold the 4 xmm registers to 1 xmm register with different constants
+ movdqa xmm4, xmm0
+ pclmulqdq xmm0, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm1, xmm4
+ xorps xmm1, xmm0
+
+ movdqa xmm4, xmm1
+ pclmulqdq xmm1, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm2, xmm4
+ xorps xmm2, xmm1
+
+ movdqa xmm4, xmm2
+ pclmulqdq xmm2, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+
+
+ ;instead of 64, we add 48 to the loop counter to save 1 instruction from the loop
+	; instead of a cmp instruction, we use the sign flag with the jl instruction
+ add arg3, 64-16
+ jl _final_reduction_for_128
+
+; now we have 16+y bytes left to reduce. 16 bytes are in register xmm3 and the rest is in memory
+; we can fold 16 bytes at a time if y>=16
+; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm4, xmm3
+ pclmulqdq xmm3, xmm6, 0x11
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ movdqu xmm0, [arg2]
+ pshufb xmm0, xmm7
+ pxor xmm3, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm3 register
+
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here we are left with less than 16 bytes of data.
+	; since we know there was data before the pointer, we can offset the
+	; input pointer back before the current point to read exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+_get_last_two_xmms:
+ movdqa xmm2, xmm3
+
+ movdqu xmm1, [arg2 - 16 + arg3]
+ pshufb xmm1, xmm7
+
+ shl arg3, 4
+ lea rax, [pshufb_shf_table + 15*16]
+ sub rax, arg3
+ movdqu xmm0, [rax]
+
+ pshufb xmm2, xmm0
+
+ pxor xmm0, [mask3]
+
+ pshufb xmm3, xmm0
+
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ movdqa xmm2, xmm1
+
+ movdqa xmm4, xmm3
+ pclmulqdq xmm3, xmm6, 0x11
+
+ pclmulqdq xmm4, xmm6, 0x0
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+
+_128_done:
+
+ movdqa xmm6, [rk5]
+ movdqa xmm0, xmm3
+
+ ;64b fold
+ pclmulqdq xmm3, xmm6, 0x1
+ pslldq xmm0, 8
+ pxor xmm3, xmm0
+
+ ;32b fold
+ movdqa xmm0, xmm3
+
+ pand xmm0, [mask4]
+
+ psrldq xmm3, 12
+ pclmulqdq xmm3, xmm6, 0x10
+ pxor xmm3, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm6, [rk7]
+ movdqa xmm0, xmm3
+ pclmulqdq xmm3, xmm6, 0x01
+ pslldq xmm3, 4
+ pclmulqdq xmm3, xmm6, 0x11
+
+ pslldq xmm3, 4
+ pxor xmm3, xmm0
+ pextrd eax, xmm3,1
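+	; Barrett sketch (informal): the xmm6 load above picks up the pair
+	; rk7:rk8, where rk7 is mu = floor(x^64 / P) and rk8 is the IEEE
+	; polynomial P = 0x104c11db7; the two multiplies compute the quotient
+	; estimate and then quotient*P, leaving the 32-bit remainder in
+	; dword 1 of xmm3 with no polynomial division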
+
+_cleanup:
+ not eax
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+%endif
+ add rsp,VARIABLE_OFFSET
+
+
+ ret
+
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_128:
+
+ ;check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ movdqa xmm7, [SHUF_MASK]
+
+ ;if there is, load the constants
+ movdqa xmm6, [rk1] ;k1
+
+ movd xmm0, arg1_low32
+ pslldq xmm0, 12
+ movdqu xmm3, [arg2]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0
+
+
+ ;update the buffer pointer
+ add arg2, 16
+
+ ;update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+
+align 16
+_less_than_32:
+ mov eax, arg1_low32
+ test arg3, arg3
+ je _cleanup
+
+ movdqa xmm7, [SHUF_MASK]
+
+ movd xmm0, arg1_low32
+ pslldq xmm0, 12
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+ movd xmm0, arg1_low32
+ pslldq xmm0, 12
+ movdqu xmm3, [arg2]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm6, [rk1] ;k1
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+	; use stack space to load data of less than 16 bytes; zero out the 16B in memory first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+
+
+ cmp arg3, 4
+ jl _only_less_than_4
+
+ mov r9, arg3
+
+
+ cmp arg3, 8
+ jl _less_than_8_left
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ mov al, [arg2]
+ mov [r11], al
+
+_zero_left:
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0
+
+ shl r9, 4
+ lea rax, [pshufb_shf_table + 15*16]
+ sub rax, r9
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask3]
+
+ pshufb xmm3, xmm0
+ jmp _128_done
+
+align 16
+_exact_16_left:
+ movdqu xmm3, [arg2]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp arg3, 3
+ jl _only_less_than_3
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0
+
+ psrldq xmm3, 5
+
+ jmp _barrett
+_only_less_than_3:
+ cmp arg3, 2
+ jl _only_less_than_2
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0
+
+ psrldq xmm3, 6
+
+ jmp _barrett
+_only_less_than_2:
+ mov al, [arg2]
+ mov [r11], al
+
+ movdqa xmm3, [rsp]
+ pshufb xmm3, xmm7
+ pxor xmm3, xmm0
+
+ psrldq xmm3, 7
+
+ jmp _barrett
+; precomputed constants
+section .data
+
+align 16
+rk1:
+DQ 0xf200aa6600000000
+rk2:
+DQ 0x17d3315d00000000
+rk3:
+DQ 0xd3504ec700000000
+rk4:
+DQ 0x57a8445500000000
+rk5:
+DQ 0xf200aa6600000000
+rk6:
+DQ 0x490d678d00000000
+rk7:
+DQ 0x0000000104d101df
+rk8:
+DQ 0x0000000104c11db7
+mask:
+dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2:
+dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3:
+dq 0x8080808080808080, 0x8080808080808080
+mask4:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
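+
+; pshufb note: in the shift table below, index bytes 0x00-0x0f select the
+; corresponding source byte, while bytes with the top bit set (0x8x) make
+; pshufb write a zero byte, so a single 16B mask both shifts the data and
+; zeroes the bytes shifted in.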
+ align 32
+pshufb_shf_table:
+
+ dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+
+	dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+
+	dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+
+ dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+
+ dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+
+ dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+
+ dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+
+ dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+
+ dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+
+ dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+
+ dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+
+ dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+
+ dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+
+ dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+
+ dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+
+
+SHUF_MASK dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+;;; func core, ver, snum
+slversion crc32_ieee_by4, 05, 02, 0017
diff --git a/src/isa-l/crc/crc32_ieee_perf.c b/src/isa-l/crc/crc32_ieee_perf.c
new file mode 100644
index 000000000..f6ffbbe44
--- /dev/null
+++ b/src/isa-l/crc/crc32_ieee_perf.c
@@ -0,0 +1,79 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include "crc.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (2 * GT_L3_CACHE)
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define TEST_MEM TEST_LEN
+
+int main(int argc, char *argv[])
+{
+ void *buf;
+ uint32_t crc;
+ struct perf start;
+
+ printf("crc32_ieee_perf:\n");
+
+ if (posix_memalign(&buf, 1024, TEST_LEN)) {
+		printf("alloc error: Fail\n");
+ return -1;
+ }
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ memset(buf, 0, TEST_LEN);
+ BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_ieee(TEST_SEED, buf, TEST_LEN));
+ printf("crc32_ieee" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ printf("finish 0x%x\n", crc);
+ return 0;
+}
diff --git a/src/isa-l/crc/crc32_iscsi_00.asm b/src/isa-l/crc/crc32_iscsi_00.asm
new file mode 100644
index 000000000..1a5e02928
--- /dev/null
+++ b/src/isa-l/crc/crc32_iscsi_00.asm
@@ -0,0 +1,672 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function to compute iscsi CRC32 with table-based recombination
+; crc done "by 3" with block sizes 1920, 960, 480, 240
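+;
+; Recombination sketch (informal): the buffer is split into thirds A|B|C
+; and three crc32 streams run in parallel. Since CRC is linear over GF(2),
+;   crc(A|B|C) = shift2N(crc(A)) xor shiftN(crc(B)) xor crc(C)
+; where shiftK advances a crc past K zero bytes; the mul_table_* arrays
+; below precompute those shifts one result byte at a time.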
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+default rel
+; crcB3 MACRO to implement crc32 on 3 %%bSize-byte blocks
+%macro crcB3 3
+%define %%bSize %1 ; 1/3 of buffer size
+%define %%td2 %2 ; table offset for crc0 (2/3 of buffer)
+%define %%td1 %3 ; table offset for crc1 (1/3 of buffer)
+
+%IF %%bSize=640
+ sub len, %%bSize*3
+ js %%crcB3_end ;; jump to next level if 3*blockSize > len
+%ELSE
+ cmp len, %%bSize*3
+ jnae %%crcB3_end ;; jump to next level if 3*blockSize > len
+%ENDIF
+ ;;;;;; Calculate CRC of 3 blocks of the buffer ;;;;;;
+%%crcB3_loop:
+ ;; rax = crc0 = initial crc
+ xor rbx, rbx ;; rbx = crc1 = 0;
+ xor r10, r10 ;; r10 = crc2 = 0;
+
+ cmp len, %%bSize*3*2
+ jbe %%non_prefetch
+
+ %assign i 0
+ %rep %%bSize/8 - 1
+ %if i < %%bSize*3/4
+ prefetchnta [bufptmp+ %%bSize*3 +i*4]
+ %endif
+ crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1
+ crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2
+ %assign i (i+8)
+ %endrep
+ jmp %%next %+ %1
+
+%%non_prefetch:
+ %assign i 0
+ %rep %%bSize/8 - 1
+ crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1
+ crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2
+ %assign i (i+8)
+ %endrep
+
+%%next %+ %1:
+ crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1
+; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2
+
+ ; merge in crc0
+ movzx bufp_dw, al
+ mov r9d, [crc_init + bufp*4 + %%td2]
+ movzx bufp_dw, ah
+ shr eax, 16
+ mov r11d, [crc_init + bufp*4 + %%td2]
+ shl r11, 8
+ xor r9, r11
+
+ movzx bufp_dw, al
+ mov r11d, [crc_init + bufp*4 + %%td2]
+ movzx bufp_dw, ah
+ shl r11, 16
+ xor r9, r11
+ mov r11d, [crc_init + bufp*4 + %%td2]
+ shl r11, 24
+ xor r9, r11
+
+ ; merge in crc1
+
+ movzx bufp_dw, bl
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ movzx bufp_dw, bh
+ shr ebx, 16
+ xor r9, r11
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ shl r11, 8
+ xor r9, r11
+
+ movzx bufp_dw, bl
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ movzx bufp_dw, bh
+ shl r11, 16
+ xor r9, r11
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ shl r11, 24
+ xor r9, r11
+
+ xor r9, [bufptmp+i + 2*%%bSize]
+ crc32 r10, r9
+ mov rax, r10
+
+ add bufptmp, %%bSize*3 ;; move to next block
+ sub len, %%bSize*3
+%IF %%bSize=640
+ jns %%crcB3_loop
+%ENDIF
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%%crcB3_end:
+%IF %%bSize=640
+ add len, %%bSize*3
+%ENDIF
+ je do_return ;; return if remaining data is zero
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; ISCSI CRC 32 Implementation with crc32 Instruction
+
+;;; unsigned int crc32_iscsi_00(unsigned char * buffer, int len, unsigned int crc_init);
+;;;
+;;;        *buf = rcx (win64) / rdi (elf64)
+;;;         len = rdx (win64) / rsi (elf64)
+;;;    crc_init = r8  (win64) / rdx (elf64)
+;;;
+
+mk_global crc32_iscsi_00, function
+crc32_iscsi_00:
+ endbranch
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define bufp rdi
+%define bufp_dw edi
+%define bufp_w di
+%define bufp_b dil
+%define bufptmp rcx
+%define block_0 rcx
+%define block_1 r8
+%define block_2 r11
+%define len rsi
+%define len_dw esi
+%define len_w si
+%define len_b sil
+%define crc_init rdx
+%define crc_init_dw edx
+%else
+%define bufp rcx
+%define bufp_dw ecx
+%define bufp_w cx
+%define bufp_b cl
+%define bufptmp rdi
+%define block_0 rdi
+%define block_1 rsi
+%define block_2 r11
+%define len rdx
+%define len_dw edx
+%define len_w dx
+%define len_b dl
+%define crc_init r8
+%define crc_init_dw r8d
+%endif
+
+
+ push rdi
+ push rbx
+
+ mov rax, crc_init ;; rax = crc_init;
+
+ cmp len, 8
+ jb less_than_8
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ mov bufptmp, bufp ;; rdi = *buf
+ neg bufp
+ and bufp, 7 ;; calculate the unalignment
+ ;; amount of the address
+ je proc_block ;; Skip if aligned
+
+ ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;;;
+ mov rbx, [bufptmp] ;; load a quadword from the buffer
+ add bufptmp, bufp ;; align buffer pointer for
+ ;; quadword processing
+ sub len, bufp ;; update buffer length
+align_loop:
+ crc32 eax, bl ;; compute crc32 of 1-byte
+ shr rbx, 8 ;; get next byte
+ dec bufp
+ jne align_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; 2) BLOCK LEVEL: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+proc_block:
+ cmp len, 240
+ jb bit8
+
+ lea crc_init, [mul_table_72] ;; load table base address
+
+ crcB3 640, 0x1000, 0x0c00 ; 640*3 = 1920 (Tables 1280, 640)
+ crcB3 320, 0x0c00, 0x0800 ; 320*3 = 960 (Tables 640, 320)
+ crcB3 160, 0x0800, 0x0400 ; 160*3 = 480 (Tables 320, 160)
+ crcB3 80, 0x0400, 0x0000 ; 80*3 = 240 (Tables 160, 80)
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;4) LESS THAN 256 BYTES REMAIN AT THIS POINT (8 bits of len are full)
+
+bit8:
+ shl len_b, 1 ;; shift-out MSB (bit-7)
+ jnc bit7 ;; jump to bit-6 if bit-7 == 0
+ %assign i 0
+ %rep 16
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+	add	bufptmp, 128		;; buf +=128; (next 128 bytes)
+
+bit7:
+	shl	len_b, 1		;; shift-out MSB (bit-6)
+	jnc	bit6			;; jump to bit-5 if bit-6 == 0
+ %assign i 0
+ %rep 8
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 64 ;; buf +=64; (next 64 bytes)
+bit6:
+	shl	len_b, 1		;; shift-out MSB (bit-5)
+	jnc	bit5			;; jump to bit-4 if bit-5 == 0
+ %assign i 0
+ %rep 4
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 32 ;; buf +=32; (next 32 bytes)
+bit5:
+	shl	len_b, 1		;; shift-out MSB (bit-4)
+	jnc	bit4			;; jump to bit-3 if bit-4 == 0
+ %assign i 0
+ %rep 2
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 16 ;; buf +=16; (next 16 bytes)
+bit4:
+	shl	len_b, 1		;; shift-out MSB (bit-3)
+	jnc	bit3			;; jump to bit-2 if bit-3 == 0
+ crc32 rax, qword [bufptmp] ;; compute crc32 of 8-byte data
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 8 ;; buf +=8; (next 8 bytes)
+bit3:
+	mov	rbx, qword [bufptmp]	;; load 8 bytes from the buffer
+	shl	len_b, 1		;; shift-out MSB (bit-2)
+	jnc	bit2			;; jump to bit-1 if bit-2 == 0
+ crc32 eax, ebx ;; compute crc32 of 4-byte data
+ je do_return ;; return if remaining data is zero
+ shr rbx, 32 ;; get next 3 bytes
+bit2:
+	shl	len_b, 1		;; shift-out MSB (bit-1)
+	jnc	bit1			;; jump to bit-0 if bit-1 == 0
+ crc32 eax, bx ;; compute crc32 of 2-byte data
+ je do_return ;; return if remaining data is zero
+ shr rbx, 16 ;; next byte
+bit1:
+ test len_b,len_b
+ je do_return
+ crc32 eax, bl ;; compute crc32 of 1-byte data
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+do_return:
+
+ pop rbx
+ pop rdi
+ ret
+
+less_than_8:
+ test len,4
+ jz less_than_4
+ crc32 eax, dword[bufp]
+ add bufp,4
+less_than_4:
+ test len,2
+ jz less_than_2
+ crc32 eax, word[bufp]
+ add bufp,2
+less_than_2:
+ test len,1
+ jz do_return
+ crc32 rax, byte[bufp]
+ pop rbx
+ pop bufp
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; global mul_table_72, mul_table_152, mul_table_312, mul_table_632, mul_table_1272
+
+section .data
+align 8
+mul_table_72:
+DD 0x00000000,0x39d3b296,0x73a7652c,0x4a74d7ba
+DD 0xe74eca58,0xde9d78ce,0x94e9af74,0xad3a1de2
+DD 0xcb71e241,0xf2a250d7,0xb8d6876d,0x810535fb
+DD 0x2c3f2819,0x15ec9a8f,0x5f984d35,0x664bffa3
+DD 0x930fb273,0xaadc00e5,0xe0a8d75f,0xd97b65c9
+DD 0x7441782b,0x4d92cabd,0x07e61d07,0x3e35af91
+DD 0x587e5032,0x61ade2a4,0x2bd9351e,0x120a8788
+DD 0xbf309a6a,0x86e328fc,0xcc97ff46,0xf5444dd0
+DD 0x23f31217,0x1a20a081,0x5054773b,0x6987c5ad
+DD 0xc4bdd84f,0xfd6e6ad9,0xb71abd63,0x8ec90ff5
+DD 0xe882f056,0xd15142c0,0x9b25957a,0xa2f627ec
+DD 0x0fcc3a0e,0x361f8898,0x7c6b5f22,0x45b8edb4
+DD 0xb0fca064,0x892f12f2,0xc35bc548,0xfa8877de
+DD 0x57b26a3c,0x6e61d8aa,0x24150f10,0x1dc6bd86
+DD 0x7b8d4225,0x425ef0b3,0x082a2709,0x31f9959f
+DD 0x9cc3887d,0xa5103aeb,0xef64ed51,0xd6b75fc7
+DD 0x47e6242e,0x7e3596b8,0x34414102,0x0d92f394
+DD 0xa0a8ee76,0x997b5ce0,0xd30f8b5a,0xeadc39cc
+DD 0x8c97c66f,0xb54474f9,0xff30a343,0xc6e311d5
+DD 0x6bd90c37,0x520abea1,0x187e691b,0x21addb8d
+DD 0xd4e9965d,0xed3a24cb,0xa74ef371,0x9e9d41e7
+DD 0x33a75c05,0x0a74ee93,0x40003929,0x79d38bbf
+DD 0x1f98741c,0x264bc68a,0x6c3f1130,0x55eca3a6
+DD 0xf8d6be44,0xc1050cd2,0x8b71db68,0xb2a269fe
+DD 0x64153639,0x5dc684af,0x17b25315,0x2e61e183
+DD 0x835bfc61,0xba884ef7,0xf0fc994d,0xc92f2bdb
+DD 0xaf64d478,0x96b766ee,0xdcc3b154,0xe51003c2
+DD 0x482a1e20,0x71f9acb6,0x3b8d7b0c,0x025ec99a
+DD 0xf71a844a,0xcec936dc,0x84bde166,0xbd6e53f0
+DD 0x10544e12,0x2987fc84,0x63f32b3e,0x5a2099a8
+DD 0x3c6b660b,0x05b8d49d,0x4fcc0327,0x761fb1b1
+DD 0xdb25ac53,0xe2f61ec5,0xa882c97f,0x91517be9
+DD 0x8fcc485c,0xb61ffaca,0xfc6b2d70,0xc5b89fe6
+DD 0x68828204,0x51513092,0x1b25e728,0x22f655be
+DD 0x44bdaa1d,0x7d6e188b,0x371acf31,0x0ec97da7
+DD 0xa3f36045,0x9a20d2d3,0xd0540569,0xe987b7ff
+DD 0x1cc3fa2f,0x251048b9,0x6f649f03,0x56b72d95
+DD 0xfb8d3077,0xc25e82e1,0x882a555b,0xb1f9e7cd
+DD 0xd7b2186e,0xee61aaf8,0xa4157d42,0x9dc6cfd4
+DD 0x30fcd236,0x092f60a0,0x435bb71a,0x7a88058c
+DD 0xac3f5a4b,0x95ece8dd,0xdf983f67,0xe64b8df1
+DD 0x4b719013,0x72a22285,0x38d6f53f,0x010547a9
+DD 0x674eb80a,0x5e9d0a9c,0x14e9dd26,0x2d3a6fb0
+DD 0x80007252,0xb9d3c0c4,0xf3a7177e,0xca74a5e8
+DD 0x3f30e838,0x06e35aae,0x4c978d14,0x75443f82
+DD 0xd87e2260,0xe1ad90f6,0xabd9474c,0x920af5da
+DD 0xf4410a79,0xcd92b8ef,0x87e66f55,0xbe35ddc3
+DD 0x130fc021,0x2adc72b7,0x60a8a50d,0x597b179b
+DD 0xc82a6c72,0xf1f9dee4,0xbb8d095e,0x825ebbc8
+DD 0x2f64a62a,0x16b714bc,0x5cc3c306,0x65107190
+DD 0x035b8e33,0x3a883ca5,0x70fceb1f,0x492f5989
+DD 0xe415446b,0xddc6f6fd,0x97b22147,0xae6193d1
+DD 0x5b25de01,0x62f66c97,0x2882bb2d,0x115109bb
+DD 0xbc6b1459,0x85b8a6cf,0xcfcc7175,0xf61fc3e3
+DD 0x90543c40,0xa9878ed6,0xe3f3596c,0xda20ebfa
+DD 0x771af618,0x4ec9448e,0x04bd9334,0x3d6e21a2
+DD 0xebd97e65,0xd20accf3,0x987e1b49,0xa1ada9df
+DD 0x0c97b43d,0x354406ab,0x7f30d111,0x46e36387
+DD 0x20a89c24,0x197b2eb2,0x530ff908,0x6adc4b9e
+DD 0xc7e6567c,0xfe35e4ea,0xb4413350,0x8d9281c6
+DD 0x78d6cc16,0x41057e80,0x0b71a93a,0x32a21bac
+DD 0x9f98064e,0xa64bb4d8,0xec3f6362,0xd5ecd1f4
+DD 0xb3a72e57,0x8a749cc1,0xc0004b7b,0xf9d3f9ed
+DD 0x54e9e40f,0x6d3a5699,0x274e8123,0x1e9d33b5
+
+mul_table_152:
+DD 0x00000000,0x878a92a7,0x0af953bf,0x8d73c118
+DD 0x15f2a77e,0x927835d9,0x1f0bf4c1,0x98816666
+DD 0x2be54efc,0xac6fdc5b,0x211c1d43,0xa6968fe4
+DD 0x3e17e982,0xb99d7b25,0x34eeba3d,0xb364289a
+DD 0x57ca9df8,0xd0400f5f,0x5d33ce47,0xdab95ce0
+DD 0x42383a86,0xc5b2a821,0x48c16939,0xcf4bfb9e
+DD 0x7c2fd304,0xfba541a3,0x76d680bb,0xf15c121c
+DD 0x69dd747a,0xee57e6dd,0x632427c5,0xe4aeb562
+DD 0xaf953bf0,0x281fa957,0xa56c684f,0x22e6fae8
+DD 0xba679c8e,0x3ded0e29,0xb09ecf31,0x37145d96
+DD 0x8470750c,0x03fae7ab,0x8e8926b3,0x0903b414
+DD 0x9182d272,0x160840d5,0x9b7b81cd,0x1cf1136a
+DD 0xf85fa608,0x7fd534af,0xf2a6f5b7,0x752c6710
+DD 0xedad0176,0x6a2793d1,0xe75452c9,0x60dec06e
+DD 0xd3bae8f4,0x54307a53,0xd943bb4b,0x5ec929ec
+DD 0xc6484f8a,0x41c2dd2d,0xccb11c35,0x4b3b8e92
+DD 0x5ac60111,0xdd4c93b6,0x503f52ae,0xd7b5c009
+DD 0x4f34a66f,0xc8be34c8,0x45cdf5d0,0xc2476777
+DD 0x71234fed,0xf6a9dd4a,0x7bda1c52,0xfc508ef5
+DD 0x64d1e893,0xe35b7a34,0x6e28bb2c,0xe9a2298b
+DD 0x0d0c9ce9,0x8a860e4e,0x07f5cf56,0x807f5df1
+DD 0x18fe3b97,0x9f74a930,0x12076828,0x958dfa8f
+DD 0x26e9d215,0xa16340b2,0x2c1081aa,0xab9a130d
+DD 0x331b756b,0xb491e7cc,0x39e226d4,0xbe68b473
+DD 0xf5533ae1,0x72d9a846,0xffaa695e,0x7820fbf9
+DD 0xe0a19d9f,0x672b0f38,0xea58ce20,0x6dd25c87
+DD 0xdeb6741d,0x593ce6ba,0xd44f27a2,0x53c5b505
+DD 0xcb44d363,0x4cce41c4,0xc1bd80dc,0x4637127b
+DD 0xa299a719,0x251335be,0xa860f4a6,0x2fea6601
+DD 0xb76b0067,0x30e192c0,0xbd9253d8,0x3a18c17f
+DD 0x897ce9e5,0x0ef67b42,0x8385ba5a,0x040f28fd
+DD 0x9c8e4e9b,0x1b04dc3c,0x96771d24,0x11fd8f83
+DD 0xb58c0222,0x32069085,0xbf75519d,0x38ffc33a
+DD 0xa07ea55c,0x27f437fb,0xaa87f6e3,0x2d0d6444
+DD 0x9e694cde,0x19e3de79,0x94901f61,0x131a8dc6
+DD 0x8b9beba0,0x0c117907,0x8162b81f,0x06e82ab8
+DD 0xe2469fda,0x65cc0d7d,0xe8bfcc65,0x6f355ec2
+DD 0xf7b438a4,0x703eaa03,0xfd4d6b1b,0x7ac7f9bc
+DD 0xc9a3d126,0x4e294381,0xc35a8299,0x44d0103e
+DD 0xdc517658,0x5bdbe4ff,0xd6a825e7,0x5122b740
+DD 0x1a1939d2,0x9d93ab75,0x10e06a6d,0x976af8ca
+DD 0x0feb9eac,0x88610c0b,0x0512cd13,0x82985fb4
+DD 0x31fc772e,0xb676e589,0x3b052491,0xbc8fb636
+DD 0x240ed050,0xa38442f7,0x2ef783ef,0xa97d1148
+DD 0x4dd3a42a,0xca59368d,0x472af795,0xc0a06532
+DD 0x58210354,0xdfab91f3,0x52d850eb,0xd552c24c
+DD 0x6636ead6,0xe1bc7871,0x6ccfb969,0xeb452bce
+DD 0x73c44da8,0xf44edf0f,0x793d1e17,0xfeb78cb0
+DD 0xef4a0333,0x68c09194,0xe5b3508c,0x6239c22b
+DD 0xfab8a44d,0x7d3236ea,0xf041f7f2,0x77cb6555
+DD 0xc4af4dcf,0x4325df68,0xce561e70,0x49dc8cd7
+DD 0xd15deab1,0x56d77816,0xdba4b90e,0x5c2e2ba9
+DD 0xb8809ecb,0x3f0a0c6c,0xb279cd74,0x35f35fd3
+DD 0xad7239b5,0x2af8ab12,0xa78b6a0a,0x2001f8ad
+DD 0x9365d037,0x14ef4290,0x999c8388,0x1e16112f
+DD 0x86977749,0x011de5ee,0x8c6e24f6,0x0be4b651
+DD 0x40df38c3,0xc755aa64,0x4a266b7c,0xcdacf9db
+DD 0x552d9fbd,0xd2a70d1a,0x5fd4cc02,0xd85e5ea5
+DD 0x6b3a763f,0xecb0e498,0x61c32580,0xe649b727
+DD 0x7ec8d141,0xf94243e6,0x743182fe,0xf3bb1059
+DD 0x1715a53b,0x909f379c,0x1decf684,0x9a666423
+DD 0x02e70245,0x856d90e2,0x081e51fa,0x8f94c35d
+DD 0x3cf0ebc7,0xbb7a7960,0x3609b878,0xb1832adf
+DD 0x29024cb9,0xae88de1e,0x23fb1f06,0xa4718da1
+
+mul_table_312:
+DD 0x00000000,0xbac2fd7b,0x70698c07,0xcaab717c
+DD 0xe0d3180e,0x5a11e575,0x90ba9409,0x2a786972
+DD 0xc44a46ed,0x7e88bb96,0xb423caea,0x0ee13791
+DD 0x24995ee3,0x9e5ba398,0x54f0d2e4,0xee322f9f
+DD 0x8d78fb2b,0x37ba0650,0xfd11772c,0x47d38a57
+DD 0x6dabe325,0xd7691e5e,0x1dc26f22,0xa7009259
+DD 0x4932bdc6,0xf3f040bd,0x395b31c1,0x8399ccba
+DD 0xa9e1a5c8,0x132358b3,0xd98829cf,0x634ad4b4
+DD 0x1f1d80a7,0xa5df7ddc,0x6f740ca0,0xd5b6f1db
+DD 0xffce98a9,0x450c65d2,0x8fa714ae,0x3565e9d5
+DD 0xdb57c64a,0x61953b31,0xab3e4a4d,0x11fcb736
+DD 0x3b84de44,0x8146233f,0x4bed5243,0xf12faf38
+DD 0x92657b8c,0x28a786f7,0xe20cf78b,0x58ce0af0
+DD 0x72b66382,0xc8749ef9,0x02dfef85,0xb81d12fe
+DD 0x562f3d61,0xecedc01a,0x2646b166,0x9c844c1d
+DD 0xb6fc256f,0x0c3ed814,0xc695a968,0x7c575413
+DD 0x3e3b014e,0x84f9fc35,0x4e528d49,0xf4907032
+DD 0xdee81940,0x642ae43b,0xae819547,0x1443683c
+DD 0xfa7147a3,0x40b3bad8,0x8a18cba4,0x30da36df
+DD 0x1aa25fad,0xa060a2d6,0x6acbd3aa,0xd0092ed1
+DD 0xb343fa65,0x0981071e,0xc32a7662,0x79e88b19
+DD 0x5390e26b,0xe9521f10,0x23f96e6c,0x993b9317
+DD 0x7709bc88,0xcdcb41f3,0x0760308f,0xbda2cdf4
+DD 0x97daa486,0x2d1859fd,0xe7b32881,0x5d71d5fa
+DD 0x212681e9,0x9be47c92,0x514f0dee,0xeb8df095
+DD 0xc1f599e7,0x7b37649c,0xb19c15e0,0x0b5ee89b
+DD 0xe56cc704,0x5fae3a7f,0x95054b03,0x2fc7b678
+DD 0x05bfdf0a,0xbf7d2271,0x75d6530d,0xcf14ae76
+DD 0xac5e7ac2,0x169c87b9,0xdc37f6c5,0x66f50bbe
+DD 0x4c8d62cc,0xf64f9fb7,0x3ce4eecb,0x862613b0
+DD 0x68143c2f,0xd2d6c154,0x187db028,0xa2bf4d53
+DD 0x88c72421,0x3205d95a,0xf8aea826,0x426c555d
+DD 0x7c76029c,0xc6b4ffe7,0x0c1f8e9b,0xb6dd73e0
+DD 0x9ca51a92,0x2667e7e9,0xeccc9695,0x560e6bee
+DD 0xb83c4471,0x02feb90a,0xc855c876,0x7297350d
+DD 0x58ef5c7f,0xe22da104,0x2886d078,0x92442d03
+DD 0xf10ef9b7,0x4bcc04cc,0x816775b0,0x3ba588cb
+DD 0x11dde1b9,0xab1f1cc2,0x61b46dbe,0xdb7690c5
+DD 0x3544bf5a,0x8f864221,0x452d335d,0xffefce26
+DD 0xd597a754,0x6f555a2f,0xa5fe2b53,0x1f3cd628
+DD 0x636b823b,0xd9a97f40,0x13020e3c,0xa9c0f347
+DD 0x83b89a35,0x397a674e,0xf3d11632,0x4913eb49
+DD 0xa721c4d6,0x1de339ad,0xd74848d1,0x6d8ab5aa
+DD 0x47f2dcd8,0xfd3021a3,0x379b50df,0x8d59ada4
+DD 0xee137910,0x54d1846b,0x9e7af517,0x24b8086c
+DD 0x0ec0611e,0xb4029c65,0x7ea9ed19,0xc46b1062
+DD 0x2a593ffd,0x909bc286,0x5a30b3fa,0xe0f24e81
+DD 0xca8a27f3,0x7048da88,0xbae3abf4,0x0021568f
+DD 0x424d03d2,0xf88ffea9,0x32248fd5,0x88e672ae
+DD 0xa29e1bdc,0x185ce6a7,0xd2f797db,0x68356aa0
+DD 0x8607453f,0x3cc5b844,0xf66ec938,0x4cac3443
+DD 0x66d45d31,0xdc16a04a,0x16bdd136,0xac7f2c4d
+DD 0xcf35f8f9,0x75f70582,0xbf5c74fe,0x059e8985
+DD 0x2fe6e0f7,0x95241d8c,0x5f8f6cf0,0xe54d918b
+DD 0x0b7fbe14,0xb1bd436f,0x7b163213,0xc1d4cf68
+DD 0xebaca61a,0x516e5b61,0x9bc52a1d,0x2107d766
+DD 0x5d508375,0xe7927e0e,0x2d390f72,0x97fbf209
+DD 0xbd839b7b,0x07416600,0xcdea177c,0x7728ea07
+DD 0x991ac598,0x23d838e3,0xe973499f,0x53b1b4e4
+DD 0x79c9dd96,0xc30b20ed,0x09a05191,0xb362acea
+DD 0xd028785e,0x6aea8525,0xa041f459,0x1a830922
+DD 0x30fb6050,0x8a399d2b,0x4092ec57,0xfa50112c
+DD 0x14623eb3,0xaea0c3c8,0x640bb2b4,0xdec94fcf
+DD 0xf4b126bd,0x4e73dbc6,0x84d8aaba,0x3e1a57c1
+
+mul_table_632:
+DD 0x00000000,0x6b749fb2,0xd6e93f64,0xbd9da0d6
+DD 0xa83e0839,0xc34a978b,0x7ed7375d,0x15a3a8ef
+DD 0x55906683,0x3ee4f931,0x837959e7,0xe80dc655
+DD 0xfdae6eba,0x96daf108,0x2b4751de,0x4033ce6c
+DD 0xab20cd06,0xc05452b4,0x7dc9f262,0x16bd6dd0
+DD 0x031ec53f,0x686a5a8d,0xd5f7fa5b,0xbe8365e9
+DD 0xfeb0ab85,0x95c43437,0x285994e1,0x432d0b53
+DD 0x568ea3bc,0x3dfa3c0e,0x80679cd8,0xeb13036a
+DD 0x53adecfd,0x38d9734f,0x8544d399,0xee304c2b
+DD 0xfb93e4c4,0x90e77b76,0x2d7adba0,0x460e4412
+DD 0x063d8a7e,0x6d4915cc,0xd0d4b51a,0xbba02aa8
+DD 0xae038247,0xc5771df5,0x78eabd23,0x139e2291
+DD 0xf88d21fb,0x93f9be49,0x2e641e9f,0x4510812d
+DD 0x50b329c2,0x3bc7b670,0x865a16a6,0xed2e8914
+DD 0xad1d4778,0xc669d8ca,0x7bf4781c,0x1080e7ae
+DD 0x05234f41,0x6e57d0f3,0xd3ca7025,0xb8beef97
+DD 0xa75bd9fa,0xcc2f4648,0x71b2e69e,0x1ac6792c
+DD 0x0f65d1c3,0x64114e71,0xd98ceea7,0xb2f87115
+DD 0xf2cbbf79,0x99bf20cb,0x2422801d,0x4f561faf
+DD 0x5af5b740,0x318128f2,0x8c1c8824,0xe7681796
+DD 0x0c7b14fc,0x670f8b4e,0xda922b98,0xb1e6b42a
+DD 0xa4451cc5,0xcf318377,0x72ac23a1,0x19d8bc13
+DD 0x59eb727f,0x329fedcd,0x8f024d1b,0xe476d2a9
+DD 0xf1d57a46,0x9aa1e5f4,0x273c4522,0x4c48da90
+DD 0xf4f63507,0x9f82aab5,0x221f0a63,0x496b95d1
+DD 0x5cc83d3e,0x37bca28c,0x8a21025a,0xe1559de8
+DD 0xa1665384,0xca12cc36,0x778f6ce0,0x1cfbf352
+DD 0x09585bbd,0x622cc40f,0xdfb164d9,0xb4c5fb6b
+DD 0x5fd6f801,0x34a267b3,0x893fc765,0xe24b58d7
+DD 0xf7e8f038,0x9c9c6f8a,0x2101cf5c,0x4a7550ee
+DD 0x0a469e82,0x61320130,0xdcafa1e6,0xb7db3e54
+DD 0xa27896bb,0xc90c0909,0x7491a9df,0x1fe5366d
+DD 0x4b5bc505,0x202f5ab7,0x9db2fa61,0xf6c665d3
+DD 0xe365cd3c,0x8811528e,0x358cf258,0x5ef86dea
+DD 0x1ecba386,0x75bf3c34,0xc8229ce2,0xa3560350
+DD 0xb6f5abbf,0xdd81340d,0x601c94db,0x0b680b69
+DD 0xe07b0803,0x8b0f97b1,0x36923767,0x5de6a8d5
+DD 0x4845003a,0x23319f88,0x9eac3f5e,0xf5d8a0ec
+DD 0xb5eb6e80,0xde9ff132,0x630251e4,0x0876ce56
+DD 0x1dd566b9,0x76a1f90b,0xcb3c59dd,0xa048c66f
+DD 0x18f629f8,0x7382b64a,0xce1f169c,0xa56b892e
+DD 0xb0c821c1,0xdbbcbe73,0x66211ea5,0x0d558117
+DD 0x4d664f7b,0x2612d0c9,0x9b8f701f,0xf0fbefad
+DD 0xe5584742,0x8e2cd8f0,0x33b17826,0x58c5e794
+DD 0xb3d6e4fe,0xd8a27b4c,0x653fdb9a,0x0e4b4428
+DD 0x1be8ecc7,0x709c7375,0xcd01d3a3,0xa6754c11
+DD 0xe646827d,0x8d321dcf,0x30afbd19,0x5bdb22ab
+DD 0x4e788a44,0x250c15f6,0x9891b520,0xf3e52a92
+DD 0xec001cff,0x8774834d,0x3ae9239b,0x519dbc29
+DD 0x443e14c6,0x2f4a8b74,0x92d72ba2,0xf9a3b410
+DD 0xb9907a7c,0xd2e4e5ce,0x6f794518,0x040ddaaa
+DD 0x11ae7245,0x7adaedf7,0xc7474d21,0xac33d293
+DD 0x4720d1f9,0x2c544e4b,0x91c9ee9d,0xfabd712f
+DD 0xef1ed9c0,0x846a4672,0x39f7e6a4,0x52837916
+DD 0x12b0b77a,0x79c428c8,0xc459881e,0xaf2d17ac
+DD 0xba8ebf43,0xd1fa20f1,0x6c678027,0x07131f95
+DD 0xbfadf002,0xd4d96fb0,0x6944cf66,0x023050d4
+DD 0x1793f83b,0x7ce76789,0xc17ac75f,0xaa0e58ed
+DD 0xea3d9681,0x81490933,0x3cd4a9e5,0x57a03657
+DD 0x42039eb8,0x2977010a,0x94eaa1dc,0xff9e3e6e
+DD 0x148d3d04,0x7ff9a2b6,0xc2640260,0xa9109dd2
+DD 0xbcb3353d,0xd7c7aa8f,0x6a5a0a59,0x012e95eb
+DD 0x411d5b87,0x2a69c435,0x97f464e3,0xfc80fb51
+DD 0xe92353be,0x8257cc0c,0x3fca6cda,0x54bef368
+
+mul_table_1272:
+DD 0x00000000,0xdd66cbbb,0xbf21e187,0x62472a3c
+DD 0x7bafb5ff,0xa6c97e44,0xc48e5478,0x19e89fc3
+DD 0xf75f6bfe,0x2a39a045,0x487e8a79,0x951841c2
+DD 0x8cf0de01,0x519615ba,0x33d13f86,0xeeb7f43d
+DD 0xeb52a10d,0x36346ab6,0x5473408a,0x89158b31
+DD 0x90fd14f2,0x4d9bdf49,0x2fdcf575,0xf2ba3ece
+DD 0x1c0dcaf3,0xc16b0148,0xa32c2b74,0x7e4ae0cf
+DD 0x67a27f0c,0xbac4b4b7,0xd8839e8b,0x05e55530
+DD 0xd34934eb,0x0e2fff50,0x6c68d56c,0xb10e1ed7
+DD 0xa8e68114,0x75804aaf,0x17c76093,0xcaa1ab28
+DD 0x24165f15,0xf97094ae,0x9b37be92,0x46517529
+DD 0x5fb9eaea,0x82df2151,0xe0980b6d,0x3dfec0d6
+DD 0x381b95e6,0xe57d5e5d,0x873a7461,0x5a5cbfda
+DD 0x43b42019,0x9ed2eba2,0xfc95c19e,0x21f30a25
+DD 0xcf44fe18,0x122235a3,0x70651f9f,0xad03d424
+DD 0xb4eb4be7,0x698d805c,0x0bcaaa60,0xd6ac61db
+DD 0xa37e1f27,0x7e18d49c,0x1c5ffea0,0xc139351b
+DD 0xd8d1aad8,0x05b76163,0x67f04b5f,0xba9680e4
+DD 0x542174d9,0x8947bf62,0xeb00955e,0x36665ee5
+DD 0x2f8ec126,0xf2e80a9d,0x90af20a1,0x4dc9eb1a
+DD 0x482cbe2a,0x954a7591,0xf70d5fad,0x2a6b9416
+DD 0x33830bd5,0xeee5c06e,0x8ca2ea52,0x51c421e9
+DD 0xbf73d5d4,0x62151e6f,0x00523453,0xdd34ffe8
+DD 0xc4dc602b,0x19baab90,0x7bfd81ac,0xa69b4a17
+DD 0x70372bcc,0xad51e077,0xcf16ca4b,0x127001f0
+DD 0x0b989e33,0xd6fe5588,0xb4b97fb4,0x69dfb40f
+DD 0x87684032,0x5a0e8b89,0x3849a1b5,0xe52f6a0e
+DD 0xfcc7f5cd,0x21a13e76,0x43e6144a,0x9e80dff1
+DD 0x9b658ac1,0x4603417a,0x24446b46,0xf922a0fd
+DD 0xe0ca3f3e,0x3dacf485,0x5febdeb9,0x828d1502
+DD 0x6c3ae13f,0xb15c2a84,0xd31b00b8,0x0e7dcb03
+DD 0x179554c0,0xcaf39f7b,0xa8b4b547,0x75d27efc
+DD 0x431048bf,0x9e768304,0xfc31a938,0x21576283
+DD 0x38bffd40,0xe5d936fb,0x879e1cc7,0x5af8d77c
+DD 0xb44f2341,0x6929e8fa,0x0b6ec2c6,0xd608097d
+DD 0xcfe096be,0x12865d05,0x70c17739,0xada7bc82
+DD 0xa842e9b2,0x75242209,0x17630835,0xca05c38e
+DD 0xd3ed5c4d,0x0e8b97f6,0x6cccbdca,0xb1aa7671
+DD 0x5f1d824c,0x827b49f7,0xe03c63cb,0x3d5aa870
+DD 0x24b237b3,0xf9d4fc08,0x9b93d634,0x46f51d8f
+DD 0x90597c54,0x4d3fb7ef,0x2f789dd3,0xf21e5668
+DD 0xebf6c9ab,0x36900210,0x54d7282c,0x89b1e397
+DD 0x670617aa,0xba60dc11,0xd827f62d,0x05413d96
+DD 0x1ca9a255,0xc1cf69ee,0xa38843d2,0x7eee8869
+DD 0x7b0bdd59,0xa66d16e2,0xc42a3cde,0x194cf765
+DD 0x00a468a6,0xddc2a31d,0xbf858921,0x62e3429a
+DD 0x8c54b6a7,0x51327d1c,0x33755720,0xee139c9b
+DD 0xf7fb0358,0x2a9dc8e3,0x48dae2df,0x95bc2964
+DD 0xe06e5798,0x3d089c23,0x5f4fb61f,0x82297da4
+DD 0x9bc1e267,0x46a729dc,0x24e003e0,0xf986c85b
+DD 0x17313c66,0xca57f7dd,0xa810dde1,0x7576165a
+DD 0x6c9e8999,0xb1f84222,0xd3bf681e,0x0ed9a3a5
+DD 0x0b3cf695,0xd65a3d2e,0xb41d1712,0x697bdca9
+DD 0x7093436a,0xadf588d1,0xcfb2a2ed,0x12d46956
+DD 0xfc639d6b,0x210556d0,0x43427cec,0x9e24b757
+DD 0x87cc2894,0x5aaae32f,0x38edc913,0xe58b02a8
+DD 0x33276373,0xee41a8c8,0x8c0682f4,0x5160494f
+DD 0x4888d68c,0x95ee1d37,0xf7a9370b,0x2acffcb0
+DD 0xc478088d,0x191ec336,0x7b59e90a,0xa63f22b1
+DD 0xbfd7bd72,0x62b176c9,0x00f65cf5,0xdd90974e
+DD 0xd875c27e,0x051309c5,0x675423f9,0xba32e842
+DD 0xa3da7781,0x7ebcbc3a,0x1cfb9606,0xc19d5dbd
+DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc
+DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643
+
+;;; func core, ver, snum
+slversion crc32_iscsi_00, 00, 04, 0014
+
diff --git a/src/isa-l/crc/crc32_iscsi_01.asm b/src/isa-l/crc/crc32_iscsi_01.asm
new file mode 100644
index 000000000..e0f2b5e82
--- /dev/null
+++ b/src/isa-l/crc/crc32_iscsi_01.asm
@@ -0,0 +1,592 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
+
+%include "reg_sizes.asm"
+
+default rel
+%define CONCAT(a,b,c) a %+ b %+ c
+
+; Define threshold where buffers are considered "small" and routed to more
+; efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
+; SMALL_SIZE can be no larger than 256.
+%define SMALL_SIZE 200
+
+%if (SMALL_SIZE > 256)
+%error SMALL_SIZE must be <= 256
+% error ; needed because '%error' actually generates only a warning
+%endif
+
+;;; unsigned int crc32_iscsi_01(unsigned char * buffer, int len, unsigned int crc_init);
+;;;
+;;;        *buf = rcx (win64) / rdi (elf64)
+;;;         len = rdx (win64) / rsi (elf64)
+;;;    crc_init = r8  (win64) / rdx (elf64)
+
+mk_global crc32_iscsi_01, function
+crc32_iscsi_01:
+ endbranch
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define bufp rdi
+%define bufp_dw edi
+%define bufp_w di
+%define bufp_b dil
+%define bufptmp rcx
+%define block_0 rcx
+%define block_1 rdx
+%define block_2 r11
+%define len rsi
+%define len_dw esi
+%define len_w si
+%define len_b sil
+%define crc_init_arg rdx
+%else
+%define bufp rcx
+%define bufp_dw ecx
+%define bufp_w cx
+%define bufp_b cl
+%define bufptmp rdi
+%define block_0 rdi
+%define block_1 rsi
+%define block_2 r11
+%define len rdx
+%define len_dw edx
+%define len_w dx
+%define len_b dl
+%endif
+
+%define tmp rbx
+%define crc_init r8
+%define crc_init_dw r8d
+%define crc1 r9
+%define crc2 r10
+
+ push rbx
+ push rdi
+ push rsi
+
+ ;; Move crc_init for Linux to a different reg
+%ifidn __OUTPUT_FORMAT__, elf64
+ mov crc_init, crc_init_arg
+%endif
+
+ ;; If len is less than 8 we need to jump to special code to avoid
+ ;; reading beyond the end of the buffer
+ cmp len, 8
+ jb less_than_8
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ mov bufptmp, bufp ;; rdi = *buf
+ neg bufp
+ and bufp, 7 ;; calculate the unalignment amount of
+ ;; the address
+ je proc_block ;; Skip if aligned
+
+ ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;;
+ mov tmp, [bufptmp] ;; load a quadword from the buffer
+ add bufptmp, bufp ;; align buffer pointer for quadword
+ ;; processing
+ sub len, bufp ;; update buffer length
+align_loop:
+ crc32 crc_init_dw, bl ;; compute crc32 of 1-byte
+ shr tmp, 8 ;; get next byte
+ dec bufp
+ jne align_loop
+
+proc_block:
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 2) PROCESS BLOCKS: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ ;; compute num of bytes to be processed
+ mov tmp, len ;; save num bytes in tmp
+
+ cmp len, 128*24
+ jae full_block
+
+continue_block:
+ cmp len, SMALL_SIZE
+ jb small
+
+ ;; len < 128*24
+ mov rax, 2731 ;; 2731 = ceil(2^16 / 24)
+ mul len_dw
+ shr rax, 16
+
+ ;; eax contains floor(bytes / 24) = num 24-byte chunks to do
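+
+	;; reciprocal-multiply check (informal C sketch; 'n' is hypothetical):
+	;;   for (uint32_t n = 0; n < 128*24; n++)
+	;;           assert((n * 2731u) >> 16 == n / 24);
+	;; 2731/2^16 exceeds 1/24 by only 1/196608, so the error term
+	;; n/196608 stays below 1/64 for n < 128*24 and the shifted
+	;; product still floors to n/24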
+
+ ;; process rax 24-byte chunks (128 >= rax >= 0)
+
+ ;; compute end address of each block
+ ;; rdi -> block 0 (base addr + RAX * 8)
+ ;; rsi -> block 1 (base addr + RAX * 16)
+ ;; r11 -> block 2 (base addr + RAX * 24)
+ lea block_0, [bufptmp + rax * 8]
+ lea block_1, [block_0 + rax * 8]
+ lea block_2, [block_1 + rax * 8]
+
+ xor crc1,crc1
+ xor crc2,crc2
+
+ ;; branch into array
+ lea bufp, [jump_table]
+ movzx len, word [bufp + rax * 2] ;; len is offset from crc_array
+ lea bufp, [bufp + len + crc_array - jump_table]
+ jmp bufp
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 2a) PROCESS FULL BLOCKS: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+full_block:
+ mov rax, 128
+ lea block_1, [block_0 + 128*8*2]
+ lea block_2, [block_0 + 128*8*3]
+ add block_0, 128*8*1
+
+ xor crc1,crc1
+ xor crc2,crc2
+
+; ;; branch into array
+; jmp CONCAT(crc_,128,)
+	; Fall through into top of crc array (crc_128)
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 3) CRC Array: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+crc_array:
+ cmp len, 128*24*2
+ jbe non_prefetch
+
+%assign i 128
+%rep 128-1
+
+CONCAT(_crc_,i,:)
+ crc32 crc_init, qword [block_0 - i*8]
+ crc32 crc1, qword [block_1 - i*8]
+ crc32 crc2, qword [block_2 - i*8]
+
+ %if i > 128*8 / 32 ; prefetch next 3KB data
+ prefetchnta [block_2 + 128*32 - i*32]
+ %endif
+
+%assign i (i-1)
+%endrep
+ jmp next_
+
+non_prefetch:
+%assign i 128
+%rep 128-1
+
+CONCAT(crc_,i,:)
+ endbranch
+ crc32 crc_init, qword [block_0 - i*8]
+ crc32 crc1, qword [block_1 - i*8]
+ crc32 crc2, qword [block_2 - i*8]
+%assign i (i-1)
+%endrep
+
+next_:
+CONCAT(crc_,i,:)
+ crc32 crc_init, qword [block_0 - i*8]
+ crc32 crc1, qword [block_1 - i*8]
+; SKIP ;crc32 crc2, [block_2 - i*8] ; Don't do this one yet
+
+ mov block_0, block_2
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 4) Combine three results: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ lea bufp, [K_table - 16] ; first entry is for idx 1
+ shl rax, 3 ; rax *= 8
+ sub tmp, rax ; tmp -= rax*8
+ shl rax, 1
+ sub tmp, rax ; tmp -= rax*16 (total tmp -= rax*24)
+ add bufp, rax
+
+ movdqa xmm0, [bufp] ; 2 consts: K1:K2
+
+ movq xmm1, crc_init ; CRC for block 1
+ pclmulqdq xmm1, xmm0, 0x00 ; Multiply by K2
+
+ movq xmm2, crc1 ; CRC for block 2
+ pclmulqdq xmm2, xmm0, 0x10 ; Multiply by K1
+
+ pxor xmm1, xmm2
+ movq rax, xmm1
+ xor rax, [block_2 - i*8]
+ mov crc_init, crc2
+ crc32 crc_init, rax
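+
+	;; combine sketch (informal): each K_table row packs two constants
+	;; K1:K2, powers of x mod the iscsi polynomial sized to jump a crc
+	;; forward over two blocks and one block of rax*8 bytes; one
+	;; pclmulqdq per stream advances crc_init and crc1 past the data
+	;; that followed them, and the final 8-byte crc32 folds the XOR of
+	;; everything into crc2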
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; 5) Check for end: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+CONCAT(crc_,0,:)
+ mov len, tmp
+ cmp tmp, 128*24
+ jae full_block
+ cmp tmp, 24
+ jae continue_block
+
+fewer_than_24:
+ ;; now fewer than 24 bytes remain
+ cmp tmp, 16
+ jae do_16
+ cmp tmp, 8
+ jae do_8
+
+ ;; 0 <= tmp <= 7
+ shl ebx, 29 ; size now in bits 31:29
+ jz do_return
+check_4:
+ mov bufp, [bufptmp]
+ shl ebx, 1 ; shift out into carry MSB (orig size & 4)
+ jnc check_2
+ crc32 crc_init_dw, bufp_dw
+ jz do_return
+ shr bufp, 32 ; shift data down by 4 bytes
+check_2:
+ shl ebx, 1 ; shift out into carry MSB (orig size & 2)
+ jnc check_1
+ crc32 crc_init_dw, bufp_w
+ jz do_return
+ shr bufp, 16 ; shift data down by 2 bytes
+check_1:
+ crc32 crc_init_dw, bufp_b
+
+do_return:
+ mov rax, crc_init
+ pop rsi
+ pop rdi
+ pop rbx
+ ret
+
+do_8:
+ crc32 crc_init, qword [bufptmp]
+ add bufptmp, 8
+ shl ebx, 29 ; size (0...7) in bits 31:29
+ jnz check_4
+ mov rax, crc_init
+ pop rsi
+ pop rdi
+ pop rbx
+ ret
+
+do_16:
+ crc32 crc_init, qword [bufptmp]
+ crc32 crc_init, qword [bufptmp+8]
+ add bufptmp, 16
+ shl ebx, 29 ; size (0...7) in bits 31:29
+ jnz check_4
+ mov rax, crc_init
+ pop rsi
+ pop rdi
+ pop rbx
+ ret
+
+
+
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Handle the case of fewer than 8 bytes, unaligned. In this case
+ ;; we can't read 8 bytes, as this might go beyond the end of the buffer
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+less_than_8:
+ test len,4
+ jz less_than_4
+ crc32 crc_init_dw, dword[bufp]
+ add bufp,4
+less_than_4:
+ test len,2
+ jz less_than_2
+ crc32 crc_init_dw, word[bufp]
+ add bufp,2
+less_than_2:
+ test len,1
+ jz do_return
+ crc32 crc_init_dw, byte[bufp]
+ mov rax, crc_init
+ pop rsi
+ pop rdi
+ pop rbx
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;4) LESS THAN 256 BYTES REMAIN AT THIS POINT (8 bits of len are full)
+
+small:
+ mov rax, crc_init
+
+bit8:
+ shl len_b, 1 ;; shift-out MSB (bit-7)
+ jnc bit7 ;; jump to bit-6 if bit-7 == 0
+ %assign i 0
+ %rep 16
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return2 ;; return if remaining data is zero
+	add	bufptmp, 128		;; buf +=128; (next 128 bytes)
+
+bit7:
+	shl	len_b, 1		;; shift-out MSB (bit-6)
+	jnc	bit6			;; jump to bit-5 if bit-6 == 0
+ %assign i 0
+ %rep 8
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return2 ;; return if remaining data is zero
+ add bufptmp, 64 ;; buf +=64; (next 64 bytes)
+bit6:
+	shl	len_b, 1		;; shift-out MSB (bit-5)
+	jnc	bit5			;; jump to bit-4 if bit-5 == 0
+ %assign i 0
+ %rep 4
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return2 ;; return if remaining data is zero
+ add bufptmp, 32 ;; buf +=32; (next 32 bytes)
+bit5:
+	shl	len_b, 1		;; shift-out MSB (bit-4)
+	jnc	bit4			;; jump to bit-3 if bit-4 == 0
+ %assign i 0
+ %rep 2
+ crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return2 ;; return if remaining data is zero
+ add bufptmp, 16 ;; buf +=16; (next 16 bytes)
+bit4:
+	shl	len_b, 1		;; shift-out MSB (bit-3)
+	jnc	bit3			;; jump to bit-2 if bit-3 == 0
+ crc32 rax, qword [bufptmp] ;; compute crc32 of 8-byte data
+ je do_return2 ;; return if remaining data is zero
+ add bufptmp, 8 ;; buf +=8; (next 8 bytes)
+bit3:
+	mov	rbx, qword [bufptmp]	;; load 8 bytes from the buffer
+	shl	len_b, 1		;; shift-out MSB (bit-2)
+	jnc	bit2			;; jump to bit-1 if bit-2 == 0
+ crc32 eax, ebx ;; compute crc32 of 4-byte data
+ je do_return2 ;; return if remaining data is zero
+ shr rbx, 32 ;; get next 3 bytes
+bit2:
+	shl	len_b, 1		;; shift-out MSB (bit-1)
+	jnc	bit1			;; jump to bit-0 if bit-1 == 0
+ crc32 eax, bx ;; compute crc32 of 2-byte data
+ je do_return2 ;; return if remaining data is zero
+ shr rbx, 16 ;; next byte
+bit1:
+ test len_b,len_b
+ je do_return2
+ crc32 eax, bl ;; compute crc32 of 1-byte data
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+do_return2:
+ pop rsi
+ pop rdi
+ pop rbx
+ ret
+
+
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; jump table ;; Table is 129 entries x 2 bytes each
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+align 4
+jump_table:
+%assign i 0
+%rep 129
+ dw CONCAT(crc_,i,) - crc_array
+%assign i (i+1)
+%endrep
+
+
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; PCLMULQDQ tables
+ ;; Table is 128 entries x 2 quad words each
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+section .data
+align 64
+K_table:
+ dq 0x14cd00bd6, 0x105ec76f0
+ dq 0x0ba4fc28e, 0x14cd00bd6
+ dq 0x1d82c63da, 0x0f20c0dfe
+ dq 0x09e4addf8, 0x0ba4fc28e
+ dq 0x039d3b296, 0x1384aa63a
+ dq 0x102f9b8a2, 0x1d82c63da
+ dq 0x14237f5e6, 0x01c291d04
+ dq 0x00d3b6092, 0x09e4addf8
+ dq 0x0c96cfdc0, 0x0740eef02
+ dq 0x18266e456, 0x039d3b296
+ dq 0x0daece73e, 0x0083a6eec
+ dq 0x0ab7aff2a, 0x102f9b8a2
+ dq 0x1248ea574, 0x1c1733996
+ dq 0x083348832, 0x14237f5e6
+ dq 0x12c743124, 0x02ad91c30
+ dq 0x0b9e02b86, 0x00d3b6092
+ dq 0x018b33a4e, 0x06992cea2
+ dq 0x1b331e26a, 0x0c96cfdc0
+ dq 0x17d35ba46, 0x07e908048
+ dq 0x1bf2e8b8a, 0x18266e456
+ dq 0x1a3e0968a, 0x11ed1f9d8
+ dq 0x0ce7f39f4, 0x0daece73e
+ dq 0x061d82e56, 0x0f1d0f55e
+ dq 0x0d270f1a2, 0x0ab7aff2a
+ dq 0x1c3f5f66c, 0x0a87ab8a8
+ dq 0x12ed0daac, 0x1248ea574
+ dq 0x065863b64, 0x08462d800
+ dq 0x11eef4f8e, 0x083348832
+ dq 0x1ee54f54c, 0x071d111a8
+ dq 0x0b3e32c28, 0x12c743124
+ dq 0x0064f7f26, 0x0ffd852c6
+ dq 0x0dd7e3b0c, 0x0b9e02b86
+ dq 0x0f285651c, 0x0dcb17aa4
+ dq 0x010746f3c, 0x018b33a4e
+ dq 0x1c24afea4, 0x0f37c5aee
+ dq 0x0271d9844, 0x1b331e26a
+ dq 0x08e766a0c, 0x06051d5a2
+ dq 0x093a5f730, 0x17d35ba46
+ dq 0x06cb08e5c, 0x11d5ca20e
+ dq 0x06b749fb2, 0x1bf2e8b8a
+ dq 0x1167f94f2, 0x021f3d99c
+ dq 0x0cec3662e, 0x1a3e0968a
+ dq 0x19329634a, 0x08f158014
+ dq 0x0e6fc4e6a, 0x0ce7f39f4
+ dq 0x08227bb8a, 0x1a5e82106
+ dq 0x0b0cd4768, 0x061d82e56
+ dq 0x13c2b89c4, 0x188815ab2
+ dq 0x0d7a4825c, 0x0d270f1a2
+ dq 0x10f5ff2ba, 0x105405f3e
+ dq 0x00167d312, 0x1c3f5f66c
+ dq 0x0f6076544, 0x0e9adf796
+ dq 0x026f6a60a, 0x12ed0daac
+ dq 0x1a2adb74e, 0x096638b34
+ dq 0x19d34af3a, 0x065863b64
+ dq 0x049c3cc9c, 0x1e50585a0
+ dq 0x068bce87a, 0x11eef4f8e
+ dq 0x1524fa6c6, 0x19f1c69dc
+ dq 0x16cba8aca, 0x1ee54f54c
+ dq 0x042d98888, 0x12913343e
+ dq 0x1329d9f7e, 0x0b3e32c28
+ dq 0x1b1c69528, 0x088f25a3a
+ dq 0x02178513a, 0x0064f7f26
+ dq 0x0e0ac139e, 0x04e36f0b0
+ dq 0x0170076fa, 0x0dd7e3b0c
+ dq 0x141a1a2e2, 0x0bd6f81f8
+ dq 0x16ad828b4, 0x0f285651c
+ dq 0x041d17b64, 0x19425cbba
+ dq 0x1fae1cc66, 0x010746f3c
+ dq 0x1a75b4b00, 0x18db37e8a
+ dq 0x0f872e54c, 0x1c24afea4
+ dq 0x01e41e9fc, 0x04c144932
+ dq 0x086d8e4d2, 0x0271d9844
+ dq 0x160f7af7a, 0x052148f02
+ dq 0x05bb8f1bc, 0x08e766a0c
+ dq 0x0a90fd27a, 0x0a3c6f37a
+ dq 0x0b3af077a, 0x093a5f730
+ dq 0x04984d782, 0x1d22c238e
+ dq 0x0ca6ef3ac, 0x06cb08e5c
+ dq 0x0234e0b26, 0x063ded06a
+ dq 0x1d88abd4a, 0x06b749fb2
+ dq 0x04597456a, 0x04d56973c
+ dq 0x0e9e28eb4, 0x1167f94f2
+ dq 0x07b3ff57a, 0x19385bf2e
+ dq 0x0c9c8b782, 0x0cec3662e
+ dq 0x13a9cba9e, 0x0e417f38a
+ dq 0x093e106a4, 0x19329634a
+ dq 0x167001a9c, 0x14e727980
+ dq 0x1ddffc5d4, 0x0e6fc4e6a
+ dq 0x00df04680, 0x0d104b8fc
+ dq 0x02342001e, 0x08227bb8a
+ dq 0x00a2a8d7e, 0x05b397730
+ dq 0x168763fa6, 0x0b0cd4768
+ dq 0x1ed5a407a, 0x0e78eb416
+ dq 0x0d2c3ed1a, 0x13c2b89c4
+ dq 0x0995a5724, 0x1641378f0
+ dq 0x19b1afbc4, 0x0d7a4825c
+ dq 0x109ffedc0, 0x08d96551c
+ dq 0x0f2271e60, 0x10f5ff2ba
+ dq 0x00b0bf8ca, 0x00bf80dd2
+ dq 0x123888b7a, 0x00167d312
+ dq 0x1e888f7dc, 0x18dcddd1c
+ dq 0x002ee03b2, 0x0f6076544
+ dq 0x183e8d8fe, 0x06a45d2b2
+ dq 0x133d7a042, 0x026f6a60a
+ dq 0x116b0f50c, 0x1dd3e10e8
+ dq 0x05fabe670, 0x1a2adb74e
+ dq 0x130004488, 0x0de87806c
+ dq 0x000bcf5f6, 0x19d34af3a
+ dq 0x18f0c7078, 0x014338754
+ dq 0x017f27698, 0x049c3cc9c
+ dq 0x058ca5f00, 0x15e3e77ee
+ dq 0x1af900c24, 0x068bce87a
+ dq 0x0b5cfca28, 0x0dd07448e
+ dq 0x0ded288f8, 0x1524fa6c6
+ dq 0x059f229bc, 0x1d8048348
+ dq 0x06d390dec, 0x16cba8aca
+ dq 0x037170390, 0x0a3e3e02c
+ dq 0x06353c1cc, 0x042d98888
+ dq 0x0c4584f5c, 0x0d73c7bea
+ dq 0x1f16a3418, 0x1329d9f7e
+ dq 0x0531377e2, 0x185137662
+ dq 0x1d8d9ca7c, 0x1b1c69528
+ dq 0x0b25b29f2, 0x18a08b5bc
+ dq 0x19fb2a8b0, 0x02178513a
+ dq 0x1a08fe6ac, 0x1da758ae0
+ dq 0x045cddf4e, 0x0e0ac139e
+ dq 0x1a91647f2, 0x169cf9eb0
+ dq 0x1a0f717c4, 0x0170076fa
+
+;;; func core, ver, snum
+slversion crc32_iscsi_01, 01, 04, 0015
+
diff --git a/src/isa-l/crc/crc32_iscsi_by16_10.asm b/src/isa-l/crc/crc32_iscsi_by16_10.asm
new file mode 100644
index 000000000..4c63bab39
--- /dev/null
+++ b/src/isa-l/crc/crc32_iscsi_by16_10.asm
@@ -0,0 +1,556 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2020 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+;	UINT32 crc32_iscsi_by16_10(
+;		const unsigned char *buf, //buffer pointer to calculate CRC on
+;		UINT64 len,               //buffer length in bytes (64-bit data)
+;		UINT32 init_crc           //initial CRC value, 32 bits
+;	);
+;
+; Authors:
+; Erdinc Ozturk
+; Vinodh Gopal
+; James Guilford
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+;
+;
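+; Editorial sketch (not upstream code): folding works because, over GF(2),
+; appending N zero bits to a message multiplies its CRC remainder by
+; x^N mod P. Each rk constant below is such an x^N mod P, so a block held
+; in a register can be carry-less multiplied by rk and xor-ed into data N
+; bits further along, shrinking the buffer without ever dividing by P.
+; In C, one 64-bit fold step would look like (clmul_lo() is a hypothetical
+; helper returning the low 64 bits of a 64x64 carry-less product):
+;
+;	uint64_t fold_step(uint64_t acc, uint64_t rk, uint64_t next)
+;	{
+;		return clmul_lo(acc, rk) ^ next; /* addition is xor in GF(2) */
+;	}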
+
+%include "reg_sizes.asm"
+
+%ifndef FUNCTION_NAME
+%define FUNCTION_NAME crc32_iscsi_by16_10
+%endif
+
+%if (AS_FEATURE_LEVEL) >= 10
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 r8
+ %xdefine arg2 rcx
+ %xdefine arg3 rdx
+
+ %xdefine arg1_low32 r8d
+%else
+ %xdefine arg1 rdx
+ %xdefine arg2 rdi
+ %xdefine arg3 rsi
+
+ %xdefine arg1_low32 edx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*12+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
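+
+; Note on the frame layout (an editorial reading of the defines above):
+; rsp+0 holds 16B of scratch (TMP); on win64, ten 16B slots starting at
+; rsp+32 (XMM_SAVE) preserve xmm6-xmm15. The trailing +8 in VARIABLE_OFFSET
+; re-establishes 16-byte stack alignment after the call pushed the return
+; address, which is what lets the aligned vmovdqa saves below work.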
+
+align 16
+mk_global FUNCTION_NAME, function
+FUNCTION_NAME:
+ endbranch
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the non-volatile xmm registers on the stack, per the Windows x64 ABI
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+ vmovdqa [rsp + XMM_SAVE + 16*8], xmm14
+ vmovdqa [rsp + XMM_SAVE + 16*9], xmm15
+%endif
+
+ ; check if smaller than 256B
+ cmp arg3, 256
+ jl .less_than_256
+
+ ; load the initial crc value
+ vmovd xmm10, arg1_low32 ; initial crc
+
+	; load the initial 128B of data and xor in the initial crc value
+ vmovdqu8 zmm0, [arg2+16*0]
+ vmovdqu8 zmm4, [arg2+16*4]
+ vpxorq zmm0, zmm10
+	vbroadcasti32x4 zmm10, [rk3]	;each 128-bit lane of zmm10 has rk3 and rk4
+	;the pclmulqdq immediate selects which of the two constants to use
+
+ sub arg3, 256
+ cmp arg3, 256
+ jl .fold_128_B_loop
+
+ vmovdqu8 zmm7, [arg2+16*8]
+ vmovdqu8 zmm8, [arg2+16*12]
+	vbroadcasti32x4 zmm16, [rk_1]	;each 128-bit lane of zmm16 has rk_1 and rk_2
+ sub arg3, 256
+
+.fold_256_B_loop:
+ add arg2, 256
+ vmovdqu8 zmm3, [arg2+16*0]
+ vpclmulqdq zmm1, zmm0, zmm16, 0x10
+ vpclmulqdq zmm2, zmm0, zmm16, 0x01
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm3
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpclmulqdq zmm5, zmm4, zmm16, 0x10
+ vpclmulqdq zmm6, zmm4, zmm16, 0x01
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ vmovdqu8 zmm11, [arg2+16*8]
+ vpclmulqdq zmm12, zmm7, zmm16, 0x10
+ vpclmulqdq zmm13, zmm7, zmm16, 0x01
+ vpxorq zmm7, zmm12, zmm13
+ vpxorq zmm7, zmm7, zmm11
+
+ vmovdqu8 zmm17, [arg2+16*12]
+ vpclmulqdq zmm14, zmm8, zmm16, 0x10
+ vpclmulqdq zmm15, zmm8, zmm16, 0x01
+ vpxorq zmm8, zmm14, zmm15
+ vpxorq zmm8, zmm8, zmm17
+
+ sub arg3, 256
+ jge .fold_256_B_loop
+
+ ;; Fold 256 into 128
+ add arg2, 256
+ vpclmulqdq zmm1, zmm0, zmm10, 0x01
+ vpclmulqdq zmm2, zmm0, zmm10, 0x10
+ vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC
+
+ vpclmulqdq zmm5, zmm4, zmm10, 0x01
+ vpclmulqdq zmm6, zmm4, zmm10, 0x10
+ vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC
+
+ vmovdqa32 zmm0, zmm7
+ vmovdqa32 zmm4, zmm8
+
+ add arg3, 128
+ jmp .fold_128_B_register
+
+
+
+	; at this point there are 128*x+y (0 <= y < 128) bytes of buffer left. The fold_128_B_loop
+	; below folds 128B at a time until only 128+y bytes of buffer remain
+
+	; fold 128B at a time. This section folds two zmm registers (eight 128-bit lanes) in parallel
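+	; Per 512-bit register, each 128-bit lane L folds as (editorial note):
+	;	L' = clmul(L.lo64, rk4) ^ clmul(L.hi64, rk3) ^ next_data
+	; the vpclmulqdq immediate picks the 64-bit halves: 0x10 multiplies the
+	; low qword of the first source by the high qword of the second, and
+	; 0x01 the reverse; the lane constants come from the rk3/rk4 broadcast.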
+.fold_128_B_loop:
+ add arg2, 128
+ vmovdqu8 zmm8, [arg2+16*0]
+ vpclmulqdq zmm2, zmm0, zmm10, 0x10
+ vpclmulqdq zmm1, zmm0, zmm10, 0x01
+ vpxorq zmm0, zmm2, zmm1
+ vpxorq zmm0, zmm0, zmm8
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpclmulqdq zmm5, zmm4, zmm10, 0x10
+ vpclmulqdq zmm6, zmm4, zmm10, 0x01
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ sub arg3, 128
+ jge .fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+	; at this point, the buffer pointer is pointing at the last y bytes of the buffer, where 0 <= y < 128
+	; the 128B of folded data is held in two zmm registers: zmm0 and zmm4
+
+.fold_128_B_register:
+	; fold the eight 128-bit lanes into a single xmm register using per-lane constants
+ vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16
+ vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0
+ vpclmulqdq zmm1, zmm0, zmm16, 0x01
+ vpclmulqdq zmm2, zmm0, zmm16, 0x10
+	vextracti64x2 xmm7, zmm4, 3	; save the last 128-bit lane, which is not multiplied by a constant
+
+ vpclmulqdq zmm5, zmm4, zmm11, 0x01
+ vpclmulqdq zmm6, zmm4, zmm11, 0x10
+ vmovdqa xmm10, [rk1] ; Needed later in reduction loop
+ vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC
+ vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC
+
+ vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10
+ vpxorq ymm8, ymm8, ymm1
+ vextracti64x2 xmm5, ymm8, 1
+ vpxorq xmm7, xmm5, xmm8
+
+	; add 128-16 instead of 128 to the loop counter to save one instruction per iteration
+	; the sign flag with the jl instruction then replaces an explicit cmp
+ add arg3, 128-16
+ jl .final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce; 16 bytes are in register xmm7 and the rest is in memory
+	; we can fold 16 bytes at a time if y >= 16
+	; continue folding 16B at a time
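+	; i.e. per iteration, in C-like terms (editorial sketch):
+	;	acc = clmul(acc.lo64, rk2) ^ clmul(acc.hi64, rk1) ^ load16(p);
+	;	p += 16; remaining -= 16;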
+
+.16B_reduction_loop:
+ vpclmulqdq xmm8, xmm7, xmm10, 0x1
+ vpclmulqdq xmm7, xmm7, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+	; instead of a cmp instruction, we utilize the flags with the jge instruction
+	; equivalent of: cmp arg3, 16-16
+	; check whether another full 16B remains in the buffer to fold
+ jge .16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16
+	; first, reduce the data in the xmm7 register
+
+
+.final_reduction_for_128:
+ add arg3, 16
+ je .128_done
+
+	; here we handle a tail of fewer than 16 bytes.
+	; since data precedes the pointer, we can back the input pointer up
+	; so that exactly 16 bytes are loaded, then adjust the registers to
+	; discard the bytes that were already folded.
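+	; (editorial sketch of the idea: xmm1 reloads the final 16 bytes of
+	; the buffer, overlapping bytes already folded into xmm7; the pshufb
+	; masks below shift xmm7 so its stale bytes line up with the reloaded
+	; ones, and vpblendvb keeps exactly one copy of each byte before the
+	; final fold.)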
+.get_last_two_xmms:
+
+ vmovdqa xmm2, xmm7
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ vmovdqu xmm0, [rax]
+
+ vpshufb xmm7, xmm0
+ vpxor xmm0, [mask3]
+ vpshufb xmm2, xmm0
+
+ vpblendvb xmm2, xmm2, xmm1, xmm0
+ ;;;;;;;;;;
+ vpclmulqdq xmm8, xmm7, xmm10, 0x1
+ vpclmulqdq xmm7, xmm7, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+.128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5]
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0
+ vpsrldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;32b fold
+ vmovdqa xmm0, xmm7
+ vpslldq xmm7, 4
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm0
+
+
+ ;barrett reduction
+.barrett:
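+	; editorial gloss: Barrett reduction avoids dividing by the CRC
+	; polynomial P; it multiplies the remainder by a precomputed
+	; reciprocal of P (rk7), truncates, multiplies back by P (rk8), and
+	; xors, leaving the true 32-bit remainder that vpextrd pulls out.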
+ vpand xmm7, [mask2]
+ vmovdqa xmm1, xmm7
+ vmovdqa xmm2, xmm7
+ vmovdqa xmm10, [rk7]
+
+ vpclmulqdq xmm7, xmm10, 0
+ vpxor xmm7, xmm2
+ vpand xmm7, [mask]
+ vmovdqa xmm2, xmm7
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpxor xmm7, xmm2
+ vpxor xmm7, xmm1
+ vpextrd eax, xmm7, 2
+
+.cleanup:
+
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+ vmovdqa xmm14, [rsp + XMM_SAVE + 16*8]
+ vmovdqa xmm15, [rsp + XMM_SAVE + 16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+.less_than_256:
+
+	; check whether the buffer is large enough to fold 16B at a time
+ cmp arg3, 32
+ jl .less_than_32
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+	; update the counter: subtract 32 instead of 16 to save one instruction in the loop
+ sub arg3, 32
+
+ jmp .16B_reduction_loop
+
+
+align 16
+.less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov eax, arg1_low32
+ test arg3, arg3
+ je .cleanup
+
+ vmovd xmm0, arg1_low32 ; get the initial crc value
+
+ cmp arg3, 16
+ je .exact_16_left
+ jl .less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp .get_last_two_xmms
+
+align 16
+.less_than_16_left:
+	; stage a tail of fewer than 16 bytes via the stack; zero the 16B slot first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ cmp arg3, 4
+ jl .only_less_than_4
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl .less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+.less_than_8_left:
+
+ cmp arg3, 4
+ jl .less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+.less_than_4_left:
+
+ cmp arg3, 2
+ jl .less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+.less_than_2_left:
+ cmp arg3, 1
+ jl .zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+.zero_left:
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+ vmovdqu xmm0, [rax + r9]
+ vpshufb xmm7,xmm0
+ jmp .128_done
+
+align 16
+.exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ jmp .128_done
+
+.only_less_than_4:
+ cmp arg3, 3
+ jl .only_less_than_3
+
+ ; load 3 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ mov al, [arg2+2]
+ mov [r11+2], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 5
+ jmp .barrett
+
+.only_less_than_3:
+ cmp arg3, 2
+ jl .only_less_than_2
+
+ ; load 2 Bytes
+ mov al, [arg2]
+ mov [r11], al
+
+ mov al, [arg2+1]
+ mov [r11+1], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 6
+ jmp .barrett
+
+.only_less_than_2:
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ vpslldq xmm7, 7
+ jmp .barrett
+
+section .data
+align 32
+
+%ifndef USE_CONSTS
+; precomputed constants
+rk_1: dq 0x00000000b9e02b86
+rk_2: dq 0x00000000dcb17aa4
+rk1: dq 0x00000000493c7d27
+rk2: dq 0x0000000ec1068c50
+rk3: dq 0x0000000206e38d70
+rk4: dq 0x000000006992cea2
+rk5: dq 0x00000000493c7d27
+rk6: dq 0x00000000dd45aab8
+rk7: dq 0x00000000dea713f0
+rk8: dq 0x0000000105ec76f0
+rk9: dq 0x0000000047db8317
+rk10: dq 0x000000002ad91c30
+rk11: dq 0x000000000715ce53
+rk12: dq 0x00000000c49f4f67
+rk13: dq 0x0000000039d3b296
+rk14: dq 0x00000000083a6eec
+rk15: dq 0x000000009e4addf8
+rk16: dq 0x00000000740eef02
+rk17: dq 0x00000000ddc0152b
+rk18: dq 0x000000001c291d04
+rk19: dq 0x00000000ba4fc28e
+rk20: dq 0x000000003da6d0cb
+
+rk_1b: dq 0x00000000493c7d27
+rk_2b: dq 0x0000000ec1068c50
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+
+%else
+INCLUDE_CONSTS
+%endif
+
+pshufb_shf_table:
+; shift masks for the pshufb instruction
+; different tail alignments select the values shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-3) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-4) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
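+; editorial note: the tail code indexes 16 bytes into this 32-byte table
+; at the tail length; pshufb zeroes any destination byte whose mask byte
+; has the top bit set (0x80..0x8f here), so one load yields both the
+; byte rotation and the zero fill for a partial block.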
+
+mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3: dq 0x8080808080808080, 0x8080808080808080
+
+%else ; Assembler doesn't understand these opcodes. Add empty symbol for windows.
+%ifidn __OUTPUT_FORMAT__, win64
+global no_ %+ FUNCTION_NAME
+no_ %+ FUNCTION_NAME %+ :
+%endif
+%endif ; (AS_FEATURE_LEVEL) >= 10
diff --git a/src/isa-l/crc/crc32_iscsi_perf.c b/src/isa-l/crc/crc32_iscsi_perf.c
new file mode 100644
index 000000000..d768cdfa6
--- /dev/null
+++ b/src/isa-l/crc/crc32_iscsi_perf.c
@@ -0,0 +1,79 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include "crc.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (2 * GT_L3_CACHE)
+# define TEST_TYPE_STR "_cold"
+#endif
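+/* Editorial note: the cold variant sizes the buffer at twice a presumed
+ * 32MB last-level cache so every pass streams from memory, while the
+ * warm variant loops over an 8KB buffer that stays cache-resident. */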
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define TEST_MEM TEST_LEN
+
+int main(int argc, char *argv[])
+{
+ void *buf;
+ uint32_t crc;
+ struct perf start;
+
+ printf("crc32_iscsi_perf:\n");
+
+ if (posix_memalign(&buf, 1024, TEST_LEN)) {
+		printf("alloc error: Fail\n");
+ return -1;
+ }
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ memset(buf, 0, TEST_LEN);
+ BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_iscsi(buf, TEST_LEN, TEST_SEED));
+ printf("crc32_iscsi" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ printf("finish 0x%x\n", crc);
+ return 0;
+}
diff --git a/src/isa-l/crc/crc64_base.c b/src/isa-l/crc/crc64_base.c
new file mode 100644
index 000000000..7cf5a69cf
--- /dev/null
+++ b/src/isa-l/crc/crc64_base.c
@@ -0,0 +1,912 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "crc64.h"
+
+static const uint64_t crc64_ecma_refl_table[256] = {
+ 0x0000000000000000ULL, 0xb32e4cbe03a75f6fULL,
+ 0xf4843657a840a05bULL, 0x47aa7ae9abe7ff34ULL,
+ 0x7bd0c384ff8f5e33ULL, 0xc8fe8f3afc28015cULL,
+ 0x8f54f5d357cffe68ULL, 0x3c7ab96d5468a107ULL,
+ 0xf7a18709ff1ebc66ULL, 0x448fcbb7fcb9e309ULL,
+ 0x0325b15e575e1c3dULL, 0xb00bfde054f94352ULL,
+ 0x8c71448d0091e255ULL, 0x3f5f08330336bd3aULL,
+ 0x78f572daa8d1420eULL, 0xcbdb3e64ab761d61ULL,
+ 0x7d9ba13851336649ULL, 0xceb5ed8652943926ULL,
+ 0x891f976ff973c612ULL, 0x3a31dbd1fad4997dULL,
+ 0x064b62bcaebc387aULL, 0xb5652e02ad1b6715ULL,
+ 0xf2cf54eb06fc9821ULL, 0x41e11855055bc74eULL,
+ 0x8a3a2631ae2dda2fULL, 0x39146a8fad8a8540ULL,
+ 0x7ebe1066066d7a74ULL, 0xcd905cd805ca251bULL,
+ 0xf1eae5b551a2841cULL, 0x42c4a90b5205db73ULL,
+ 0x056ed3e2f9e22447ULL, 0xb6409f5cfa457b28ULL,
+ 0xfb374270a266cc92ULL, 0x48190ecea1c193fdULL,
+ 0x0fb374270a266cc9ULL, 0xbc9d3899098133a6ULL,
+ 0x80e781f45de992a1ULL, 0x33c9cd4a5e4ecdceULL,
+ 0x7463b7a3f5a932faULL, 0xc74dfb1df60e6d95ULL,
+ 0x0c96c5795d7870f4ULL, 0xbfb889c75edf2f9bULL,
+ 0xf812f32ef538d0afULL, 0x4b3cbf90f69f8fc0ULL,
+ 0x774606fda2f72ec7ULL, 0xc4684a43a15071a8ULL,
+ 0x83c230aa0ab78e9cULL, 0x30ec7c140910d1f3ULL,
+ 0x86ace348f355aadbULL, 0x3582aff6f0f2f5b4ULL,
+ 0x7228d51f5b150a80ULL, 0xc10699a158b255efULL,
+ 0xfd7c20cc0cdaf4e8ULL, 0x4e526c720f7dab87ULL,
+ 0x09f8169ba49a54b3ULL, 0xbad65a25a73d0bdcULL,
+ 0x710d64410c4b16bdULL, 0xc22328ff0fec49d2ULL,
+ 0x85895216a40bb6e6ULL, 0x36a71ea8a7ace989ULL,
+ 0x0adda7c5f3c4488eULL, 0xb9f3eb7bf06317e1ULL,
+ 0xfe5991925b84e8d5ULL, 0x4d77dd2c5823b7baULL,
+ 0x64b62bcaebc387a1ULL, 0xd7986774e864d8ceULL,
+ 0x90321d9d438327faULL, 0x231c512340247895ULL,
+ 0x1f66e84e144cd992ULL, 0xac48a4f017eb86fdULL,
+ 0xebe2de19bc0c79c9ULL, 0x58cc92a7bfab26a6ULL,
+ 0x9317acc314dd3bc7ULL, 0x2039e07d177a64a8ULL,
+ 0x67939a94bc9d9b9cULL, 0xd4bdd62abf3ac4f3ULL,
+ 0xe8c76f47eb5265f4ULL, 0x5be923f9e8f53a9bULL,
+ 0x1c4359104312c5afULL, 0xaf6d15ae40b59ac0ULL,
+ 0x192d8af2baf0e1e8ULL, 0xaa03c64cb957be87ULL,
+ 0xeda9bca512b041b3ULL, 0x5e87f01b11171edcULL,
+ 0x62fd4976457fbfdbULL, 0xd1d305c846d8e0b4ULL,
+ 0x96797f21ed3f1f80ULL, 0x2557339fee9840efULL,
+ 0xee8c0dfb45ee5d8eULL, 0x5da24145464902e1ULL,
+ 0x1a083bacedaefdd5ULL, 0xa9267712ee09a2baULL,
+ 0x955cce7fba6103bdULL, 0x267282c1b9c65cd2ULL,
+ 0x61d8f8281221a3e6ULL, 0xd2f6b4961186fc89ULL,
+ 0x9f8169ba49a54b33ULL, 0x2caf25044a02145cULL,
+ 0x6b055fede1e5eb68ULL, 0xd82b1353e242b407ULL,
+ 0xe451aa3eb62a1500ULL, 0x577fe680b58d4a6fULL,
+ 0x10d59c691e6ab55bULL, 0xa3fbd0d71dcdea34ULL,
+ 0x6820eeb3b6bbf755ULL, 0xdb0ea20db51ca83aULL,
+ 0x9ca4d8e41efb570eULL, 0x2f8a945a1d5c0861ULL,
+ 0x13f02d374934a966ULL, 0xa0de61894a93f609ULL,
+ 0xe7741b60e174093dULL, 0x545a57dee2d35652ULL,
+ 0xe21ac88218962d7aULL, 0x5134843c1b317215ULL,
+ 0x169efed5b0d68d21ULL, 0xa5b0b26bb371d24eULL,
+ 0x99ca0b06e7197349ULL, 0x2ae447b8e4be2c26ULL,
+ 0x6d4e3d514f59d312ULL, 0xde6071ef4cfe8c7dULL,
+ 0x15bb4f8be788911cULL, 0xa6950335e42fce73ULL,
+ 0xe13f79dc4fc83147ULL, 0x521135624c6f6e28ULL,
+ 0x6e6b8c0f1807cf2fULL, 0xdd45c0b11ba09040ULL,
+ 0x9aefba58b0476f74ULL, 0x29c1f6e6b3e0301bULL,
+ 0xc96c5795d7870f42ULL, 0x7a421b2bd420502dULL,
+ 0x3de861c27fc7af19ULL, 0x8ec62d7c7c60f076ULL,
+ 0xb2bc941128085171ULL, 0x0192d8af2baf0e1eULL,
+ 0x4638a2468048f12aULL, 0xf516eef883efae45ULL,
+ 0x3ecdd09c2899b324ULL, 0x8de39c222b3eec4bULL,
+ 0xca49e6cb80d9137fULL, 0x7967aa75837e4c10ULL,
+ 0x451d1318d716ed17ULL, 0xf6335fa6d4b1b278ULL,
+ 0xb199254f7f564d4cULL, 0x02b769f17cf11223ULL,
+ 0xb4f7f6ad86b4690bULL, 0x07d9ba1385133664ULL,
+ 0x4073c0fa2ef4c950ULL, 0xf35d8c442d53963fULL,
+ 0xcf273529793b3738ULL, 0x7c0979977a9c6857ULL,
+ 0x3ba3037ed17b9763ULL, 0x888d4fc0d2dcc80cULL,
+ 0x435671a479aad56dULL, 0xf0783d1a7a0d8a02ULL,
+ 0xb7d247f3d1ea7536ULL, 0x04fc0b4dd24d2a59ULL,
+ 0x3886b22086258b5eULL, 0x8ba8fe9e8582d431ULL,
+ 0xcc0284772e652b05ULL, 0x7f2cc8c92dc2746aULL,
+ 0x325b15e575e1c3d0ULL, 0x8175595b76469cbfULL,
+ 0xc6df23b2dda1638bULL, 0x75f16f0cde063ce4ULL,
+ 0x498bd6618a6e9de3ULL, 0xfaa59adf89c9c28cULL,
+ 0xbd0fe036222e3db8ULL, 0x0e21ac88218962d7ULL,
+ 0xc5fa92ec8aff7fb6ULL, 0x76d4de52895820d9ULL,
+ 0x317ea4bb22bfdfedULL, 0x8250e80521188082ULL,
+ 0xbe2a516875702185ULL, 0x0d041dd676d77eeaULL,
+ 0x4aae673fdd3081deULL, 0xf9802b81de97deb1ULL,
+ 0x4fc0b4dd24d2a599ULL, 0xfceef8632775faf6ULL,
+ 0xbb44828a8c9205c2ULL, 0x086ace348f355aadULL,
+ 0x34107759db5dfbaaULL, 0x873e3be7d8faa4c5ULL,
+ 0xc094410e731d5bf1ULL, 0x73ba0db070ba049eULL,
+ 0xb86133d4dbcc19ffULL, 0x0b4f7f6ad86b4690ULL,
+ 0x4ce50583738cb9a4ULL, 0xffcb493d702be6cbULL,
+ 0xc3b1f050244347ccULL, 0x709fbcee27e418a3ULL,
+ 0x3735c6078c03e797ULL, 0x841b8ab98fa4b8f8ULL,
+ 0xadda7c5f3c4488e3ULL, 0x1ef430e13fe3d78cULL,
+ 0x595e4a08940428b8ULL, 0xea7006b697a377d7ULL,
+ 0xd60abfdbc3cbd6d0ULL, 0x6524f365c06c89bfULL,
+ 0x228e898c6b8b768bULL, 0x91a0c532682c29e4ULL,
+ 0x5a7bfb56c35a3485ULL, 0xe955b7e8c0fd6beaULL,
+ 0xaeffcd016b1a94deULL, 0x1dd181bf68bdcbb1ULL,
+ 0x21ab38d23cd56ab6ULL, 0x9285746c3f7235d9ULL,
+ 0xd52f0e859495caedULL, 0x6601423b97329582ULL,
+ 0xd041dd676d77eeaaULL, 0x636f91d96ed0b1c5ULL,
+ 0x24c5eb30c5374ef1ULL, 0x97eba78ec690119eULL,
+ 0xab911ee392f8b099ULL, 0x18bf525d915feff6ULL,
+ 0x5f1528b43ab810c2ULL, 0xec3b640a391f4fadULL,
+ 0x27e05a6e926952ccULL, 0x94ce16d091ce0da3ULL,
+ 0xd3646c393a29f297ULL, 0x604a2087398eadf8ULL,
+ 0x5c3099ea6de60cffULL, 0xef1ed5546e415390ULL,
+ 0xa8b4afbdc5a6aca4ULL, 0x1b9ae303c601f3cbULL,
+ 0x56ed3e2f9e224471ULL, 0xe5c372919d851b1eULL,
+ 0xa26908783662e42aULL, 0x114744c635c5bb45ULL,
+ 0x2d3dfdab61ad1a42ULL, 0x9e13b115620a452dULL,
+ 0xd9b9cbfcc9edba19ULL, 0x6a978742ca4ae576ULL,
+ 0xa14cb926613cf817ULL, 0x1262f598629ba778ULL,
+ 0x55c88f71c97c584cULL, 0xe6e6c3cfcadb0723ULL,
+ 0xda9c7aa29eb3a624ULL, 0x69b2361c9d14f94bULL,
+ 0x2e184cf536f3067fULL, 0x9d36004b35545910ULL,
+ 0x2b769f17cf112238ULL, 0x9858d3a9ccb67d57ULL,
+ 0xdff2a94067518263ULL, 0x6cdce5fe64f6dd0cULL,
+ 0x50a65c93309e7c0bULL, 0xe388102d33392364ULL,
+ 0xa4226ac498dedc50ULL, 0x170c267a9b79833fULL,
+ 0xdcd7181e300f9e5eULL, 0x6ff954a033a8c131ULL,
+ 0x28532e49984f3e05ULL, 0x9b7d62f79be8616aULL,
+ 0xa707db9acf80c06dULL, 0x14299724cc279f02ULL,
+ 0x5383edcd67c06036ULL, 0xe0ada17364673f59ULL
+};
+
+static const uint64_t crc64_ecma_norm_table[256] = {
+ 0x0000000000000000ULL, 0x42f0e1eba9ea3693ULL,
+ 0x85e1c3d753d46d26ULL, 0xc711223cfa3e5bb5ULL,
+ 0x493366450e42ecdfULL, 0x0bc387aea7a8da4cULL,
+ 0xccd2a5925d9681f9ULL, 0x8e224479f47cb76aULL,
+ 0x9266cc8a1c85d9beULL, 0xd0962d61b56fef2dULL,
+ 0x17870f5d4f51b498ULL, 0x5577eeb6e6bb820bULL,
+ 0xdb55aacf12c73561ULL, 0x99a54b24bb2d03f2ULL,
+ 0x5eb4691841135847ULL, 0x1c4488f3e8f96ed4ULL,
+ 0x663d78ff90e185efULL, 0x24cd9914390bb37cULL,
+ 0xe3dcbb28c335e8c9ULL, 0xa12c5ac36adfde5aULL,
+ 0x2f0e1eba9ea36930ULL, 0x6dfeff5137495fa3ULL,
+ 0xaaefdd6dcd770416ULL, 0xe81f3c86649d3285ULL,
+ 0xf45bb4758c645c51ULL, 0xb6ab559e258e6ac2ULL,
+ 0x71ba77a2dfb03177ULL, 0x334a9649765a07e4ULL,
+ 0xbd68d2308226b08eULL, 0xff9833db2bcc861dULL,
+ 0x388911e7d1f2dda8ULL, 0x7a79f00c7818eb3bULL,
+ 0xcc7af1ff21c30bdeULL, 0x8e8a101488293d4dULL,
+ 0x499b3228721766f8ULL, 0x0b6bd3c3dbfd506bULL,
+ 0x854997ba2f81e701ULL, 0xc7b97651866bd192ULL,
+ 0x00a8546d7c558a27ULL, 0x4258b586d5bfbcb4ULL,
+ 0x5e1c3d753d46d260ULL, 0x1cecdc9e94ace4f3ULL,
+ 0xdbfdfea26e92bf46ULL, 0x990d1f49c77889d5ULL,
+ 0x172f5b3033043ebfULL, 0x55dfbadb9aee082cULL,
+ 0x92ce98e760d05399ULL, 0xd03e790cc93a650aULL,
+ 0xaa478900b1228e31ULL, 0xe8b768eb18c8b8a2ULL,
+ 0x2fa64ad7e2f6e317ULL, 0x6d56ab3c4b1cd584ULL,
+ 0xe374ef45bf6062eeULL, 0xa1840eae168a547dULL,
+ 0x66952c92ecb40fc8ULL, 0x2465cd79455e395bULL,
+ 0x3821458aada7578fULL, 0x7ad1a461044d611cULL,
+ 0xbdc0865dfe733aa9ULL, 0xff3067b657990c3aULL,
+ 0x711223cfa3e5bb50ULL, 0x33e2c2240a0f8dc3ULL,
+ 0xf4f3e018f031d676ULL, 0xb60301f359dbe0e5ULL,
+ 0xda050215ea6c212fULL, 0x98f5e3fe438617bcULL,
+ 0x5fe4c1c2b9b84c09ULL, 0x1d14202910527a9aULL,
+ 0x93366450e42ecdf0ULL, 0xd1c685bb4dc4fb63ULL,
+ 0x16d7a787b7faa0d6ULL, 0x5427466c1e109645ULL,
+ 0x4863ce9ff6e9f891ULL, 0x0a932f745f03ce02ULL,
+ 0xcd820d48a53d95b7ULL, 0x8f72eca30cd7a324ULL,
+ 0x0150a8daf8ab144eULL, 0x43a04931514122ddULL,
+ 0x84b16b0dab7f7968ULL, 0xc6418ae602954ffbULL,
+ 0xbc387aea7a8da4c0ULL, 0xfec89b01d3679253ULL,
+ 0x39d9b93d2959c9e6ULL, 0x7b2958d680b3ff75ULL,
+ 0xf50b1caf74cf481fULL, 0xb7fbfd44dd257e8cULL,
+ 0x70eadf78271b2539ULL, 0x321a3e938ef113aaULL,
+ 0x2e5eb66066087d7eULL, 0x6cae578bcfe24bedULL,
+ 0xabbf75b735dc1058ULL, 0xe94f945c9c3626cbULL,
+ 0x676dd025684a91a1ULL, 0x259d31cec1a0a732ULL,
+ 0xe28c13f23b9efc87ULL, 0xa07cf2199274ca14ULL,
+ 0x167ff3eacbaf2af1ULL, 0x548f120162451c62ULL,
+ 0x939e303d987b47d7ULL, 0xd16ed1d631917144ULL,
+ 0x5f4c95afc5edc62eULL, 0x1dbc74446c07f0bdULL,
+ 0xdaad56789639ab08ULL, 0x985db7933fd39d9bULL,
+ 0x84193f60d72af34fULL, 0xc6e9de8b7ec0c5dcULL,
+ 0x01f8fcb784fe9e69ULL, 0x43081d5c2d14a8faULL,
+ 0xcd2a5925d9681f90ULL, 0x8fdab8ce70822903ULL,
+ 0x48cb9af28abc72b6ULL, 0x0a3b7b1923564425ULL,
+ 0x70428b155b4eaf1eULL, 0x32b26afef2a4998dULL,
+ 0xf5a348c2089ac238ULL, 0xb753a929a170f4abULL,
+ 0x3971ed50550c43c1ULL, 0x7b810cbbfce67552ULL,
+ 0xbc902e8706d82ee7ULL, 0xfe60cf6caf321874ULL,
+ 0xe224479f47cb76a0ULL, 0xa0d4a674ee214033ULL,
+ 0x67c58448141f1b86ULL, 0x253565a3bdf52d15ULL,
+ 0xab1721da49899a7fULL, 0xe9e7c031e063acecULL,
+ 0x2ef6e20d1a5df759ULL, 0x6c0603e6b3b7c1caULL,
+ 0xf6fae5c07d3274cdULL, 0xb40a042bd4d8425eULL,
+ 0x731b26172ee619ebULL, 0x31ebc7fc870c2f78ULL,
+ 0xbfc9838573709812ULL, 0xfd39626eda9aae81ULL,
+ 0x3a28405220a4f534ULL, 0x78d8a1b9894ec3a7ULL,
+ 0x649c294a61b7ad73ULL, 0x266cc8a1c85d9be0ULL,
+ 0xe17dea9d3263c055ULL, 0xa38d0b769b89f6c6ULL,
+ 0x2daf4f0f6ff541acULL, 0x6f5faee4c61f773fULL,
+ 0xa84e8cd83c212c8aULL, 0xeabe6d3395cb1a19ULL,
+ 0x90c79d3fedd3f122ULL, 0xd2377cd44439c7b1ULL,
+ 0x15265ee8be079c04ULL, 0x57d6bf0317edaa97ULL,
+ 0xd9f4fb7ae3911dfdULL, 0x9b041a914a7b2b6eULL,
+ 0x5c1538adb04570dbULL, 0x1ee5d94619af4648ULL,
+ 0x02a151b5f156289cULL, 0x4051b05e58bc1e0fULL,
+ 0x87409262a28245baULL, 0xc5b073890b687329ULL,
+ 0x4b9237f0ff14c443ULL, 0x0962d61b56fef2d0ULL,
+ 0xce73f427acc0a965ULL, 0x8c8315cc052a9ff6ULL,
+ 0x3a80143f5cf17f13ULL, 0x7870f5d4f51b4980ULL,
+ 0xbf61d7e80f251235ULL, 0xfd913603a6cf24a6ULL,
+ 0x73b3727a52b393ccULL, 0x31439391fb59a55fULL,
+ 0xf652b1ad0167feeaULL, 0xb4a25046a88dc879ULL,
+ 0xa8e6d8b54074a6adULL, 0xea16395ee99e903eULL,
+ 0x2d071b6213a0cb8bULL, 0x6ff7fa89ba4afd18ULL,
+ 0xe1d5bef04e364a72ULL, 0xa3255f1be7dc7ce1ULL,
+ 0x64347d271de22754ULL, 0x26c49cccb40811c7ULL,
+ 0x5cbd6cc0cc10fafcULL, 0x1e4d8d2b65facc6fULL,
+ 0xd95caf179fc497daULL, 0x9bac4efc362ea149ULL,
+ 0x158e0a85c2521623ULL, 0x577eeb6e6bb820b0ULL,
+ 0x906fc95291867b05ULL, 0xd29f28b9386c4d96ULL,
+ 0xcedba04ad0952342ULL, 0x8c2b41a1797f15d1ULL,
+ 0x4b3a639d83414e64ULL, 0x09ca82762aab78f7ULL,
+ 0x87e8c60fded7cf9dULL, 0xc51827e4773df90eULL,
+ 0x020905d88d03a2bbULL, 0x40f9e43324e99428ULL,
+ 0x2cffe7d5975e55e2ULL, 0x6e0f063e3eb46371ULL,
+ 0xa91e2402c48a38c4ULL, 0xebeec5e96d600e57ULL,
+ 0x65cc8190991cb93dULL, 0x273c607b30f68faeULL,
+ 0xe02d4247cac8d41bULL, 0xa2dda3ac6322e288ULL,
+ 0xbe992b5f8bdb8c5cULL, 0xfc69cab42231bacfULL,
+ 0x3b78e888d80fe17aULL, 0x7988096371e5d7e9ULL,
+ 0xf7aa4d1a85996083ULL, 0xb55aacf12c735610ULL,
+ 0x724b8ecdd64d0da5ULL, 0x30bb6f267fa73b36ULL,
+ 0x4ac29f2a07bfd00dULL, 0x08327ec1ae55e69eULL,
+ 0xcf235cfd546bbd2bULL, 0x8dd3bd16fd818bb8ULL,
+ 0x03f1f96f09fd3cd2ULL, 0x41011884a0170a41ULL,
+ 0x86103ab85a2951f4ULL, 0xc4e0db53f3c36767ULL,
+ 0xd8a453a01b3a09b3ULL, 0x9a54b24bb2d03f20ULL,
+ 0x5d45907748ee6495ULL, 0x1fb5719ce1045206ULL,
+ 0x919735e51578e56cULL, 0xd367d40ebc92d3ffULL,
+ 0x1476f63246ac884aULL, 0x568617d9ef46bed9ULL,
+ 0xe085162ab69d5e3cULL, 0xa275f7c11f7768afULL,
+ 0x6564d5fde549331aULL, 0x279434164ca30589ULL,
+ 0xa9b6706fb8dfb2e3ULL, 0xeb46918411358470ULL,
+ 0x2c57b3b8eb0bdfc5ULL, 0x6ea7525342e1e956ULL,
+ 0x72e3daa0aa188782ULL, 0x30133b4b03f2b111ULL,
+ 0xf7021977f9cceaa4ULL, 0xb5f2f89c5026dc37ULL,
+ 0x3bd0bce5a45a6b5dULL, 0x79205d0e0db05dceULL,
+ 0xbe317f32f78e067bULL, 0xfcc19ed95e6430e8ULL,
+ 0x86b86ed5267cdbd3ULL, 0xc4488f3e8f96ed40ULL,
+ 0x0359ad0275a8b6f5ULL, 0x41a94ce9dc428066ULL,
+ 0xcf8b0890283e370cULL, 0x8d7be97b81d4019fULL,
+ 0x4a6acb477bea5a2aULL, 0x089a2aacd2006cb9ULL,
+ 0x14dea25f3af9026dULL, 0x562e43b4931334feULL,
+ 0x913f6188692d6f4bULL, 0xd3cf8063c0c759d8ULL,
+ 0x5dedc41a34bbeeb2ULL, 0x1f1d25f19d51d821ULL,
+ 0xd80c07cd676f8394ULL, 0x9afce626ce85b507ULL
+};
+
+static const uint64_t crc64_iso_refl_table[256] = {
+ 0x0000000000000000ULL, 0x01b0000000000000ULL,
+ 0x0360000000000000ULL, 0x02d0000000000000ULL,
+ 0x06c0000000000000ULL, 0x0770000000000000ULL,
+ 0x05a0000000000000ULL, 0x0410000000000000ULL,
+ 0x0d80000000000000ULL, 0x0c30000000000000ULL,
+ 0x0ee0000000000000ULL, 0x0f50000000000000ULL,
+ 0x0b40000000000000ULL, 0x0af0000000000000ULL,
+ 0x0820000000000000ULL, 0x0990000000000000ULL,
+ 0x1b00000000000000ULL, 0x1ab0000000000000ULL,
+ 0x1860000000000000ULL, 0x19d0000000000000ULL,
+ 0x1dc0000000000000ULL, 0x1c70000000000000ULL,
+ 0x1ea0000000000000ULL, 0x1f10000000000000ULL,
+ 0x1680000000000000ULL, 0x1730000000000000ULL,
+ 0x15e0000000000000ULL, 0x1450000000000000ULL,
+ 0x1040000000000000ULL, 0x11f0000000000000ULL,
+ 0x1320000000000000ULL, 0x1290000000000000ULL,
+ 0x3600000000000000ULL, 0x37b0000000000000ULL,
+ 0x3560000000000000ULL, 0x34d0000000000000ULL,
+ 0x30c0000000000000ULL, 0x3170000000000000ULL,
+ 0x33a0000000000000ULL, 0x3210000000000000ULL,
+ 0x3b80000000000000ULL, 0x3a30000000000000ULL,
+ 0x38e0000000000000ULL, 0x3950000000000000ULL,
+ 0x3d40000000000000ULL, 0x3cf0000000000000ULL,
+ 0x3e20000000000000ULL, 0x3f90000000000000ULL,
+ 0x2d00000000000000ULL, 0x2cb0000000000000ULL,
+ 0x2e60000000000000ULL, 0x2fd0000000000000ULL,
+ 0x2bc0000000000000ULL, 0x2a70000000000000ULL,
+ 0x28a0000000000000ULL, 0x2910000000000000ULL,
+ 0x2080000000000000ULL, 0x2130000000000000ULL,
+ 0x23e0000000000000ULL, 0x2250000000000000ULL,
+ 0x2640000000000000ULL, 0x27f0000000000000ULL,
+ 0x2520000000000000ULL, 0x2490000000000000ULL,
+ 0x6c00000000000000ULL, 0x6db0000000000000ULL,
+ 0x6f60000000000000ULL, 0x6ed0000000000000ULL,
+ 0x6ac0000000000000ULL, 0x6b70000000000000ULL,
+ 0x69a0000000000000ULL, 0x6810000000000000ULL,
+ 0x6180000000000000ULL, 0x6030000000000000ULL,
+ 0x62e0000000000000ULL, 0x6350000000000000ULL,
+ 0x6740000000000000ULL, 0x66f0000000000000ULL,
+ 0x6420000000000000ULL, 0x6590000000000000ULL,
+ 0x7700000000000000ULL, 0x76b0000000000000ULL,
+ 0x7460000000000000ULL, 0x75d0000000000000ULL,
+ 0x71c0000000000000ULL, 0x7070000000000000ULL,
+ 0x72a0000000000000ULL, 0x7310000000000000ULL,
+ 0x7a80000000000000ULL, 0x7b30000000000000ULL,
+ 0x79e0000000000000ULL, 0x7850000000000000ULL,
+ 0x7c40000000000000ULL, 0x7df0000000000000ULL,
+ 0x7f20000000000000ULL, 0x7e90000000000000ULL,
+ 0x5a00000000000000ULL, 0x5bb0000000000000ULL,
+ 0x5960000000000000ULL, 0x58d0000000000000ULL,
+ 0x5cc0000000000000ULL, 0x5d70000000000000ULL,
+ 0x5fa0000000000000ULL, 0x5e10000000000000ULL,
+ 0x5780000000000000ULL, 0x5630000000000000ULL,
+ 0x54e0000000000000ULL, 0x5550000000000000ULL,
+ 0x5140000000000000ULL, 0x50f0000000000000ULL,
+ 0x5220000000000000ULL, 0x5390000000000000ULL,
+ 0x4100000000000000ULL, 0x40b0000000000000ULL,
+ 0x4260000000000000ULL, 0x43d0000000000000ULL,
+ 0x47c0000000000000ULL, 0x4670000000000000ULL,
+ 0x44a0000000000000ULL, 0x4510000000000000ULL,
+ 0x4c80000000000000ULL, 0x4d30000000000000ULL,
+ 0x4fe0000000000000ULL, 0x4e50000000000000ULL,
+ 0x4a40000000000000ULL, 0x4bf0000000000000ULL,
+ 0x4920000000000000ULL, 0x4890000000000000ULL,
+ 0xd800000000000000ULL, 0xd9b0000000000000ULL,
+ 0xdb60000000000000ULL, 0xdad0000000000000ULL,
+ 0xdec0000000000000ULL, 0xdf70000000000000ULL,
+ 0xdda0000000000000ULL, 0xdc10000000000000ULL,
+ 0xd580000000000000ULL, 0xd430000000000000ULL,
+ 0xd6e0000000000000ULL, 0xd750000000000000ULL,
+ 0xd340000000000000ULL, 0xd2f0000000000000ULL,
+ 0xd020000000000000ULL, 0xd190000000000000ULL,
+ 0xc300000000000000ULL, 0xc2b0000000000000ULL,
+ 0xc060000000000000ULL, 0xc1d0000000000000ULL,
+ 0xc5c0000000000000ULL, 0xc470000000000000ULL,
+ 0xc6a0000000000000ULL, 0xc710000000000000ULL,
+ 0xce80000000000000ULL, 0xcf30000000000000ULL,
+ 0xcde0000000000000ULL, 0xcc50000000000000ULL,
+ 0xc840000000000000ULL, 0xc9f0000000000000ULL,
+ 0xcb20000000000000ULL, 0xca90000000000000ULL,
+ 0xee00000000000000ULL, 0xefb0000000000000ULL,
+ 0xed60000000000000ULL, 0xecd0000000000000ULL,
+ 0xe8c0000000000000ULL, 0xe970000000000000ULL,
+ 0xeba0000000000000ULL, 0xea10000000000000ULL,
+ 0xe380000000000000ULL, 0xe230000000000000ULL,
+ 0xe0e0000000000000ULL, 0xe150000000000000ULL,
+ 0xe540000000000000ULL, 0xe4f0000000000000ULL,
+ 0xe620000000000000ULL, 0xe790000000000000ULL,
+ 0xf500000000000000ULL, 0xf4b0000000000000ULL,
+ 0xf660000000000000ULL, 0xf7d0000000000000ULL,
+ 0xf3c0000000000000ULL, 0xf270000000000000ULL,
+ 0xf0a0000000000000ULL, 0xf110000000000000ULL,
+ 0xf880000000000000ULL, 0xf930000000000000ULL,
+ 0xfbe0000000000000ULL, 0xfa50000000000000ULL,
+ 0xfe40000000000000ULL, 0xfff0000000000000ULL,
+ 0xfd20000000000000ULL, 0xfc90000000000000ULL,
+ 0xb400000000000000ULL, 0xb5b0000000000000ULL,
+ 0xb760000000000000ULL, 0xb6d0000000000000ULL,
+ 0xb2c0000000000000ULL, 0xb370000000000000ULL,
+ 0xb1a0000000000000ULL, 0xb010000000000000ULL,
+ 0xb980000000000000ULL, 0xb830000000000000ULL,
+ 0xbae0000000000000ULL, 0xbb50000000000000ULL,
+ 0xbf40000000000000ULL, 0xbef0000000000000ULL,
+ 0xbc20000000000000ULL, 0xbd90000000000000ULL,
+ 0xaf00000000000000ULL, 0xaeb0000000000000ULL,
+ 0xac60000000000000ULL, 0xadd0000000000000ULL,
+ 0xa9c0000000000000ULL, 0xa870000000000000ULL,
+ 0xaaa0000000000000ULL, 0xab10000000000000ULL,
+ 0xa280000000000000ULL, 0xa330000000000000ULL,
+ 0xa1e0000000000000ULL, 0xa050000000000000ULL,
+ 0xa440000000000000ULL, 0xa5f0000000000000ULL,
+ 0xa720000000000000ULL, 0xa690000000000000ULL,
+ 0x8200000000000000ULL, 0x83b0000000000000ULL,
+ 0x8160000000000000ULL, 0x80d0000000000000ULL,
+ 0x84c0000000000000ULL, 0x8570000000000000ULL,
+ 0x87a0000000000000ULL, 0x8610000000000000ULL,
+ 0x8f80000000000000ULL, 0x8e30000000000000ULL,
+ 0x8ce0000000000000ULL, 0x8d50000000000000ULL,
+ 0x8940000000000000ULL, 0x88f0000000000000ULL,
+ 0x8a20000000000000ULL, 0x8b90000000000000ULL,
+ 0x9900000000000000ULL, 0x98b0000000000000ULL,
+ 0x9a60000000000000ULL, 0x9bd0000000000000ULL,
+ 0x9fc0000000000000ULL, 0x9e70000000000000ULL,
+ 0x9ca0000000000000ULL, 0x9d10000000000000ULL,
+ 0x9480000000000000ULL, 0x9530000000000000ULL,
+ 0x97e0000000000000ULL, 0x9650000000000000ULL,
+ 0x9240000000000000ULL, 0x93f0000000000000ULL,
+ 0x9120000000000000ULL, 0x9090000000000000ULL
+};
+
+static const uint64_t crc64_iso_norm_table[256] = {
+ 0x0000000000000000ULL, 0x000000000000001bULL,
+ 0x0000000000000036ULL, 0x000000000000002dULL,
+ 0x000000000000006cULL, 0x0000000000000077ULL,
+ 0x000000000000005aULL, 0x0000000000000041ULL,
+ 0x00000000000000d8ULL, 0x00000000000000c3ULL,
+ 0x00000000000000eeULL, 0x00000000000000f5ULL,
+ 0x00000000000000b4ULL, 0x00000000000000afULL,
+ 0x0000000000000082ULL, 0x0000000000000099ULL,
+ 0x00000000000001b0ULL, 0x00000000000001abULL,
+ 0x0000000000000186ULL, 0x000000000000019dULL,
+ 0x00000000000001dcULL, 0x00000000000001c7ULL,
+ 0x00000000000001eaULL, 0x00000000000001f1ULL,
+ 0x0000000000000168ULL, 0x0000000000000173ULL,
+ 0x000000000000015eULL, 0x0000000000000145ULL,
+ 0x0000000000000104ULL, 0x000000000000011fULL,
+ 0x0000000000000132ULL, 0x0000000000000129ULL,
+ 0x0000000000000360ULL, 0x000000000000037bULL,
+ 0x0000000000000356ULL, 0x000000000000034dULL,
+ 0x000000000000030cULL, 0x0000000000000317ULL,
+ 0x000000000000033aULL, 0x0000000000000321ULL,
+ 0x00000000000003b8ULL, 0x00000000000003a3ULL,
+ 0x000000000000038eULL, 0x0000000000000395ULL,
+ 0x00000000000003d4ULL, 0x00000000000003cfULL,
+ 0x00000000000003e2ULL, 0x00000000000003f9ULL,
+ 0x00000000000002d0ULL, 0x00000000000002cbULL,
+ 0x00000000000002e6ULL, 0x00000000000002fdULL,
+ 0x00000000000002bcULL, 0x00000000000002a7ULL,
+ 0x000000000000028aULL, 0x0000000000000291ULL,
+ 0x0000000000000208ULL, 0x0000000000000213ULL,
+ 0x000000000000023eULL, 0x0000000000000225ULL,
+ 0x0000000000000264ULL, 0x000000000000027fULL,
+ 0x0000000000000252ULL, 0x0000000000000249ULL,
+ 0x00000000000006c0ULL, 0x00000000000006dbULL,
+ 0x00000000000006f6ULL, 0x00000000000006edULL,
+ 0x00000000000006acULL, 0x00000000000006b7ULL,
+ 0x000000000000069aULL, 0x0000000000000681ULL,
+ 0x0000000000000618ULL, 0x0000000000000603ULL,
+ 0x000000000000062eULL, 0x0000000000000635ULL,
+ 0x0000000000000674ULL, 0x000000000000066fULL,
+ 0x0000000000000642ULL, 0x0000000000000659ULL,
+ 0x0000000000000770ULL, 0x000000000000076bULL,
+ 0x0000000000000746ULL, 0x000000000000075dULL,
+ 0x000000000000071cULL, 0x0000000000000707ULL,
+ 0x000000000000072aULL, 0x0000000000000731ULL,
+ 0x00000000000007a8ULL, 0x00000000000007b3ULL,
+ 0x000000000000079eULL, 0x0000000000000785ULL,
+ 0x00000000000007c4ULL, 0x00000000000007dfULL,
+ 0x00000000000007f2ULL, 0x00000000000007e9ULL,
+ 0x00000000000005a0ULL, 0x00000000000005bbULL,
+ 0x0000000000000596ULL, 0x000000000000058dULL,
+ 0x00000000000005ccULL, 0x00000000000005d7ULL,
+ 0x00000000000005faULL, 0x00000000000005e1ULL,
+ 0x0000000000000578ULL, 0x0000000000000563ULL,
+ 0x000000000000054eULL, 0x0000000000000555ULL,
+ 0x0000000000000514ULL, 0x000000000000050fULL,
+ 0x0000000000000522ULL, 0x0000000000000539ULL,
+ 0x0000000000000410ULL, 0x000000000000040bULL,
+ 0x0000000000000426ULL, 0x000000000000043dULL,
+ 0x000000000000047cULL, 0x0000000000000467ULL,
+ 0x000000000000044aULL, 0x0000000000000451ULL,
+ 0x00000000000004c8ULL, 0x00000000000004d3ULL,
+ 0x00000000000004feULL, 0x00000000000004e5ULL,
+ 0x00000000000004a4ULL, 0x00000000000004bfULL,
+ 0x0000000000000492ULL, 0x0000000000000489ULL,
+ 0x0000000000000d80ULL, 0x0000000000000d9bULL,
+ 0x0000000000000db6ULL, 0x0000000000000dadULL,
+ 0x0000000000000decULL, 0x0000000000000df7ULL,
+ 0x0000000000000ddaULL, 0x0000000000000dc1ULL,
+ 0x0000000000000d58ULL, 0x0000000000000d43ULL,
+ 0x0000000000000d6eULL, 0x0000000000000d75ULL,
+ 0x0000000000000d34ULL, 0x0000000000000d2fULL,
+ 0x0000000000000d02ULL, 0x0000000000000d19ULL,
+ 0x0000000000000c30ULL, 0x0000000000000c2bULL,
+ 0x0000000000000c06ULL, 0x0000000000000c1dULL,
+ 0x0000000000000c5cULL, 0x0000000000000c47ULL,
+ 0x0000000000000c6aULL, 0x0000000000000c71ULL,
+ 0x0000000000000ce8ULL, 0x0000000000000cf3ULL,
+ 0x0000000000000cdeULL, 0x0000000000000cc5ULL,
+ 0x0000000000000c84ULL, 0x0000000000000c9fULL,
+ 0x0000000000000cb2ULL, 0x0000000000000ca9ULL,
+ 0x0000000000000ee0ULL, 0x0000000000000efbULL,
+ 0x0000000000000ed6ULL, 0x0000000000000ecdULL,
+ 0x0000000000000e8cULL, 0x0000000000000e97ULL,
+ 0x0000000000000ebaULL, 0x0000000000000ea1ULL,
+ 0x0000000000000e38ULL, 0x0000000000000e23ULL,
+ 0x0000000000000e0eULL, 0x0000000000000e15ULL,
+ 0x0000000000000e54ULL, 0x0000000000000e4fULL,
+ 0x0000000000000e62ULL, 0x0000000000000e79ULL,
+ 0x0000000000000f50ULL, 0x0000000000000f4bULL,
+ 0x0000000000000f66ULL, 0x0000000000000f7dULL,
+ 0x0000000000000f3cULL, 0x0000000000000f27ULL,
+ 0x0000000000000f0aULL, 0x0000000000000f11ULL,
+ 0x0000000000000f88ULL, 0x0000000000000f93ULL,
+ 0x0000000000000fbeULL, 0x0000000000000fa5ULL,
+ 0x0000000000000fe4ULL, 0x0000000000000fffULL,
+ 0x0000000000000fd2ULL, 0x0000000000000fc9ULL,
+ 0x0000000000000b40ULL, 0x0000000000000b5bULL,
+ 0x0000000000000b76ULL, 0x0000000000000b6dULL,
+ 0x0000000000000b2cULL, 0x0000000000000b37ULL,
+ 0x0000000000000b1aULL, 0x0000000000000b01ULL,
+ 0x0000000000000b98ULL, 0x0000000000000b83ULL,
+ 0x0000000000000baeULL, 0x0000000000000bb5ULL,
+ 0x0000000000000bf4ULL, 0x0000000000000befULL,
+ 0x0000000000000bc2ULL, 0x0000000000000bd9ULL,
+ 0x0000000000000af0ULL, 0x0000000000000aebULL,
+ 0x0000000000000ac6ULL, 0x0000000000000addULL,
+ 0x0000000000000a9cULL, 0x0000000000000a87ULL,
+ 0x0000000000000aaaULL, 0x0000000000000ab1ULL,
+ 0x0000000000000a28ULL, 0x0000000000000a33ULL,
+ 0x0000000000000a1eULL, 0x0000000000000a05ULL,
+ 0x0000000000000a44ULL, 0x0000000000000a5fULL,
+ 0x0000000000000a72ULL, 0x0000000000000a69ULL,
+ 0x0000000000000820ULL, 0x000000000000083bULL,
+ 0x0000000000000816ULL, 0x000000000000080dULL,
+ 0x000000000000084cULL, 0x0000000000000857ULL,
+ 0x000000000000087aULL, 0x0000000000000861ULL,
+ 0x00000000000008f8ULL, 0x00000000000008e3ULL,
+ 0x00000000000008ceULL, 0x00000000000008d5ULL,
+ 0x0000000000000894ULL, 0x000000000000088fULL,
+ 0x00000000000008a2ULL, 0x00000000000008b9ULL,
+ 0x0000000000000990ULL, 0x000000000000098bULL,
+ 0x00000000000009a6ULL, 0x00000000000009bdULL,
+ 0x00000000000009fcULL, 0x00000000000009e7ULL,
+ 0x00000000000009caULL, 0x00000000000009d1ULL,
+ 0x0000000000000948ULL, 0x0000000000000953ULL,
+ 0x000000000000097eULL, 0x0000000000000965ULL,
+ 0x0000000000000924ULL, 0x000000000000093fULL,
+ 0x0000000000000912ULL, 0x0000000000000909ULL
+};
+
+static const uint64_t crc64_jones_refl_table[256] = {
+ 0x0000000000000000ULL, 0x7ad870c830358979ULL,
+ 0xf5b0e190606b12f2ULL, 0x8f689158505e9b8bULL,
+ 0xc038e5739841b68fULL, 0xbae095bba8743ff6ULL,
+ 0x358804e3f82aa47dULL, 0x4f50742bc81f2d04ULL,
+ 0xab28ecb46814fe75ULL, 0xd1f09c7c5821770cULL,
+ 0x5e980d24087fec87ULL, 0x24407dec384a65feULL,
+ 0x6b1009c7f05548faULL, 0x11c8790fc060c183ULL,
+ 0x9ea0e857903e5a08ULL, 0xe478989fa00bd371ULL,
+ 0x7d08ff3b88be6f81ULL, 0x07d08ff3b88be6f8ULL,
+ 0x88b81eabe8d57d73ULL, 0xf2606e63d8e0f40aULL,
+ 0xbd301a4810ffd90eULL, 0xc7e86a8020ca5077ULL,
+ 0x4880fbd87094cbfcULL, 0x32588b1040a14285ULL,
+ 0xd620138fe0aa91f4ULL, 0xacf86347d09f188dULL,
+ 0x2390f21f80c18306ULL, 0x594882d7b0f40a7fULL,
+ 0x1618f6fc78eb277bULL, 0x6cc0863448deae02ULL,
+ 0xe3a8176c18803589ULL, 0x997067a428b5bcf0ULL,
+ 0xfa11fe77117cdf02ULL, 0x80c98ebf2149567bULL,
+ 0x0fa11fe77117cdf0ULL, 0x75796f2f41224489ULL,
+ 0x3a291b04893d698dULL, 0x40f16bccb908e0f4ULL,
+ 0xcf99fa94e9567b7fULL, 0xb5418a5cd963f206ULL,
+ 0x513912c379682177ULL, 0x2be1620b495da80eULL,
+ 0xa489f35319033385ULL, 0xde51839b2936bafcULL,
+ 0x9101f7b0e12997f8ULL, 0xebd98778d11c1e81ULL,
+ 0x64b116208142850aULL, 0x1e6966e8b1770c73ULL,
+ 0x8719014c99c2b083ULL, 0xfdc17184a9f739faULL,
+ 0x72a9e0dcf9a9a271ULL, 0x08719014c99c2b08ULL,
+ 0x4721e43f0183060cULL, 0x3df994f731b68f75ULL,
+ 0xb29105af61e814feULL, 0xc849756751dd9d87ULL,
+ 0x2c31edf8f1d64ef6ULL, 0x56e99d30c1e3c78fULL,
+ 0xd9810c6891bd5c04ULL, 0xa3597ca0a188d57dULL,
+ 0xec09088b6997f879ULL, 0x96d1784359a27100ULL,
+ 0x19b9e91b09fcea8bULL, 0x636199d339c963f2ULL,
+ 0xdf7adabd7a6e2d6fULL, 0xa5a2aa754a5ba416ULL,
+ 0x2aca3b2d1a053f9dULL, 0x50124be52a30b6e4ULL,
+ 0x1f423fcee22f9be0ULL, 0x659a4f06d21a1299ULL,
+ 0xeaf2de5e82448912ULL, 0x902aae96b271006bULL,
+ 0x74523609127ad31aULL, 0x0e8a46c1224f5a63ULL,
+ 0x81e2d7997211c1e8ULL, 0xfb3aa75142244891ULL,
+ 0xb46ad37a8a3b6595ULL, 0xceb2a3b2ba0eececULL,
+ 0x41da32eaea507767ULL, 0x3b024222da65fe1eULL,
+ 0xa2722586f2d042eeULL, 0xd8aa554ec2e5cb97ULL,
+ 0x57c2c41692bb501cULL, 0x2d1ab4dea28ed965ULL,
+ 0x624ac0f56a91f461ULL, 0x1892b03d5aa47d18ULL,
+ 0x97fa21650afae693ULL, 0xed2251ad3acf6feaULL,
+ 0x095ac9329ac4bc9bULL, 0x7382b9faaaf135e2ULL,
+ 0xfcea28a2faafae69ULL, 0x8632586aca9a2710ULL,
+ 0xc9622c4102850a14ULL, 0xb3ba5c8932b0836dULL,
+ 0x3cd2cdd162ee18e6ULL, 0x460abd1952db919fULL,
+ 0x256b24ca6b12f26dULL, 0x5fb354025b277b14ULL,
+ 0xd0dbc55a0b79e09fULL, 0xaa03b5923b4c69e6ULL,
+ 0xe553c1b9f35344e2ULL, 0x9f8bb171c366cd9bULL,
+ 0x10e3202993385610ULL, 0x6a3b50e1a30ddf69ULL,
+ 0x8e43c87e03060c18ULL, 0xf49bb8b633338561ULL,
+ 0x7bf329ee636d1eeaULL, 0x012b592653589793ULL,
+ 0x4e7b2d0d9b47ba97ULL, 0x34a35dc5ab7233eeULL,
+ 0xbbcbcc9dfb2ca865ULL, 0xc113bc55cb19211cULL,
+ 0x5863dbf1e3ac9decULL, 0x22bbab39d3991495ULL,
+ 0xadd33a6183c78f1eULL, 0xd70b4aa9b3f20667ULL,
+ 0x985b3e827bed2b63ULL, 0xe2834e4a4bd8a21aULL,
+ 0x6debdf121b863991ULL, 0x1733afda2bb3b0e8ULL,
+ 0xf34b37458bb86399ULL, 0x8993478dbb8deae0ULL,
+ 0x06fbd6d5ebd3716bULL, 0x7c23a61ddbe6f812ULL,
+ 0x3373d23613f9d516ULL, 0x49aba2fe23cc5c6fULL,
+ 0xc6c333a67392c7e4ULL, 0xbc1b436e43a74e9dULL,
+ 0x95ac9329ac4bc9b5ULL, 0xef74e3e19c7e40ccULL,
+ 0x601c72b9cc20db47ULL, 0x1ac40271fc15523eULL,
+ 0x5594765a340a7f3aULL, 0x2f4c0692043ff643ULL,
+ 0xa02497ca54616dc8ULL, 0xdafce7026454e4b1ULL,
+ 0x3e847f9dc45f37c0ULL, 0x445c0f55f46abeb9ULL,
+ 0xcb349e0da4342532ULL, 0xb1eceec59401ac4bULL,
+ 0xfebc9aee5c1e814fULL, 0x8464ea266c2b0836ULL,
+ 0x0b0c7b7e3c7593bdULL, 0x71d40bb60c401ac4ULL,
+ 0xe8a46c1224f5a634ULL, 0x927c1cda14c02f4dULL,
+ 0x1d148d82449eb4c6ULL, 0x67ccfd4a74ab3dbfULL,
+ 0x289c8961bcb410bbULL, 0x5244f9a98c8199c2ULL,
+ 0xdd2c68f1dcdf0249ULL, 0xa7f41839ecea8b30ULL,
+ 0x438c80a64ce15841ULL, 0x3954f06e7cd4d138ULL,
+ 0xb63c61362c8a4ab3ULL, 0xcce411fe1cbfc3caULL,
+ 0x83b465d5d4a0eeceULL, 0xf96c151de49567b7ULL,
+ 0x76048445b4cbfc3cULL, 0x0cdcf48d84fe7545ULL,
+ 0x6fbd6d5ebd3716b7ULL, 0x15651d968d029fceULL,
+ 0x9a0d8ccedd5c0445ULL, 0xe0d5fc06ed698d3cULL,
+ 0xaf85882d2576a038ULL, 0xd55df8e515432941ULL,
+ 0x5a3569bd451db2caULL, 0x20ed197575283bb3ULL,
+ 0xc49581ead523e8c2ULL, 0xbe4df122e51661bbULL,
+ 0x3125607ab548fa30ULL, 0x4bfd10b2857d7349ULL,
+ 0x04ad64994d625e4dULL, 0x7e7514517d57d734ULL,
+ 0xf11d85092d094cbfULL, 0x8bc5f5c11d3cc5c6ULL,
+ 0x12b5926535897936ULL, 0x686de2ad05bcf04fULL,
+ 0xe70573f555e26bc4ULL, 0x9ddd033d65d7e2bdULL,
+ 0xd28d7716adc8cfb9ULL, 0xa85507de9dfd46c0ULL,
+ 0x273d9686cda3dd4bULL, 0x5de5e64efd965432ULL,
+ 0xb99d7ed15d9d8743ULL, 0xc3450e196da80e3aULL,
+ 0x4c2d9f413df695b1ULL, 0x36f5ef890dc31cc8ULL,
+ 0x79a59ba2c5dc31ccULL, 0x037deb6af5e9b8b5ULL,
+ 0x8c157a32a5b7233eULL, 0xf6cd0afa9582aa47ULL,
+ 0x4ad64994d625e4daULL, 0x300e395ce6106da3ULL,
+ 0xbf66a804b64ef628ULL, 0xc5bed8cc867b7f51ULL,
+ 0x8aeeace74e645255ULL, 0xf036dc2f7e51db2cULL,
+ 0x7f5e4d772e0f40a7ULL, 0x05863dbf1e3ac9deULL,
+ 0xe1fea520be311aafULL, 0x9b26d5e88e0493d6ULL,
+ 0x144e44b0de5a085dULL, 0x6e963478ee6f8124ULL,
+ 0x21c640532670ac20ULL, 0x5b1e309b16452559ULL,
+ 0xd476a1c3461bbed2ULL, 0xaeaed10b762e37abULL,
+ 0x37deb6af5e9b8b5bULL, 0x4d06c6676eae0222ULL,
+ 0xc26e573f3ef099a9ULL, 0xb8b627f70ec510d0ULL,
+ 0xf7e653dcc6da3dd4ULL, 0x8d3e2314f6efb4adULL,
+ 0x0256b24ca6b12f26ULL, 0x788ec2849684a65fULL,
+ 0x9cf65a1b368f752eULL, 0xe62e2ad306bafc57ULL,
+ 0x6946bb8b56e467dcULL, 0x139ecb4366d1eea5ULL,
+ 0x5ccebf68aecec3a1ULL, 0x2616cfa09efb4ad8ULL,
+ 0xa97e5ef8cea5d153ULL, 0xd3a62e30fe90582aULL,
+ 0xb0c7b7e3c7593bd8ULL, 0xca1fc72bf76cb2a1ULL,
+ 0x45775673a732292aULL, 0x3faf26bb9707a053ULL,
+ 0x70ff52905f188d57ULL, 0x0a2722586f2d042eULL,
+ 0x854fb3003f739fa5ULL, 0xff97c3c80f4616dcULL,
+ 0x1bef5b57af4dc5adULL, 0x61372b9f9f784cd4ULL,
+ 0xee5fbac7cf26d75fULL, 0x9487ca0fff135e26ULL,
+ 0xdbd7be24370c7322ULL, 0xa10fceec0739fa5bULL,
+ 0x2e675fb4576761d0ULL, 0x54bf2f7c6752e8a9ULL,
+ 0xcdcf48d84fe75459ULL, 0xb71738107fd2dd20ULL,
+ 0x387fa9482f8c46abULL, 0x42a7d9801fb9cfd2ULL,
+ 0x0df7adabd7a6e2d6ULL, 0x772fdd63e7936bafULL,
+ 0xf8474c3bb7cdf024ULL, 0x829f3cf387f8795dULL,
+ 0x66e7a46c27f3aa2cULL, 0x1c3fd4a417c62355ULL,
+ 0x935745fc4798b8deULL, 0xe98f353477ad31a7ULL,
+ 0xa6df411fbfb21ca3ULL, 0xdc0731d78f8795daULL,
+ 0x536fa08fdfd90e51ULL, 0x29b7d047efec8728ULL
+};
+
+static const uint64_t crc64_jones_norm_table[256] = {
+ 0x0000000000000000ULL, 0xad93d23594c935a9ULL,
+ 0xf6b4765ebd5b5efbULL, 0x5b27a46b29926b52ULL,
+ 0x40fb3e88ee7f885fULL, 0xed68ecbd7ab6bdf6ULL,
+ 0xb64f48d65324d6a4ULL, 0x1bdc9ae3c7ede30dULL,
+ 0x81f67d11dcff10beULL, 0x2c65af2448362517ULL,
+ 0x77420b4f61a44e45ULL, 0xdad1d97af56d7becULL,
+ 0xc10d4399328098e1ULL, 0x6c9e91aca649ad48ULL,
+ 0x37b935c78fdbc61aULL, 0x9a2ae7f21b12f3b3ULL,
+ 0xae7f28162d3714d5ULL, 0x03ecfa23b9fe217cULL,
+ 0x58cb5e48906c4a2eULL, 0xf5588c7d04a57f87ULL,
+ 0xee84169ec3489c8aULL, 0x4317c4ab5781a923ULL,
+ 0x183060c07e13c271ULL, 0xb5a3b2f5eadaf7d8ULL,
+ 0x2f895507f1c8046bULL, 0x821a8732650131c2ULL,
+ 0xd93d23594c935a90ULL, 0x74aef16cd85a6f39ULL,
+ 0x6f726b8f1fb78c34ULL, 0xc2e1b9ba8b7eb99dULL,
+ 0x99c61dd1a2ecd2cfULL, 0x3455cfe43625e766ULL,
+ 0xf16d8219cea71c03ULL, 0x5cfe502c5a6e29aaULL,
+ 0x07d9f44773fc42f8ULL, 0xaa4a2672e7357751ULL,
+ 0xb196bc9120d8945cULL, 0x1c056ea4b411a1f5ULL,
+ 0x4722cacf9d83caa7ULL, 0xeab118fa094aff0eULL,
+ 0x709bff0812580cbdULL, 0xdd082d3d86913914ULL,
+ 0x862f8956af035246ULL, 0x2bbc5b633bca67efULL,
+ 0x3060c180fc2784e2ULL, 0x9df313b568eeb14bULL,
+ 0xc6d4b7de417cda19ULL, 0x6b4765ebd5b5efb0ULL,
+ 0x5f12aa0fe39008d6ULL, 0xf281783a77593d7fULL,
+ 0xa9a6dc515ecb562dULL, 0x04350e64ca026384ULL,
+ 0x1fe994870def8089ULL, 0xb27a46b29926b520ULL,
+ 0xe95de2d9b0b4de72ULL, 0x44ce30ec247debdbULL,
+ 0xdee4d71e3f6f1868ULL, 0x7377052baba62dc1ULL,
+ 0x2850a14082344693ULL, 0x85c3737516fd733aULL,
+ 0x9e1fe996d1109037ULL, 0x338c3ba345d9a59eULL,
+ 0x68ab9fc86c4bceccULL, 0xc5384dfdf882fb65ULL,
+ 0x4f48d60609870dafULL, 0xe2db04339d4e3806ULL,
+ 0xb9fca058b4dc5354ULL, 0x146f726d201566fdULL,
+ 0x0fb3e88ee7f885f0ULL, 0xa2203abb7331b059ULL,
+ 0xf9079ed05aa3db0bULL, 0x54944ce5ce6aeea2ULL,
+ 0xcebeab17d5781d11ULL, 0x632d792241b128b8ULL,
+ 0x380add49682343eaULL, 0x95990f7cfcea7643ULL,
+ 0x8e45959f3b07954eULL, 0x23d647aaafcea0e7ULL,
+ 0x78f1e3c1865ccbb5ULL, 0xd56231f41295fe1cULL,
+ 0xe137fe1024b0197aULL, 0x4ca42c25b0792cd3ULL,
+ 0x1783884e99eb4781ULL, 0xba105a7b0d227228ULL,
+ 0xa1ccc098cacf9125ULL, 0x0c5f12ad5e06a48cULL,
+ 0x5778b6c67794cfdeULL, 0xfaeb64f3e35dfa77ULL,
+ 0x60c18301f84f09c4ULL, 0xcd5251346c863c6dULL,
+ 0x9675f55f4514573fULL, 0x3be6276ad1dd6296ULL,
+ 0x203abd891630819bULL, 0x8da96fbc82f9b432ULL,
+ 0xd68ecbd7ab6bdf60ULL, 0x7b1d19e23fa2eac9ULL,
+ 0xbe25541fc72011acULL, 0x13b6862a53e92405ULL,
+ 0x489122417a7b4f57ULL, 0xe502f074eeb27afeULL,
+ 0xfede6a97295f99f3ULL, 0x534db8a2bd96ac5aULL,
+ 0x086a1cc99404c708ULL, 0xa5f9cefc00cdf2a1ULL,
+ 0x3fd3290e1bdf0112ULL, 0x9240fb3b8f1634bbULL,
+ 0xc9675f50a6845fe9ULL, 0x64f48d65324d6a40ULL,
+ 0x7f281786f5a0894dULL, 0xd2bbc5b36169bce4ULL,
+ 0x899c61d848fbd7b6ULL, 0x240fb3eddc32e21fULL,
+ 0x105a7c09ea170579ULL, 0xbdc9ae3c7ede30d0ULL,
+ 0xe6ee0a57574c5b82ULL, 0x4b7dd862c3856e2bULL,
+ 0x50a1428104688d26ULL, 0xfd3290b490a1b88fULL,
+ 0xa61534dfb933d3ddULL, 0x0b86e6ea2dfae674ULL,
+ 0x91ac011836e815c7ULL, 0x3c3fd32da221206eULL,
+ 0x671877468bb34b3cULL, 0xca8ba5731f7a7e95ULL,
+ 0xd1573f90d8979d98ULL, 0x7cc4eda54c5ea831ULL,
+ 0x27e349ce65ccc363ULL, 0x8a709bfbf105f6caULL,
+ 0x9e91ac0c130e1b5eULL, 0x33027e3987c72ef7ULL,
+ 0x6825da52ae5545a5ULL, 0xc5b608673a9c700cULL,
+ 0xde6a9284fd719301ULL, 0x73f940b169b8a6a8ULL,
+ 0x28dee4da402acdfaULL, 0x854d36efd4e3f853ULL,
+ 0x1f67d11dcff10be0ULL, 0xb2f403285b383e49ULL,
+ 0xe9d3a74372aa551bULL, 0x44407576e66360b2ULL,
+ 0x5f9cef95218e83bfULL, 0xf20f3da0b547b616ULL,
+ 0xa92899cb9cd5dd44ULL, 0x04bb4bfe081ce8edULL,
+ 0x30ee841a3e390f8bULL, 0x9d7d562faaf03a22ULL,
+ 0xc65af24483625170ULL, 0x6bc9207117ab64d9ULL,
+ 0x7015ba92d04687d4ULL, 0xdd8668a7448fb27dULL,
+ 0x86a1cccc6d1dd92fULL, 0x2b321ef9f9d4ec86ULL,
+ 0xb118f90be2c61f35ULL, 0x1c8b2b3e760f2a9cULL,
+ 0x47ac8f555f9d41ceULL, 0xea3f5d60cb547467ULL,
+ 0xf1e3c7830cb9976aULL, 0x5c7015b69870a2c3ULL,
+ 0x0757b1ddb1e2c991ULL, 0xaac463e8252bfc38ULL,
+ 0x6ffc2e15dda9075dULL, 0xc26ffc20496032f4ULL,
+ 0x9948584b60f259a6ULL, 0x34db8a7ef43b6c0fULL,
+ 0x2f07109d33d68f02ULL, 0x8294c2a8a71fbaabULL,
+ 0xd9b366c38e8dd1f9ULL, 0x7420b4f61a44e450ULL,
+ 0xee0a5304015617e3ULL, 0x43998131959f224aULL,
+ 0x18be255abc0d4918ULL, 0xb52df76f28c47cb1ULL,
+ 0xaef16d8cef299fbcULL, 0x0362bfb97be0aa15ULL,
+ 0x58451bd25272c147ULL, 0xf5d6c9e7c6bbf4eeULL,
+ 0xc1830603f09e1388ULL, 0x6c10d43664572621ULL,
+ 0x3737705d4dc54d73ULL, 0x9aa4a268d90c78daULL,
+ 0x8178388b1ee19bd7ULL, 0x2cebeabe8a28ae7eULL,
+ 0x77cc4ed5a3bac52cULL, 0xda5f9ce03773f085ULL,
+ 0x40757b122c610336ULL, 0xede6a927b8a8369fULL,
+ 0xb6c10d4c913a5dcdULL, 0x1b52df7905f36864ULL,
+ 0x008e459ac21e8b69ULL, 0xad1d97af56d7bec0ULL,
+ 0xf63a33c47f45d592ULL, 0x5ba9e1f1eb8ce03bULL,
+ 0xd1d97a0a1a8916f1ULL, 0x7c4aa83f8e402358ULL,
+ 0x276d0c54a7d2480aULL, 0x8afede61331b7da3ULL,
+ 0x91224482f4f69eaeULL, 0x3cb196b7603fab07ULL,
+ 0x679632dc49adc055ULL, 0xca05e0e9dd64f5fcULL,
+ 0x502f071bc676064fULL, 0xfdbcd52e52bf33e6ULL,
+ 0xa69b71457b2d58b4ULL, 0x0b08a370efe46d1dULL,
+ 0x10d4399328098e10ULL, 0xbd47eba6bcc0bbb9ULL,
+ 0xe6604fcd9552d0ebULL, 0x4bf39df8019be542ULL,
+ 0x7fa6521c37be0224ULL, 0xd2358029a377378dULL,
+ 0x891224428ae55cdfULL, 0x2481f6771e2c6976ULL,
+ 0x3f5d6c94d9c18a7bULL, 0x92cebea14d08bfd2ULL,
+ 0xc9e91aca649ad480ULL, 0x647ac8fff053e129ULL,
+ 0xfe502f0deb41129aULL, 0x53c3fd387f882733ULL,
+ 0x08e45953561a4c61ULL, 0xa5778b66c2d379c8ULL,
+ 0xbeab1185053e9ac5ULL, 0x1338c3b091f7af6cULL,
+ 0x481f67dbb865c43eULL, 0xe58cb5ee2cacf197ULL,
+ 0x20b4f813d42e0af2ULL, 0x8d272a2640e73f5bULL,
+ 0xd6008e4d69755409ULL, 0x7b935c78fdbc61a0ULL,
+ 0x604fc69b3a5182adULL, 0xcddc14aeae98b704ULL,
+ 0x96fbb0c5870adc56ULL, 0x3b6862f013c3e9ffULL,
+ 0xa142850208d11a4cULL, 0x0cd157379c182fe5ULL,
+ 0x57f6f35cb58a44b7ULL, 0xfa6521692143711eULL,
+ 0xe1b9bb8ae6ae9213ULL, 0x4c2a69bf7267a7baULL,
+ 0x170dcdd45bf5cce8ULL, 0xba9e1fe1cf3cf941ULL,
+ 0x8ecbd005f9191e27ULL, 0x235802306dd02b8eULL,
+ 0x787fa65b444240dcULL, 0xd5ec746ed08b7575ULL,
+ 0xce30ee8d17669678ULL, 0x63a33cb883afa3d1ULL,
+ 0x388498d3aa3dc883ULL, 0x95174ae63ef4fd2aULL,
+ 0x0f3dad1425e60e99ULL, 0xa2ae7f21b12f3b30ULL,
+ 0xf989db4a98bd5062ULL, 0x541a097f0c7465cbULL,
+ 0x4fc6939ccb9986c6ULL, 0xe25541a95f50b36fULL,
+ 0xb972e5c276c2d83dULL, 0x14e137f7e20bed94ULL
+};
+
+uint64_t crc64_ecma_refl_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t i, crc = ~seed;
+
+ for (i = 0; i < len; i++) {
+ uint8_t byte = buf[i];
+ crc = crc64_ecma_refl_table[(uint8_t) crc ^ byte] ^ (crc >> 8);
+ }
+
+ return ~crc;
+}
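+
+/* Editorial sketch (not library API): a reflected table like
+ * crc64_ecma_refl_table can be generated from the bit-reversed
+ * CRC-64-ECMA polynomial 0xC96C5795D7870F42 as follows:
+ *
+ *	static void make_refl_table(uint64_t table[256], uint64_t poly)
+ *	{
+ *		for (int i = 0; i < 256; i++) {
+ *			uint64_t crc = (uint64_t) i;
+ *			for (int j = 0; j < 8; j++)
+ *				crc = (crc & 1) ? (crc >> 1) ^ poly : crc >> 1;
+ *			table[i] = crc;
+ *		}
+ *	}
+ *
+ * table[0x80] then equals the polynomial itself, matching the entry above.
+ */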
+
+uint64_t crc64_ecma_norm_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t i, crc = ~seed;
+
+ for (i = 0; i < len; i++) {
+ uint8_t byte = buf[i];
+ crc = crc64_ecma_norm_table[((crc >> 56) ^ byte) & 0xff] ^ (crc << 8);
+ }
+
+ return ~crc;
+}
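+
+/* Editorial sketch: the normal (MSB-first) tables are built the same way
+ * but shifted left against the unreflected polynomial, e.g. for ECMA
+ * (0x42f0e1eba9ea3693, matching crc64_ecma_norm_table[1] above):
+ *
+ *	uint64_t crc = (uint64_t) i << 56;
+ *	for (int j = 0; j < 8; j++)
+ *		crc = (crc >> 63) ? (crc << 1) ^ poly : crc << 1;
+ *	table[i] = crc;
+ */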
+
+uint64_t crc64_iso_refl_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t i, crc = ~seed;
+
+ for (i = 0; i < len; i++) {
+ uint8_t byte = buf[i];
+ crc = crc64_iso_refl_table[(uint8_t) crc ^ byte] ^ (crc >> 8);
+ }
+
+ return ~crc;
+}
+
+uint64_t crc64_iso_norm_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t i, crc = ~seed;
+
+ for (i = 0; i < len; i++) {
+ uint8_t byte = buf[i];
+ crc = crc64_iso_norm_table[((crc >> 56) ^ byte) & 0xff] ^ (crc << 8);
+ }
+
+ return ~crc;
+}
+
+uint64_t crc64_jones_refl_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t i, crc = ~seed;
+
+ for (i = 0; i < len; i++) {
+ uint8_t byte = buf[i];
+ crc = crc64_jones_refl_table[(uint8_t) crc ^ byte] ^ (crc >> 8);
+ }
+
+ return ~crc;
+}
+
+uint64_t crc64_jones_norm_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t i, crc = ~seed;
+
+ for (i = 0; i < len; i++) {
+ uint8_t byte = buf[i];
+ crc = crc64_jones_norm_table[((crc >> 56) ^ byte) & 0xff] ^ (crc << 8);
+ }
+
+ return ~crc;
+}
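+
+/* For reference, tables like the ones above can be regenerated from the
+ * polynomial alone. A minimal sketch for a reflected table (the reflected
+ * ECMA-182 polynomial is 0xC96C5795D7870F42, i.e. the bit-reversed form of
+ * 0x42F0E1EBA9EA3693); the helper name is illustrative and not part of
+ * this library:
+ *
+ *	static void crc64_gen_refl_table(uint64_t poly, uint64_t table[256])
+ *	{
+ *		int n, k;
+ *
+ *		for (n = 0; n < 256; n++) {
+ *			uint64_t crc = n;
+ *			for (k = 0; k < 8; k++)
+ *				crc = (crc & 1) ? (crc >> 1) ^ poly : crc >> 1;
+ *			table[n] = crc;
+ *		}
+ *	}
+ */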
+
+struct slver {
+ unsigned short snum;
+ unsigned char ver;
+ unsigned char core;
+};
+
+struct slver crc64_ecma_refl_base_slver_0000001c;
+struct slver crc64_ecma_refl_base_slver = { 0x001c, 0x00, 0x00 };
+
+struct slver crc64_ecma_norm_base_slver_00000019;
+struct slver crc64_ecma_norm_base_slver = { 0x0019, 0x00, 0x00 };
+
+struct slver crc64_iso_refl_base_slver_00000022;
+struct slver crc64_iso_refl_base_slver = { 0x0022, 0x00, 0x00 };
+
+struct slver crc64_iso_norm_base_slver_0000001f;
+struct slver crc64_iso_norm_base_slver = { 0x001f, 0x00, 0x00 };
+
+struct slver crc64_jones_refl_base_slver_00000028;
+struct slver crc64_jones_refl_base_slver = { 0x0028, 0x00, 0x00 };
+
+struct slver crc64_jones_norm_base_slver_00000025;
+struct slver crc64_jones_norm_base_slver = { 0x0025, 0x00, 0x00 };
diff --git a/src/isa-l/crc/crc64_ecma_norm_by16_10.asm b/src/isa-l/crc/crc64_ecma_norm_by16_10.asm
new file mode 100644
index 000000000..8b09a89c4
--- /dev/null
+++ b/src/isa-l/crc/crc64_ecma_norm_by16_10.asm
@@ -0,0 +1,61 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define FUNCTION_NAME crc64_ecma_norm_by16_10
+%define USE_CONSTS
+%macro INCLUDE_CONSTS 0
+rk_1: dq 0x7f52691a60ddc70d
+rk_2: dq 0x7036b0389f6a0c82
+rk1: dq 0x05f5c3c7eb52fab6
+rk2: dq 0x4eb938a7d257740e
+rk3: dq 0x05cf79dea9ac37d6
+rk4: dq 0x001067e571d7d5c2
+rk5: dq 0x05f5c3c7eb52fab6
+rk6: dq 0x0000000000000000
+rk7: dq 0x578d29d06cc4f872
+rk8: dq 0x42f0e1eba9ea3693
+rk9: dq 0xe464f4df5fb60ac1
+rk10: dq 0xb649c5b35a759cf2
+rk11: dq 0x9af04e1eff82d0dd
+rk12: dq 0x6e82e609297f8fe8
+rk13: dq 0x097c516e98bd2e73
+rk14: dq 0x0b76477b31e22e7b
+rk15: dq 0x5f6843ca540df020
+rk16: dq 0xddf4b6981205b83f
+rk17: dq 0x54819d8713758b2c
+rk18: dq 0x4a6b90073eb0af5a
+rk19: dq 0x571bee0a227ef92b
+rk20: dq 0x44bef2a201b5200c
+rk_1b: dq 0x05f5c3c7eb52fab6
+rk_2b: dq 0x4eb938a7d257740e
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%endm
+
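+; this file only supplies FUNCTION_NAME and the folding constants; the shared
+; by16_10 implementation body is pulled in by the %include below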
+%include "crc64_iso_norm_by16_10.asm"
diff --git a/src/isa-l/crc/crc64_ecma_norm_by8.asm b/src/isa-l/crc/crc64_ecma_norm_by8.asm
new file mode 100644
index 000000000..ca99e344a
--- /dev/null
+++ b/src/isa-l/crc/crc64_ecma_norm_by8.asm
@@ -0,0 +1,584 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function API:
+; uint64_t crc64_ecma_norm_by8(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
+; sample yasm command line: yasm -f elf64 -X gnu -g dwarf2 crc64_ecma_norm_by8.asm
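+;
+; From C the call is simply (a usage sketch; variable names are illustrative):
+;   uint64_t crc = crc64_ecma_norm_by8(0, buf, len);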
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+align 16
+mk_global crc64_ecma_norm_by8, function
+crc64_ecma_norm_by8:
+ endbranch
+
+ not arg1 ;~init_crc
+
+ sub rsp,VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+
+ ; check if smaller than 256
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movq xmm10, arg1 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved
+	; to the high part of the register, because the data will be byte-reflected and
+	; will then align with the initial crc in the correct place.
+ pslldq xmm10, 8
+
+ movdqa xmm11, [SHUF_MASK]
+ ; receive the initial 128B data, xor the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ pshufb xmm0, xmm11
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ pshufb xmm1, xmm11
+ pshufb xmm2, xmm11
+ pshufb xmm3, xmm11
+ pshufb xmm4, xmm11
+ pshufb xmm5, xmm11
+ pshufb xmm6, xmm11
+ pshufb xmm7, xmm11
+
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0<=y<128) bytes in the buffer. The _fold_128_B_loop
+	; will fold 128B at a time until 128+y bytes of buffer remain
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
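+	; each iteration computes, for each folded register x and the next 16B
+	; of (byte-reflected) data d, roughly:
+	;   x' = clmul(x_lo, rk3) xor clmul(x_hi, rk4) xor d
+	; (a sketch of the standard PCLMULQDQ folding recurrence)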
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128 ; buf += 128;
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+ pclmulqdq xmm0, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm1, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+ pclmulqdq xmm2, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm3, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+ pclmulqdq xmm4, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm5, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+ pclmulqdq xmm6, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm7, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+
+ movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
+
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pshufb xmm0, xmm11
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	;now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here the remaining data is less than 16 bytes.
+	; since we know there was data before the pointer, we can back the input
+	; pointer up so that exactly 16 bytes are loaded; the registers are then
+	; adjusted accordingly.
+_get_last_two_xmms:
+ movdqa xmm2, xmm7
+
+ movdqu xmm1, [arg2 - 16 + arg3]
+ pshufb xmm1, xmm11
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ movdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ pshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ pxor xmm0, [mask1]
+ pshufb xmm7, xmm0
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ ; fold 16 Bytes
+ movdqa xmm2, xmm1
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0x01 ; H*L
+ pslldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;barrett reduction
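+	; (sketch: with rk7 = floor(2^128/Q) and rk8 = Q, the quotient estimate
+	;  clmul(x_hi, rk7) is multiplied by Q and xored back into x, leaving
+	;  the 64-bit remainder in the low qword of xmm7)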
+_barrett:
+ movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ movdqa xmm0, xmm7
+
+ movdqa xmm1, xmm7
+ pand xmm1, [mask3]
+ pclmulqdq xmm7, xmm10, 0x01
+ pxor xmm7, xmm1
+
+ pclmulqdq xmm7, xmm10, 0x11
+ pxor xmm7, xmm0
+ pextrq rax, xmm7, 0
+
+_cleanup:
+ not rax
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ movdqa xmm11, [SHUF_MASK]
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movq xmm0, arg1 ; get the initial crc value
+ pslldq xmm0, 8 ; align it to its correct place
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ movdqa xmm11, [SHUF_MASK]
+
+ movq xmm0, arg1 ; get the initial crc value
+ pslldq xmm0, 8 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ ; shl r9, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask1]
+
+ pshufb xmm7, xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Right shift (8-length) bytes in XMM
+ add rax, 8
+ movdqu xmm0, [rax]
+ pshufb xmm7,xmm0
+
+ jmp _barrett
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+
+; precomputed constants
+align 16
+
+rk1 :
+DQ 0x5f5c3c7eb52fab6
+rk2 :
+DQ 0x4eb938a7d257740e
+rk3 :
+DQ 0x5cf79dea9ac37d6
+rk4 :
+DQ 0x001067e571d7d5c2
+rk5 :
+DQ 0x5f5c3c7eb52fab6
+rk6 :
+DQ 0x0000000000000000
+rk7 :
+DQ 0x578d29d06cc4f872
+rk8 :
+DQ 0x42f0e1eba9ea3693
+rk9 :
+DQ 0xe464f4df5fb60ac1
+rk10 :
+DQ 0xb649c5b35a759cf2
+rk11 :
+DQ 0x9af04e1eff82d0dd
+rk12 :
+DQ 0x6e82e609297f8fe8
+rk13 :
+DQ 0x97c516e98bd2e73
+rk14 :
+DQ 0xb76477b31e22e7b
+rk15 :
+DQ 0x5f6843ca540df020
+rk16 :
+DQ 0xddf4b6981205b83f
+rk17 :
+DQ 0x54819d8713758b2c
+rk18 :
+DQ 0x4a6b90073eb0af5a
+rk19 :
+DQ 0x571bee0a227ef92b
+rk20 :
+DQ 0x44bef2a201b5200c
+
+
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+mask3:
+dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_ecma_norm_by8, 01, 00, 001a
diff --git a/src/isa-l/crc/crc64_ecma_refl_by16_10.asm b/src/isa-l/crc/crc64_ecma_refl_by16_10.asm
new file mode 100644
index 000000000..a48d0b203
--- /dev/null
+++ b/src/isa-l/crc/crc64_ecma_refl_by16_10.asm
@@ -0,0 +1,61 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define FUNCTION_NAME crc64_ecma_refl_by16_10
+%define USE_CONSTS
+%macro INCLUDE_CONSTS 0
+rk_1: dq 0xf31fd9271e228b79
+rk_2: dq 0x8260adf2381ad81c
+rk1: dq 0xdabe95afc7875f40
+rk2: dq 0xe05dd497ca393ae4
+rk3: dq 0xd7d86b2af73de740
+rk4: dq 0x8757d71d4fcc1000
+rk5: dq 0xdabe95afc7875f40
+rk6: dq 0x0000000000000000
+rk7: dq 0x9c3e466c172963d5
+rk8: dq 0x92d8af2baf0e1e84
+rk9: dq 0x947874de595052cb
+rk10: dq 0x9e735cb59b4724da
+rk11: dq 0xe4ce2cd55fea0037
+rk12: dq 0x2fe3fd2920ce82ec
+rk13: dq 0x0e31d519421a63a5
+rk14: dq 0x2e30203212cac325
+rk15: dq 0x081f6054a7842df4
+rk16: dq 0x6ae3efbb9dd441f3
+rk17: dq 0x69a35d91c3730254
+rk18: dq 0xb5ea1af9c013aca4
+rk19: dq 0x3be653a30fe1af51
+rk20: dq 0x60095b008a9efa44
+rk_1b: dq 0xdabe95afc7875f40
+rk_2b: dq 0xe05dd497ca393ae4
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%endm
+
+%include "crc64_iso_refl_by16_10.asm"
diff --git a/src/isa-l/crc/crc64_ecma_refl_by8.asm b/src/isa-l/crc/crc64_ecma_refl_by8.asm
new file mode 100644
index 000000000..c09ddfa4f
--- /dev/null
+++ b/src/isa-l/crc/crc64_ecma_refl_by8.asm
@@ -0,0 +1,549 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; uint64_t crc64_ecma_refl_by8(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
+; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
+; sample yasm command line:
+; yasm -f elf64 -X gnu -g dwarf2 crc64_ecma_refl_by8.asm
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+
+align 16
+mk_global crc64_ecma_refl_by8, function
+crc64_ecma_refl_by8:
+ endbranch
+	; uint64_t c = crc ^ 0xffffffffffffffffULL;
+ not arg1
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+ ; check if smaller than 256B
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movq xmm10, arg1 ; initial crc
+ ; receive the initial 128B data, xor the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point there are 128*x+y (0<=y<128) bytes in the buffer. The _fold_128_B_loop
+	; will fold 128B at a time until 128+y bytes of buffer remain
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
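+	; in this reflected variant no pshufb byte-reflection is needed: data is
+	; consumed in memory order, and the pclmulqdq immediates select the
+	; opposite qword halves (0x10/0x01) relative to the normal version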
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+ pclmulqdq xmm0, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm1, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+ pclmulqdq xmm2, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm3, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+ pclmulqdq xmm4, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm5, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+ pclmulqdq xmm6, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm7, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+ ; xmm0 to xmm7
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+ ;xmm1 to xmm7
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+ ; xmm6 to xmm7
+ movdqa xmm10, [rk1]
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+ ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	;now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ add arg3, 16
+ je _128_done
+	; here the remaining data is less than 16 bytes.
+	; since we know there was data before the pointer, we can back the input
+	; pointer up so that exactly 16 bytes are loaded; the registers are then
+	; adjusted accordingly.
+_get_last_two_xmms:
+
+
+ movdqa xmm2, xmm7
+ movdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ movdqu xmm0, [rax]
+
+
+ pshufb xmm7, xmm0
+ pxor xmm0, [mask3]
+ pshufb xmm2, xmm0
+
+ pblendvb xmm2, xmm1 ;xmm0 is implicit
+ ;;;;;;;;;;
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5]
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0
+ psrldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm1, xmm7
+ movdqa xmm10, [rk7]
+
+ pclmulqdq xmm7, xmm10, 0
+ movdqa xmm2, xmm7
+ pclmulqdq xmm7, xmm10, 0x10
+ pslldq xmm2, 8
+ pxor xmm7, xmm2
+ pxor xmm7, xmm1
+ pextrq rax, xmm7, 1
+
+_cleanup:
+	; return c ^ 0xffffffffffffffffULL;
+ not rax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movq xmm0, arg1 ; get the initial crc value
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ movq xmm0, arg1 ; get the initial crc value
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+_zero_left:
+ movdqa xmm7, [rsp]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ movdqu xmm0, [rax + r9]
+ pshufb xmm7,xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Left shift (8-length) bytes in XMM
+ movdqu xmm0, [rax + r9 + 8]
+ pshufb xmm7,xmm0
+
+ jmp _barrett
+
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+
+; precomputed constants
+align 16
+; rk7 = floor(2^128/Q)
+; rk8 = Q
+rk1 :
+DQ 0xdabe95afc7875f40
+rk2 :
+DQ 0xe05dd497ca393ae4
+rk3 :
+DQ 0xd7d86b2af73de740
+rk4 :
+DQ 0x8757d71d4fcc1000
+rk5 :
+DQ 0xdabe95afc7875f40
+rk6 :
+DQ 0x0000000000000000
+rk7 :
+DQ 0x9c3e466c172963d5
+rk8 :
+DQ 0x92d8af2baf0e1e84
+rk9 :
+DQ 0x947874de595052cb
+rk10 :
+DQ 0x9e735cb59b4724da
+rk11 :
+DQ 0xe4ce2cd55fea0037
+rk12 :
+DQ 0x2fe3fd2920ce82ec
+rk13 :
+DQ 0xe31d519421a63a5
+rk14 :
+DQ 0x2e30203212cac325
+rk15 :
+DQ 0x81f6054a7842df4
+rk16 :
+DQ 0x6ae3efbb9dd441f3
+rk17 :
+DQ 0x69a35d91c3730254
+rk18 :
+DQ 0xb5ea1af9c013aca4
+rk19 :
+DQ 0x3be653a30fe1af51
+rk20 :
+DQ 0x60095b008a9efa44
+
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+
+mask:
+dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2:
+dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3:
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_ecma_refl_by8, 01, 00, 001d
diff --git a/src/isa-l/crc/crc64_example.c b/src/isa-l/crc/crc64_example.c
new file mode 100644
index 000000000..64763a1b0
--- /dev/null
+++ b/src/isa-l/crc/crc64_example.c
@@ -0,0 +1,68 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "crc64.h"
+
+#define BUF_SIZE 8192
+#define INIT_SEED 0x12345678
+
+int main(int argc, char *argv[])
+{
+ uint8_t inbuf[BUF_SIZE];
+ uint64_t avail_in, total_in = 0;
+ uint64_t crc64_checksum;
+ FILE *in;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: crc64_example infile\n");
+		exit(1);
+ }
+ in = fopen(argv[1], "rb");
+ if (!in) {
+ fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+		exit(1);
+ }
+
+ printf("crc64_example -- crc64_ecma_refl:\n");
+ fflush(0);
+
+ crc64_checksum = INIT_SEED;
+ while ((avail_in = fread(inbuf, 1, BUF_SIZE, in))) {
+		// update mode: the previous checksum is fed back in as the seed
+		// for the next chunk
+ crc64_checksum = crc64_ecma_refl(crc64_checksum, inbuf, avail_in);
+ total_in += avail_in;
+ }
+
+ fclose(in);
+	printf("total length is %lu, checksum is 0x%lx\n", total_in, crc64_checksum);
+
+ return 0;
+}
diff --git a/src/isa-l/crc/crc64_funcs_perf.c b/src/isa-l/crc/crc64_funcs_perf.c
new file mode 100644
index 000000000..4ad1cc199
--- /dev/null
+++ b/src/isa-l/crc/crc64_funcs_perf.c
@@ -0,0 +1,103 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include "crc64.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (2 * GT_L3_CACHE)
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define TEST_MEM TEST_LEN
+
+typedef uint64_t(*crc64_func_t) (uint64_t, const uint8_t *, uint64_t);
+
+typedef struct func_case {
+ char *note;
+ crc64_func_t crc64_func_call;
+ crc64_func_t crc64_ref_call;
+} func_case_t;
+
+func_case_t test_funcs[] = {
+ {"crc64_ecma_norm", crc64_ecma_norm, crc64_ecma_norm_base},
+ {"crc64_ecma_refl", crc64_ecma_refl, crc64_ecma_refl_base},
+ {"crc64_iso_norm", crc64_iso_norm, crc64_iso_norm_base},
+ {"crc64_iso_refl", crc64_iso_refl, crc64_iso_refl_base},
+ {"crc64_jones_norm", crc64_jones_norm, crc64_jones_norm_base},
+ {"crc64_jones_refl", crc64_jones_refl, crc64_jones_refl_base}
+};
+
+int main(int argc, char *argv[])
+{
+ int j;
+ void *buf;
+ uint64_t crc;
+ struct perf start;
+ func_case_t *test_func;
+
+ if (posix_memalign(&buf, 1024, TEST_LEN)) {
+		printf("alloc error: Fail\n");
+ return -1;
+ }
+ memset(buf, (char)TEST_SEED, TEST_LEN);
+
+ for (j = 0; j < sizeof(test_funcs) / sizeof(test_funcs[0]); j++) {
+ test_func = &test_funcs[j];
+ printf("%s_perf:\n", test_func->note);
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ BENCHMARK(&start, BENCHMARK_TIME, crc =
+ test_func->crc64_func_call(TEST_SEED, buf, TEST_LEN));
+ printf("%s" TEST_TYPE_STR ": ", test_func->note);
+ perf_print(start, (long long)TEST_LEN);
+
+ printf("finish 0x%lx\n", crc);
+ }
+
+ return 0;
+}
diff --git a/src/isa-l/crc/crc64_funcs_test.c b/src/isa-l/crc/crc64_funcs_test.c
new file mode 100644
index 000000000..7e4ee2b37
--- /dev/null
+++ b/src/isa-l/crc/crc64_funcs_test.c
@@ -0,0 +1,315 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "crc64.h"
+#include "types.h"
+#include "crc64_ref.h"
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define MAX_BUF 4096
+#define TEST_SIZE 32
+
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+typedef uint64_t(*crc64_func_t) (uint64_t, const uint8_t *, uint64_t);
+
+typedef struct func_case {
+ char *note;
+ crc64_func_t crc64_func_call;
+ crc64_func_t crc64_base_call;
+ crc64_func_t crc64_ref_call;
+} func_case_t;
+
+func_case_t test_funcs[] = {
+ {"crc64_ecma_norm", crc64_ecma_norm, crc64_ecma_norm_base, crc64_ecma_norm_ref},
+ {"crc64_ecma_refl", crc64_ecma_refl, crc64_ecma_refl_base, crc64_ecma_refl_ref},
+ {"crc64_iso_norm", crc64_iso_norm, crc64_iso_norm_base, crc64_iso_norm_ref},
+ {"crc64_iso_refl", crc64_iso_refl, crc64_iso_refl_base, crc64_iso_refl_ref},
+ {"crc64_jones_norm", crc64_jones_norm, crc64_jones_norm_base,
+ crc64_jones_norm_ref},
+ {"crc64_jones_refl", crc64_jones_refl, crc64_jones_refl_base, crc64_jones_refl_ref}
+};
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+// Test cases
+int zeros_test(func_case_t * test_func);
+
+int simple_pattern_test(func_case_t * test_func);
+
+int seeds_sizes_test(func_case_t * test_func);
+
+int eob_test(func_case_t * test_func);
+
+int update_test(func_case_t * test_func);
+
+int verbose = 0;
+void *buf_alloc = NULL;
+
+int main(int argc, char *argv[])
+{
+ int fail = 0, fail_case;
+ int i, ret;
+ func_case_t *test_func;
+
+ verbose = argc - 1;
+
+ // Align to 32B boundary
+ ret = posix_memalign(&buf_alloc, TEST_SIZE, MAX_BUF * TEST_SIZE);
+ if (ret) {
+		printf("alloc error: Fail\n");
+ return -1;
+ }
+ srand(TEST_SEED);
+ printf("CRC64 Tests\n");
+
+ for (i = 0; i < sizeof(test_funcs) / sizeof(test_funcs[0]); i++) {
+ fail_case = 0;
+ test_func = &test_funcs[i];
+
+ printf("Test %s\t", test_func->note);
+ fail_case += zeros_test(test_func);
+ fail_case += simple_pattern_test(test_func);
+ fail_case += seeds_sizes_test(test_func);
+ fail_case += eob_test(test_func);
+ fail_case += update_test(test_func);
+ printf(" done: %s\n", fail_case ? "Fail" : "Pass");
+
+ if (fail_case) {
+ printf("\n%s Failed %d tests\n", test_func->note, fail_case);
+ fail++;
+ }
+ }
+
+ printf("CRC64 Tests all done: %s\n", fail ? "Fail" : "Pass");
+
+ return fail;
+}
+
+// Test of all zeros
+int zeros_test(func_case_t * test_func)
+{
+ uint64_t crc_ref, crc_base, crc;
+ int fail = 0;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc;
+ memset(buf, 0, MAX_BUF * 10);
+ crc_ref = test_func->crc64_ref_call(TEST_SEED, buf, MAX_BUF * 10);
+ crc_base = test_func->crc64_base_call(TEST_SEED, buf, MAX_BUF * 10);
+ crc = test_func->crc64_func_call(TEST_SEED, buf, MAX_BUF * 10);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref)) {
+ fail++;
+		printf("\n		      ref		base		 opt\n");
+		printf("		    ------	      ------	      ------\n");
+		printf("crc zero = 0x%16lx 0x%16lx 0x%16lx\n", crc_ref, crc_base, crc);
+ } else
+ printf(".");
+
+ return fail;
+}
+
+// Another simple test pattern
+int simple_pattern_test(func_case_t * test_func)
+{
+ uint64_t crc_ref, crc_base, crc;
+ int fail = 0;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc;
+ memset(buf, 0x8a, MAX_BUF);
+ crc_ref = test_func->crc64_ref_call(TEST_SEED, buf, MAX_BUF);
+ crc_base = test_func->crc64_base_call(TEST_SEED, buf, MAX_BUF);
+ crc = test_func->crc64_func_call(TEST_SEED, buf, MAX_BUF);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc all 8a = 0x%16lx 0x%16lx 0x%16lx\n", crc_ref, crc_base, crc);
+ else
+ printf(".");
+
+ return fail;
+}
+
+int seeds_sizes_test(func_case_t * test_func)
+{
+ uint64_t crc_ref, crc_base, crc;
+ int fail = 0;
+ int i;
+ uint64_t r, s;
+ unsigned char *buf = NULL;
+
+ // Do a few random tests
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+ rand_buffer(buf, MAX_BUF * TEST_SIZE);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc_ref = test_func->crc64_ref_call(r, buf, MAX_BUF);
+ crc_base = test_func->crc64_base_call(r, buf, MAX_BUF);
+ crc = test_func->crc64_func_call(r, buf, MAX_BUF);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc rand%3d = 0x%16lx 0x%16lx 0x%16lx\n", i, crc_ref, crc_base,
+ crc);
+ else if (i % (TEST_SIZE / 8) == 0)
+ printf(".");
+ buf += MAX_BUF;
+ }
+
+ // Do a few random sizes
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+
+ for (i = MAX_BUF; i >= 0; i--) {
+ crc_ref = test_func->crc64_ref_call(r, buf, i);
+ crc_base = test_func->crc64_base_call(r, buf, i);
+ crc = test_func->crc64_func_call(r, buf, i);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref)) {
+ fail++;
+ printf("fail random size%i 0x%16lx 0x%16lx 0x%16lx\n", i, crc_ref,
+ crc_base, crc);
+ } else if (i % (MAX_BUF / 8) == 0)
+ printf(".");
+ }
+
+ // Try different seeds
+ for (s = 0; s < 20; s++) {
+ buf = (unsigned char *)buf_alloc; //reset buf
+
+ r = rand(); // just to get a new seed
+ rand_buffer(buf, MAX_BUF * TEST_SIZE); // new pseudo-rand data
+
+ if (verbose)
+ printf("seed = 0x%lx\n", r);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc_ref = test_func->crc64_ref_call(r, buf, MAX_BUF);
+ crc_base = test_func->crc64_base_call(r, buf, MAX_BUF);
+ crc = test_func->crc64_func_call(r, buf, MAX_BUF);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc rand%3d = 0x%16lx 0x%16lx 0x%16lx\n", i, crc_ref,
+ crc_base, crc);
+ else if (i % (TEST_SIZE * 20 / 8) == 0)
+ printf(".");
+ buf += MAX_BUF;
+ }
+ }
+
+ return fail;
+}
+
+// Run tests at end of buffer
+int eob_test(func_case_t * test_func)
+{
+ uint64_t crc_ref, crc_base, crc;
+ int fail = 0;
+ int i;
+ unsigned char *buf = NULL;
+
+ // Null test
+ if (0 != test_func->crc64_func_call(0, NULL, 0)) {
+ fail++;
+ printf("crc null test fail\n");
+ }
+
+ buf = (unsigned char *)buf_alloc; //reset buf
+ buf = buf + ((MAX_BUF - 1) * TEST_SIZE); //Line up TEST_SIZE from end
+ for (i = 0; i <= TEST_SIZE; i++) {
+ crc_ref = test_func->crc64_ref_call(TEST_SEED, buf + i, TEST_SIZE - i);
+ crc_base = test_func->crc64_base_call(TEST_SEED, buf + i, TEST_SIZE - i);
+ crc = test_func->crc64_func_call(TEST_SEED, buf + i, TEST_SIZE - i);
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc eob rand%3d = 0x%16lx 0x%16lx 0x%16lx\n", i, crc_ref,
+ crc_base, crc);
+ else if (i % (TEST_SIZE / 8) == 0)
+ printf(".");
+ }
+
+ return fail;
+}
+
+int update_test(func_case_t * test_func)
+{
+ uint64_t crc_ref, crc_base, crc;
+ int fail = 0;
+ int i;
+ uint64_t r;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+ // Process the whole buf with reference func single call.
+ crc_ref = test_func->crc64_ref_call(r, buf, MAX_BUF * TEST_SIZE);
+ crc_base = test_func->crc64_base_call(r, buf, MAX_BUF * TEST_SIZE);
+ // Process buf with update method.
+ for (i = 0; i < TEST_SIZE; i++) {
+ crc = test_func->crc64_func_call(r, buf, MAX_BUF);
+ // Update crc seeds and buf pointer.
+ r = crc;
+ buf += MAX_BUF;
+ }
+
+ if ((crc_base != crc_ref) || (crc != crc_ref))
+ fail++;
+ if (verbose)
+ printf("crc rand%3d = 0x%16lx 0x%16lx 0x%16lx\n", i, crc_ref, crc_base, crc);
+ else
+ printf(".");
+
+ return fail;
+}
diff --git a/src/isa-l/crc/crc64_iso_norm_by16_10.asm b/src/isa-l/crc/crc64_iso_norm_by16_10.asm
new file mode 100644
index 000000000..4eefbd35e
--- /dev/null
+++ b/src/isa-l/crc/crc64_iso_norm_by16_10.asm
@@ -0,0 +1,525 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; uint64_t crc64_iso_norm_by16_10(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
+%include "reg_sizes.asm"
+
+%ifndef FUNCTION_NAME
+%define FUNCTION_NAME crc64_iso_norm_by16_10
+%endif
+
+%if (AS_FEATURE_LEVEL) >= 10
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*12+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global FUNCTION_NAME, function
+FUNCTION_NAME:
+ endbranch
+ not arg1
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+ ; push the xmm registers into the stack to maintain
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+ vmovdqa [rsp + XMM_SAVE + 16*8], xmm14
+ vmovdqa [rsp + XMM_SAVE + 16*9], xmm15
+%endif
+ vbroadcasti32x4 zmm18, [SHUF_MASK]
+ cmp arg3, 256
+ jl _less_than_256
+
+ ; load the initial crc value
+ vmovq xmm10, arg1 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved
+	; to the high part of the register, because the data will be byte-reflected and
+	; will then align with the initial crc in the correct place.
+ vpslldq xmm10, 8
+
+ ; receive the initial 128B data, xor the initial crc value
+ vmovdqu8 zmm0, [arg2+16*0]
+ vmovdqu8 zmm4, [arg2+16*4]
+ vpshufb zmm0, zmm0, zmm18
+ vpshufb zmm4, zmm4, zmm18
+ vpxorq zmm0, zmm10
+ vbroadcasti32x4 zmm10, [rk3] ;zmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
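+	; (imm 0x00 multiplies the low 64-bit halves of both operands,
+	; imm 0x11 the high halves, so one broadcast register of constants
+	; serves both partial products)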
+ sub arg3, 256
+ cmp arg3, 256
+ jl _fold_128_B_loop
+
+ vmovdqu8 zmm7, [arg2+16*8]
+ vmovdqu8 zmm8, [arg2+16*12]
+ vpshufb zmm7, zmm7, zmm18
+ vpshufb zmm8, zmm8, zmm18
+	vbroadcasti32x4 zmm16, [rk_1]	;zmm16 has rk_1 and rk_2
+ sub arg3, 256
+
+_fold_256_B_loop:
+ add arg2, 256
+ vmovdqu8 zmm3, [arg2+16*0]
+ vpshufb zmm3, zmm3, zmm18
+ vpclmulqdq zmm1, zmm0, zmm16, 0x00
+ vpclmulqdq zmm2, zmm0, zmm16, 0x11
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm3
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpshufb zmm9, zmm9, zmm18
+ vpclmulqdq zmm5, zmm4, zmm16, 0x00
+ vpclmulqdq zmm6, zmm4, zmm16, 0x11
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ vmovdqu8 zmm11, [arg2+16*8]
+ vpshufb zmm11, zmm11, zmm18
+ vpclmulqdq zmm12, zmm7, zmm16, 0x00
+ vpclmulqdq zmm13, zmm7, zmm16, 0x11
+ vpxorq zmm7, zmm12, zmm13
+ vpxorq zmm7, zmm7, zmm11
+
+ vmovdqu8 zmm17, [arg2+16*12]
+ vpshufb zmm17, zmm17, zmm18
+ vpclmulqdq zmm14, zmm8, zmm16, 0x00
+ vpclmulqdq zmm15, zmm8, zmm16, 0x11
+ vpxorq zmm8, zmm14, zmm15
+ vpxorq zmm8, zmm8, zmm17
+
+ sub arg3, 256
+ jge _fold_256_B_loop
+
+ ;; Fold 256 into 128
+ add arg2, 256
+ vpclmulqdq zmm1, zmm0, zmm10, 0x00
+ vpclmulqdq zmm2, zmm0, zmm10, 0x11
+ vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC
+
+ vpclmulqdq zmm5, zmm4, zmm10, 0x00
+ vpclmulqdq zmm6, zmm4, zmm10, 0x11
+ vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC
+
+ vmovdqa32 zmm0, zmm7
+ vmovdqa32 zmm4, zmm8
+
+ add arg3, 128
+ jmp _fold_128_B_register
+
+ ; fold 128B at a time. This section of the code folds 2 zmm registers in parallel
+_fold_128_B_loop:
+ add arg2, 128 ; update the buffer pointer
+ vmovdqu8 zmm8, [arg2+16*0]
+ vpshufb zmm8, zmm8, zmm18
+ vpclmulqdq zmm1, zmm0, zmm10, 0x00
+ vpclmulqdq zmm2, zmm0, zmm10, 0x11
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm8
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpshufb zmm9, zmm9, zmm18
+ vpclmulqdq zmm5, zmm4, zmm10, 0x00
+ vpclmulqdq zmm6, zmm4, zmm10, 0x11
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+ sub arg3, 128
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 2 zmm registers: zmm0, zmm4
+
+_fold_128_B_register:
+ ; fold the 8 128b parts into 1 xmm register with different constants
+ vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16
+ vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0
+ vpclmulqdq zmm1, zmm0, zmm16, 0x00
+ vpclmulqdq zmm2, zmm0, zmm16, 0x11
+ vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand
+
+ vpclmulqdq zmm5, zmm4, zmm11, 0x00
+ vpclmulqdq zmm6, zmm4, zmm11, 0x11
+ vmovdqa xmm10, [rk1] ; Needed later in reduction loop
+ vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC
+ vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC
+
+ vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10
+ vpxorq ymm8, ymm8, ymm1
+ vextracti64x2 xmm5, ymm8, 1
+ vpxorq xmm7, xmm5, xmm8
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce: 16 bytes are in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x00
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpshufb xmm0, xmm0, xmm18
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ add arg3, 16
+ je _128_done
+ ; here we are getting data that is less than 16 bytes.
+ ; since we know that there was data before the pointer, we can offset
+ ; the input pointer before the actual point, to receive exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+_get_last_two_xmms:
+
+ vmovdqa xmm2, xmm7
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+ vpshufb xmm1, xmm18
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ vmovdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ vpshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ vpxor xmm0, [mask1]
+ vpshufb xmm7, xmm0
+ vpblendvb xmm1, xmm1, xmm2, xmm0
+
+ ; fold 16 Bytes
+ vmovdqa xmm2, xmm1
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x11
+ vpclmulqdq xmm8, xmm10, 0x0
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5]
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0x01 ; H*L
+ vpslldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
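+	; Barrett reduction: by isa-l convention rk7 holds floor(x^128 / P) and
+	; rk8 the polynomial P itself (the rk7/rk8 convention is spelled out in
+	; the by8 reflected variant), so two more carry-less multiplies compute
+	; the 128-bit remainder mod P without a division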
+ vmovdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ vmovdqa xmm0, xmm7
+
+ vmovdqa xmm1, xmm7
+ vpand xmm1, [mask3]
+ vpclmulqdq xmm7, xmm10, 0x01
+ vpxor xmm7, xmm1
+
+ vpclmulqdq xmm7, xmm10, 0x11
+ vpxor xmm7, xmm0
+ vpextrq rax, xmm7, 0
+
+_cleanup:
+ not rax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+ vmovdqa xmm14, [rsp + XMM_SAVE + 16*8]
+ vmovdqa xmm15, [rsp + XMM_SAVE + 16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovq xmm0, arg1 ; get the initial crc value
+ vpslldq xmm0, 8 ; align it to its correct place
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm18 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ vmovq xmm0, arg1 ; get the initial crc value
+ vpslldq xmm0, 8 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpshufb xmm7, xmm18 ; byte-reflect the plaintext
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+_zero_left:
+ vmovdqa xmm7, [rsp]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ vmovdqu xmm0, [rax]
+ vpxor xmm0, [mask1]
+
+ vpshufb xmm7, xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Right shift (8-length) bytes in XMM
+ add rax, 8
+ vmovdqu xmm0, [rax]
+	vpshufb xmm7, xmm0
+
+ jmp _barrett
+
+align 16
+_exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpshufb xmm7, xmm18
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+align 32
+
+%ifndef USE_CONSTS
+; precomputed constants
+rk_1: dq 0x0000001a00000144
+rk_2: dq 0x0000015e00001dac
+rk1: dq 0x0000000000000145
+rk2: dq 0x0000000000001db7
+rk3: dq 0x000100000001001a
+rk4: dq 0x001b0000001b015e
+rk5: dq 0x0000000000000145
+rk6: dq 0x0000000000000000
+rk7: dq 0x000000000000001b
+rk8: dq 0x000000000000001b
+rk9: dq 0x0150145145145015
+rk10: dq 0x1c71db6db6db71c7
+rk11: dq 0x0001110110110111
+rk12: dq 0x001aab1ab1ab1aab
+rk13: dq 0x0000014445014445
+rk14: dq 0x00001daab71daab7
+rk15: dq 0x0000000101000101
+rk16: dq 0x0000001b1b001b1b
+rk17: dq 0x0000000001514515
+rk18: dq 0x000000001c6db6c7
+rk19: dq 0x0000000000011011
+rk20: dq 0x00000000001ab1ab
+
+rk_1b: dq 0x0000000000000145
+rk_2b: dq 0x0000000000001db7
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%else
+INCLUDE_CONSTS
+%endif
+
+mask1: dq 0x8080808080808080, 0x8080808080808080
+mask2: dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+mask3: dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF
+
+SHUF_MASK: dq 0x08090A0B0C0D0E0F, 0x0001020304050607
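+; (pshufb indices 15..0: SHUF_MASK reverses the byte order of each 16-byte
+; lane so memory-order input matches the normal, non-reflected bit ordering)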
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x8080808080808080
+
+
+%else ; The assembler does not support these opcodes; define an empty placeholder symbol on Windows so the build still links.
+%ifidn __OUTPUT_FORMAT__, win64
+global no_ %+ FUNCTION_NAME
+no_ %+ FUNCTION_NAME %+ :
+%endif
+%endif ; (AS_FEATURE_LEVEL) >= 10
diff --git a/src/isa-l/crc/crc64_iso_norm_by8.asm b/src/isa-l/crc/crc64_iso_norm_by8.asm
new file mode 100644
index 000000000..16147d5ff
--- /dev/null
+++ b/src/isa-l/crc/crc64_iso_norm_by8.asm
@@ -0,0 +1,582 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function API:
+; uint64_t crc64_iso_norm_by8(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
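+; distance in bytes that prefetchnta runs ahead of the loads in the 128B
+; folding loop below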
+
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+align 16
+mk_global crc64_iso_norm_by8, function
+crc64_iso_norm_by8:
+ endbranch
+
+ not arg1 ;~init_crc
+
+ sub rsp,VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the callee-saved xmm registers on the stack (win64 ABI)
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+
+ ; check if smaller than 256
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movq xmm10, arg1 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be
+	; moved to the high part of the register, because the data will be
+	; byte-reflected and will then line up with the initial crc in the right place.
+ pslldq xmm10, 8
+
+ movdqa xmm11, [SHUF_MASK]
+	; load the initial 128B of data and xor in the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ pshufb xmm0, xmm11
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ pshufb xmm1, xmm11
+ pshufb xmm2, xmm11
+ pshufb xmm3, xmm11
+ pshufb xmm4, xmm11
+ pshufb xmm5, xmm11
+ pshufb xmm6, xmm11
+ pshufb xmm7, xmm11
+
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point in the code there are 128*x+y (0 <= y < 128) bytes of buffer;
+	; the _fold_128_B_loop will fold 128B at a time until only 128+y bytes remain
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
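+	; each 128-bit accumulator A is updated as
+	;   A' = clmul(A.lo, rk3) xor clmul(A.hi, rk4) xor D
+	; where rk3/rk4 are precomputed x^n mod P constants for the 128-byte
+	; stride and D is the next 128 bits of input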
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128 ; buf += 128;
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+	pclmulqdq	xmm0, xmm10, 0x0
+	pclmulqdq	xmm8, xmm10, 0x11
+	pclmulqdq	xmm1, xmm10, 0x0
+	pclmulqdq	xmm13, xmm10, 0x11
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+	pclmulqdq	xmm2, xmm10, 0x0
+	pclmulqdq	xmm8, xmm10, 0x11
+	pclmulqdq	xmm3, xmm10, 0x0
+	pclmulqdq	xmm13, xmm10, 0x11
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+	pclmulqdq	xmm4, xmm10, 0x0
+	pclmulqdq	xmm8, xmm10, 0x11
+	pclmulqdq	xmm5, xmm10, 0x0
+	pclmulqdq	xmm13, xmm10, 0x11
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+	pclmulqdq	xmm6, xmm10, 0x0
+	pclmulqdq	xmm8, xmm10, 0x11
+	pclmulqdq	xmm7, xmm10, 0x0
+	pclmulqdq	xmm13, xmm10, 0x11
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+
+ movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
+
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+	; instead of 128, we add 128-16 (= 112) to the loop counter to save 1 instruction in the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce: 16 bytes are in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pshufb xmm0, xmm11
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+ ; here we are getting data that is less than 16 bytes.
+ ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+_get_last_two_xmms:
+ movdqa xmm2, xmm7
+
+ movdqu xmm1, [arg2 - 16 + arg3]
+ pshufb xmm1, xmm11
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ movdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ pshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ pxor xmm0, [mask1]
+ pshufb xmm7, xmm0
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ ; fold 16 Bytes
+ movdqa xmm2, xmm1
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0x01 ; H*L
+ pslldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ movdqa xmm0, xmm7
+
+ movdqa xmm1, xmm7
+ pand xmm1, [mask3]
+ pclmulqdq xmm7, xmm10, 0x01
+ pxor xmm7, xmm1
+
+ pclmulqdq xmm7, xmm10, 0x11
+ pxor xmm7, xmm0
+ pextrq rax, xmm7, 0
+
+_cleanup:
+ not rax
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ movdqa xmm11, [SHUF_MASK]
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movq xmm0, arg1 ; get the initial crc value
+ pslldq xmm0, 8 ; align it to its correct place
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ movdqa xmm11, [SHUF_MASK]
+
+ movq xmm0, arg1 ; get the initial crc value
+ pslldq xmm0, 8 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ ; shl r9, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask1]
+
+ pshufb xmm7, xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Right shift (8-length) bytes in XMM
+ add rax, 8
+ movdqu xmm0, [rax]
+	pshufb xmm7, xmm0
+
+ jmp _barrett
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+
+; precomputed constants
+align 16
+
+rk1:
+DQ 0x0000000000000145
+rk2:
+DQ 0x0000000000001db7
+rk3:
+DQ 0x000100000001001a
+rk4:
+DQ 0x001b0000001b015e
+rk5:
+DQ 0x0000000000000145
+rk6:
+DQ 0x0000000000000000
+rk7:
+DQ 0x000000000000001b
+rk8:
+DQ 0x000000000000001b
+rk9:
+DQ 0x0150145145145015
+rk10:
+DQ 0x1c71db6db6db71c7
+rk11:
+DQ 0x0001110110110111
+rk12:
+DQ 0x001aab1ab1ab1aab
+rk13:
+DQ 0x0000014445014445
+rk14:
+DQ 0x00001daab71daab7
+rk15:
+DQ 0x0000000101000101
+rk16:
+DQ 0x0000001b1b001b1b
+rk17:
+DQ 0x0000000001514515
+rk18:
+DQ 0x000000001c6db6c7
+rk19:
+DQ 0x0000000000011011
+rk20:
+DQ 0x00000000001ab1ab
+
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+mask3:
+dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_iso_norm_by8, 01, 00, 0020
diff --git a/src/isa-l/crc/crc64_iso_refl_by16_10.asm b/src/isa-l/crc/crc64_iso_refl_by16_10.asm
new file mode 100644
index 000000000..e5d5a08fe
--- /dev/null
+++ b/src/isa-l/crc/crc64_iso_refl_by16_10.asm
@@ -0,0 +1,495 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; uint64_t crc64_iso_refl_by16_10(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
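+; Unlike the normal-form kernels above, this reflected variant works in the
+; bit-reversed domain: input bytes are consumed exactly as loaded (no
+; SHUF_MASK shuffle) and the rk constants are the bit-reflected counterparts
+; of the normal-form values.
+;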
+%include "reg_sizes.asm"
+
+%ifndef FUNCTION_NAME
+%define FUNCTION_NAME crc64_iso_refl_by16_10
+%endif
+
+%if (AS_FEATURE_LEVEL) >= 10
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*12+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+align 16
+mk_global FUNCTION_NAME, function
+FUNCTION_NAME:
+ endbranch
+ not arg1
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the callee-saved xmm registers on the stack (win64 ABI)
+ vmovdqa [rsp + XMM_SAVE + 16*0], xmm6
+ vmovdqa [rsp + XMM_SAVE + 16*1], xmm7
+ vmovdqa [rsp + XMM_SAVE + 16*2], xmm8
+ vmovdqa [rsp + XMM_SAVE + 16*3], xmm9
+ vmovdqa [rsp + XMM_SAVE + 16*4], xmm10
+ vmovdqa [rsp + XMM_SAVE + 16*5], xmm11
+ vmovdqa [rsp + XMM_SAVE + 16*6], xmm12
+ vmovdqa [rsp + XMM_SAVE + 16*7], xmm13
+ vmovdqa [rsp + XMM_SAVE + 16*8], xmm14
+ vmovdqa [rsp + XMM_SAVE + 16*9], xmm15
+%endif
+
+ cmp arg3, 256
+ jl _less_than_256
+
+ ; load the initial crc value
+ vmovq xmm10, arg1 ; initial crc
+
+	; load the initial 128B of data and xor in the initial crc value
+ vmovdqu8 zmm0, [arg2+16*0]
+ vmovdqu8 zmm4, [arg2+16*4]
+ vpxorq zmm0, zmm10
+ vbroadcasti32x4 zmm10, [rk3] ;zmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+
+ sub arg3, 256
+ cmp arg3, 256
+ jl _fold_128_B_loop
+
+ vmovdqu8 zmm7, [arg2+16*8]
+ vmovdqu8 zmm8, [arg2+16*12]
+	vbroadcasti32x4 zmm16, [rk_1]	;zmm16 has rk_1 and rk_2
+ sub arg3, 256
+
+_fold_256_B_loop:
+ add arg2, 256
+ vmovdqu8 zmm3, [arg2+16*0]
+ vpclmulqdq zmm1, zmm0, zmm16, 0x10
+ vpclmulqdq zmm2, zmm0, zmm16, 0x01
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm3
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpclmulqdq zmm5, zmm4, zmm16, 0x10
+ vpclmulqdq zmm6, zmm4, zmm16, 0x01
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ vmovdqu8 zmm11, [arg2+16*8]
+ vpclmulqdq zmm12, zmm7, zmm16, 0x10
+ vpclmulqdq zmm13, zmm7, zmm16, 0x01
+ vpxorq zmm7, zmm12, zmm13
+ vpxorq zmm7, zmm7, zmm11
+
+ vmovdqu8 zmm17, [arg2+16*12]
+ vpclmulqdq zmm14, zmm8, zmm16, 0x10
+ vpclmulqdq zmm15, zmm8, zmm16, 0x01
+ vpxorq zmm8, zmm14, zmm15
+ vpxorq zmm8, zmm8, zmm17
+
+ sub arg3, 256
+ jge _fold_256_B_loop
+
+ ;; Fold 256 into 128
+ add arg2, 256
+ vpclmulqdq zmm1, zmm0, zmm10, 0x01
+ vpclmulqdq zmm2, zmm0, zmm10, 0x10
+ vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC
+
+ vpclmulqdq zmm5, zmm4, zmm10, 0x01
+ vpclmulqdq zmm6, zmm4, zmm10, 0x10
+ vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC
+
+ vmovdqa32 zmm0, zmm7
+ vmovdqa32 zmm4, zmm8
+
+ add arg3, 128
+ jmp _fold_128_B_register
+
+ ; fold 128B at a time. This section of the code folds 2 zmm registers in parallel
+_fold_128_B_loop:
+ add arg2, 128 ; update the buffer pointer
+ vmovdqu8 zmm8, [arg2+16*0]
+ vpclmulqdq zmm1, zmm0, zmm10, 0x10
+ vpclmulqdq zmm2, zmm0, zmm10, 0x01
+ vpxorq zmm0, zmm1, zmm2
+ vpxorq zmm0, zmm0, zmm8
+
+ vmovdqu8 zmm9, [arg2+16*4]
+ vpclmulqdq zmm5, zmm4, zmm10, 0x10
+ vpclmulqdq zmm6, zmm4, zmm10, 0x01
+ vpxorq zmm4, zmm5, zmm6
+ vpxorq zmm4, zmm4, zmm9
+
+ sub arg3, 128
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 2 zmm registers: zmm0, zmm4
+
+_fold_128_B_register:
+ ; fold the 8 128b parts into 1 xmm register with different constants
+ vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16
+ vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0
+ vpclmulqdq zmm1, zmm0, zmm16, 0x01
+ vpclmulqdq zmm2, zmm0, zmm16, 0x10
+ vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand
+
+ vpclmulqdq zmm5, zmm4, zmm11, 0x01
+ vpclmulqdq zmm6, zmm4, zmm11, 0x10
+ vmovdqa xmm10, [rk1] ; Needed later in reduction loop
+ vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC
+ vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC
+
+ vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10
+ vpxorq ymm8, ymm8, ymm1
+ vextracti64x2 xmm5, ymm8, 1
+ vpxorq xmm7, xmm5, xmm8
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce: 16 bytes are in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x1
+ vpclmulqdq xmm8, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vmovdqu xmm0, [arg2]
+ vpxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ add arg3, 16
+ je _128_done
+ ; here we are getting data that is less than 16 bytes.
+ ; since we know that there was data before the pointer, we can offset
+ ; the input pointer before the actual point, to receive exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+_get_last_two_xmms:
+
+
+ vmovdqa xmm2, xmm7
+ vmovdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ vmovdqu xmm0, [rax]
+
+
+ vpshufb xmm7, xmm0
+ vpxor xmm0, [mask3]
+ vpshufb xmm2, xmm0
+
+ vpblendvb xmm2, xmm2, xmm1, xmm0
+ ;;;;;;;;;;
+ vmovdqa xmm8, xmm7
+ vpclmulqdq xmm7, xmm10, 0x1
+
+ vpclmulqdq xmm8, xmm10, 0x10
+ vpxor xmm7, xmm8
+ vpxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ vmovdqa xmm10, [rk5]
+ vmovdqa xmm0, xmm7
+
+ ;64b fold
+ vpclmulqdq xmm7, xmm10, 0
+ vpsrldq xmm0, 8
+ vpxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
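+	; reflected-domain Barrett step: the quotient estimate is built from the
+	; low qword (imm 0 selects the low halves) and the final remainder lands
+	; in the high qword, hence the vpextrq ..., 1 at the end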
+ vmovdqa xmm1, xmm7
+ vmovdqa xmm10, [rk7]
+
+ vpclmulqdq xmm7, xmm10, 0
+ vmovdqa xmm2, xmm7
+ vpclmulqdq xmm7, xmm10, 0x10
+ vpslldq xmm2, 8
+ vpxor xmm7, xmm2
+ vpxor xmm7, xmm1
+ vpextrq rax, xmm7, 1
+
+_cleanup:
+ not rax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ vmovdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ vmovdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ vmovdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ vmovdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ vmovdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ vmovdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ vmovdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ vmovdqa xmm13, [rsp + XMM_SAVE + 16*7]
+ vmovdqa xmm14, [rsp + XMM_SAVE + 16*8]
+ vmovdqa xmm15, [rsp + XMM_SAVE + 16*9]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+
+ ; if there is, load the constants
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ vmovq xmm0, arg1 ; get the initial crc value
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ vmovq xmm0, arg1 ; get the initial crc value
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ vmovdqu xmm7, [arg2] ; load the plaintext
+ vpxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ vpxor xmm1, xmm1
+ mov r11, rsp
+ vmovdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+_zero_left:
+ vmovdqa xmm7, [rsp]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ vmovdqu xmm0, [rax + r9]
+	vpshufb xmm7, xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Left shift (8-length) bytes in XMM
+ vmovdqu xmm0, [rax + r9 + 8]
+	vpshufb xmm7, xmm0
+
+ jmp _barrett
+
+align 16
+_exact_16_left:
+ vmovdqu xmm7, [arg2]
+ vpxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+align 32
+
+%ifndef USE_CONSTS
+; precomputed constants
+rk_1: dq 0x45000000b0000000
+rk_2: dq 0x6b700000f5000000
+rk1: dq 0xf500000000000001
+rk2: dq 0x6b70000000000001
+rk3: dq 0xb001000000010000
+rk4: dq 0xf501b0000001b000
+rk5: dq 0xf500000000000001
+rk6: dq 0x0000000000000000
+rk7: dq 0xb000000000000001
+rk8: dq 0xb000000000000000
+rk9: dq 0xe014514514501501
+rk10: dq 0x771db6db6db71c71
+rk11: dq 0xa101101101110001
+rk12: dq 0x1ab1ab1ab1aab001
+rk13: dq 0xf445014445000001
+rk14: dq 0x6aab71daab700001
+rk15: dq 0xb100010100000001
+rk16: dq 0x01b001b1b0000001
+rk17: dq 0xe145150000000001
+rk18: dq 0x76db6c7000000001
+rk19: dq 0xa011000000000001
+rk20: dq 0x1b1ab00000000001
+
+rk_1b: dq 0xf500000000000001
+rk_2b: dq 0x6b70000000000001
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%else
+INCLUDE_CONSTS
+%endif
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3: dq 0x8080808080808080, 0x8080808080808080
+
+%else ; The assembler does not support these opcodes; define an empty placeholder symbol on Windows so the build still links.
+%ifidn __OUTPUT_FORMAT__, win64
+global no_ %+ FUNCTION_NAME
+no_ %+ FUNCTION_NAME %+ :
+%endif
+%endif ; (AS_FEATURE_LEVEL) >= 10
diff --git a/src/isa-l/crc/crc64_iso_refl_by8.asm b/src/isa-l/crc/crc64_iso_refl_by8.asm
new file mode 100644
index 000000000..b6dfcf0e4
--- /dev/null
+++ b/src/isa-l/crc/crc64_iso_refl_by8.asm
@@ -0,0 +1,545 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; uint64_t crc64_iso_refl_by8(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+
+align 16
+mk_global crc64_iso_refl_by8, function
+crc64_iso_refl_by8:
+ endbranch
+	; uint64_t c = crc ^ 0xffffffffffffffffL;
+ not arg1
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the callee-saved xmm registers on the stack (win64 ABI)
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+ ; check if smaller than 256B
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movq xmm10, arg1 ; initial crc
+	; load the initial 128B of data and xor in the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point in the code there are 128*x+y (0 <= y < 128) bytes of buffer;
+	; the _fold_128_B_loop will fold 128B at a time until only 128+y bytes remain
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+	pclmulqdq	xmm0, xmm10, 0x10
+	pclmulqdq	xmm8, xmm10, 0x1
+	pclmulqdq	xmm1, xmm10, 0x10
+	pclmulqdq	xmm13, xmm10, 0x1
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+	pclmulqdq	xmm2, xmm10, 0x10
+	pclmulqdq	xmm8, xmm10, 0x1
+	pclmulqdq	xmm3, xmm10, 0x10
+	pclmulqdq	xmm13, xmm10, 0x1
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+	pclmulqdq	xmm4, xmm10, 0x10
+	pclmulqdq	xmm8, xmm10, 0x1
+	pclmulqdq	xmm5, xmm10, 0x10
+	pclmulqdq	xmm13, xmm10, 0x1
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+	pclmulqdq	xmm6, xmm10, 0x10
+	pclmulqdq	xmm8, xmm10, 0x1
+	pclmulqdq	xmm7, xmm10, 0x10
+	pclmulqdq	xmm13, xmm10, 0x1
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+ ; xmm0 to xmm7
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+ ;xmm1 to xmm7
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+ ; xmm6 to xmm7
+ movdqa xmm10, [rk1]
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce: 16 bytes are in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+	; now we have 16+z bytes left to reduce, where 0 <= z < 16.
+	; first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ add arg3, 16
+ je _128_done
+ ; here we are getting data that is less than 16 bytes.
+ ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
+ ; after that the registers need to be adjusted.
+_get_last_two_xmms:
+
+
+ movdqa xmm2, xmm7
+ movdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ movdqu xmm0, [rax]
+
+
+ pshufb xmm7, xmm0
+ pxor xmm0, [mask3]
+ pshufb xmm2, xmm0
+
+ pblendvb xmm2, xmm1 ;xmm0 is implicit
+ ;;;;;;;;;;
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5]
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0
+ psrldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm1, xmm7
+ movdqa xmm10, [rk7]
+
+ pclmulqdq xmm7, xmm10, 0
+ movdqa xmm2, xmm7
+ pclmulqdq xmm7, xmm10, 0x10
+ pslldq xmm2, 8
+ pxor xmm7, xmm2
+ pxor xmm7, xmm1
+ pextrq rax, xmm7, 1
+
+_cleanup:
+	; return c ^ 0xffffffffffffffffL;
+ not rax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movq xmm0, arg1 ; get the initial crc value
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+align 16
+_less_than_32:
+	; move the initial crc to the return value; this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ movq xmm0, arg1 ; get the initial crc value
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+ ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+_zero_left:
+ movdqa xmm7, [rsp]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ movdqu xmm0, [rax + r9]
+	pshufb xmm7, xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Left shift (8-length) bytes in XMM
+ movdqu xmm0, [rax + r9 + 8]
+	pshufb xmm7, xmm0
+
+ jmp _barrett
+
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+
+; precomputed constants
+align 16
+; rk7 = floor(2^128/Q)
+; rk8 = Q
+rk1:
+DQ 0xf500000000000001
+rk2:
+DQ 0x6b70000000000001
+rk3:
+DQ 0xb001000000010000
+rk4:
+DQ 0xf501b0000001b000
+rk5:
+DQ 0xf500000000000001
+rk6:
+DQ 0x0000000000000000
+rk7:
+DQ 0xb000000000000001
+rk8:
+DQ 0xb000000000000000
+rk9:
+DQ 0xe014514514501501
+rk10:
+DQ 0x771db6db6db71c71
+rk11:
+DQ 0xa101101101110001
+rk12:
+DQ 0x1ab1ab1ab1aab001
+rk13:
+DQ 0xf445014445000001
+rk14:
+DQ 0x6aab71daab700001
+rk15:
+DQ 0xb100010100000001
+rk16:
+DQ 0x01b001b1b0000001
+rk17:
+DQ 0xe145150000000001
+rk18:
+DQ 0x76db6c7000000001
+rk19:
+DQ 0xa011000000000001
+rk20:
+DQ 0x1b1ab00000000001
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+
+mask:
+dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2:
+dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3:
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_iso_refl_by8, 01, 00, 0023
diff --git a/src/isa-l/crc/crc64_jones_norm_by16_10.asm b/src/isa-l/crc/crc64_jones_norm_by16_10.asm
new file mode 100644
index 000000000..2c9836b95
--- /dev/null
+++ b/src/isa-l/crc/crc64_jones_norm_by16_10.asm
@@ -0,0 +1,61 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define FUNCTION_NAME crc64_jones_norm_by16_10
+%define USE_CONSTS
+%macro INCLUDE_CONSTS 0
+rk_1: dq 0x44ff5212394b1c52
+rk_2: dq 0x956d6cb0582122b2
+rk1: dq 0x4445ed2750017038
+rk2: dq 0x698b74157cfbd736
+rk3: dq 0x0cfcfb5101c4b775
+rk4: dq 0x65403fd47cbec866
+rk5: dq 0x4445ed2750017038
+rk6: dq 0x0000000000000000
+rk7: dq 0xddf3eeb298be6cf8
+rk8: dq 0xad93d23594c935a9
+rk9: dq 0xd8dc208e2ba527b4
+rk10: dq 0xf032cfec76bb2bc5
+rk11: dq 0xb536044f357f4238
+rk12: dq 0xfdbf104d938ba67a
+rk13: dq 0xeeddad9297a843e7
+rk14: dq 0x3550bce629466473
+rk15: dq 0x4e501e58ca43d25e
+rk16: dq 0x13c961588f27f643
+rk17: dq 0x3b60d00dcb1099bc
+rk18: dq 0x44bf1f468c53b9a3
+rk19: dq 0x96f2236e317179ee
+rk20: dq 0xf00839aa0dd64bac
+rk_1b: dq 0x4445ed2750017038
+rk_2b: dq 0x698b74157cfbd736
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%endm
+
+%include "crc64_iso_norm_by16_10.asm"
diff --git a/src/isa-l/crc/crc64_jones_norm_by8.asm b/src/isa-l/crc/crc64_jones_norm_by8.asm
new file mode 100644
index 000000000..0cf8b4ad9
--- /dev/null
+++ b/src/isa-l/crc/crc64_jones_norm_by8.asm
@@ -0,0 +1,582 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Function API:
+; uint64_t crc64_jones_norm_by8(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
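+; Illustrative caller (a hedged sketch, not part of the original source;
+; "data" and "data_len" are placeholder names). Applications normally reach
+; this routine through the crc64_jones_norm dispatcher in crc64_multibinary.asm:
+;
+;     uint64_t crc = crc64_jones_norm_by8(0, data, data_len);
+;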
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+align 16
+mk_global crc64_jones_norm_by8, function
+crc64_jones_norm_by8:
+ endbranch
+
+ not arg1 ;~init_crc
+
+ sub rsp,VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the xmm registers on the stack (xmm6-xmm13 are callee-saved on win64)
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+
+ ; check if smaller than 256
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movq xmm10, arg1 ; initial crc
+
+	; the crc value does not need to be byte-reflected, but it does need to be moved to the high part of the register,
+	; because the data will be byte-reflected and will then align with the initial crc in the correct place.
+ pslldq xmm10, 8
+
+ movdqa xmm11, [SHUF_MASK]
+ ; receive the initial 128B data, xor the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ pshufb xmm0, xmm11
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ pshufb xmm1, xmm11
+ pshufb xmm2, xmm11
+ pshufb xmm3, xmm11
+ pshufb xmm4, xmm11
+ pshufb xmm5, xmm11
+ pshufb xmm6, xmm11
+ pshufb xmm7, xmm11
+
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point the buffer holds 128*x+y (0<=y<128) bytes. The _fold_128_B_loop
+	; below folds 128B at a time until only 128+y bytes of buffer remain
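+	; worked example (illustrative): for len=300, arg3 is 44 after the
+	; subtraction; the loop folds one more 128B block (arg3: 44 -> -84) and
+	; exits, leaving the final 44 bytes in memory on top of the 128B already
+	; folded in xmm0-xmm7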
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128 ; buf += 128;
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+ pclmulqdq xmm0, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm1, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+ pclmulqdq xmm2, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm3, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+ pclmulqdq xmm4, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm5, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ pshufb xmm9, xmm11
+ pshufb xmm12, xmm11
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+ pclmulqdq xmm6, xmm10, 0x0
+ pclmulqdq xmm8, xmm10 , 0x11
+ pclmulqdq xmm7, xmm10, 0x0
+ pclmulqdq xmm13, xmm10 , 0x11
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+
+ movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2
+
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce. 16 bytes are in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pshufb xmm0, xmm11
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ ; check if any more data to fold. If not, compute the CRC of the final 128 bits
+ add arg3, 16
+ je _128_done
+
+	; here we handle a tail of fewer than 16 bytes.
+	; since we know there was data before the pointer, we can back the input pointer up so that exactly 16 bytes are loaded.
+	; after that, the registers need to be adjusted.
+_get_last_two_xmms:
+ movdqa xmm2, xmm7
+
+ movdqu xmm1, [arg2 - 16 + arg3]
+ pshufb xmm1, xmm11
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, arg3
+ movdqu xmm0, [rax]
+
+ ; shift xmm2 to the left by arg3 bytes
+ pshufb xmm2, xmm0
+
+ ; shift xmm7 to the right by 16-arg3 bytes
+ pxor xmm0, [mask1]
+ pshufb xmm7, xmm0
+ pblendvb xmm1, xmm2 ;xmm0 is implicit
+
+ ; fold 16 Bytes
+ movdqa xmm2, xmm1
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x11
+ pclmulqdq xmm8, xmm10, 0x0
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0x01 ; H*L
+ pslldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;barrett reduction
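+	; rk8 is the Jones polynomial itself (it matches the constant used by
+	; crc64_jones_norm_ref in crc64_ref.h) and rk7 the matching Barrett
+	; constant (cf. the rk7/rk8 note in the reflected variant), so the two
+	; carry-less multiplies below compute the final crc = M mod P over GF(2)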
+_barrett:
+ movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10
+ movdqa xmm0, xmm7
+
+ movdqa xmm1, xmm7
+ pand xmm1, [mask3]
+ pclmulqdq xmm7, xmm10, 0x01
+ pxor xmm7, xmm1
+
+ pclmulqdq xmm7, xmm10, 0x11
+ pxor xmm7, xmm0
+ pextrq rax, xmm7, 0
+
+_cleanup:
+ not rax
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+ movdqa xmm11, [SHUF_MASK]
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movq xmm0, arg1 ; get the initial crc value
+ pslldq xmm0, 8 ; align it to its correct place
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0
+
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ movdqa xmm11, [SHUF_MASK]
+
+ movq xmm0, arg1 ; get the initial crc value
+ pslldq xmm0, 8 ; align it to its correct place
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pshufb xmm7, xmm11 ; byte-reflect the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+align 16
+_less_than_16_left:
+	; use stack space to load fewer than 16 bytes of data; zero out the 16B of memory first.
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+_zero_left:
+ movdqa xmm7, [rsp]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ ; shl r9, 4
+ lea rax, [pshufb_shf_table + 16]
+ sub rax, r9
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ movdqu xmm0, [rax]
+ pxor xmm0, [mask1]
+
+ pshufb xmm7, xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Right shift (8-length) bytes in XMM
+ add rax, 8
+ movdqu xmm0, [rax]
+ pshufb xmm7,xmm0
+
+ jmp _barrett
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pshufb xmm7, xmm11
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+
+; precomputed constants
+align 16
+
+rk1:
+DQ 0x4445ed2750017038
+rk2:
+DQ 0x698b74157cfbd736
+rk3:
+DQ 0x0cfcfb5101c4b775
+rk4:
+DQ 0x65403fd47cbec866
+rk5:
+DQ 0x4445ed2750017038
+rk6:
+DQ 0x0000000000000000
+rk7:
+DQ 0xddf3eeb298be6cf8
+rk8:
+DQ 0xad93d23594c935a9
+rk9:
+DQ 0xd8dc208e2ba527b4
+rk10:
+DQ 0xf032cfec76bb2bc5
+rk11:
+DQ 0xb536044f357f4238
+rk12:
+DQ 0xfdbf104d938ba67a
+rk13:
+DQ 0xeeddad9297a843e7
+rk14:
+DQ 0x3550bce629466473
+rk15:
+DQ 0x4e501e58ca43d25e
+rk16:
+DQ 0x13c961588f27f643
+rk17:
+DQ 0x3b60d00dcb1099bc
+rk18:
+DQ 0x44bf1f468c53b9a3
+rk19:
+DQ 0x96f2236e317179ee
+rk20:
+DQ 0xf00839aa0dd64bac
+
+mask1:
+dq 0x8080808080808080, 0x8080808080808080
+mask2:
+dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF
+mask3:
+dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF
+
+SHUF_MASK:
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_jones_norm_by8, 01, 00, 0026
diff --git a/src/isa-l/crc/crc64_jones_refl_by16_10.asm b/src/isa-l/crc/crc64_jones_refl_by16_10.asm
new file mode 100644
index 000000000..39502729b
--- /dev/null
+++ b/src/isa-l/crc/crc64_jones_refl_by16_10.asm
@@ -0,0 +1,61 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define FUNCTION_NAME crc64_jones_refl_by16_10
+%define USE_CONSTS
+%macro INCLUDE_CONSTS 0
+rk_1: dq 0x9471a5389095fe44
+rk_2: dq 0x9a8908341a6d6d52
+rk1: dq 0x381d0015c96f4444
+rk2: dq 0xd9d7be7d505da32c
+rk3: dq 0x768361524d29ed0b
+rk4: dq 0xcc26fa7c57f8054c
+rk5: dq 0x381d0015c96f4444
+rk6: dq 0x0000000000000000
+rk7: dq 0x3e6cfa329aef9f77
+rk8: dq 0x2b5926535897936a
+rk9: dq 0x5bc94ba8e2087636
+rk10: dq 0x6cf09c8f37710b75
+rk11: dq 0x3885fd59e440d95a
+rk12: dq 0xbccba3936411fb7e
+rk13: dq 0xe4dd0d81cbfce585
+rk14: dq 0xb715e37b96ed8633
+rk15: dq 0xf49784a634f014e4
+rk16: dq 0xaf86efb16d9ab4fb
+rk17: dq 0x7b3211a760160db8
+rk18: dq 0xa062b2319d66692f
+rk19: dq 0xef3d1d18ed889ed2
+rk20: dq 0x6ba4d760ab38201e
+rk_1b: dq 0x381d0015c96f4444
+rk_2b: dq 0xd9d7be7d505da32c
+ dq 0x0000000000000000
+ dq 0x0000000000000000
+%endm
+
+%include "crc64_iso_refl_by16_10.asm"
diff --git a/src/isa-l/crc/crc64_jones_refl_by8.asm b/src/isa-l/crc/crc64_jones_refl_by8.asm
new file mode 100644
index 000000000..eea9c8ddf
--- /dev/null
+++ b/src/isa-l/crc/crc64_jones_refl_by8.asm
@@ -0,0 +1,545 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; uint64_t crc64_jones_refl_by8(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+;
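+; Unlike the normal variant (crc64_jones_norm_by8), this reflected variant
+; consumes data LSB-first, so no SHUF_MASK byte-reflection shuffles are
+; needed anywhere below.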
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %xdefine arg1 rcx
+ %xdefine arg2 rdx
+ %xdefine arg3 r8
+%else
+ %xdefine arg1 rdi
+ %xdefine arg2 rsi
+ %xdefine arg3 rdx
+%endif
+
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+ %define XMM_SAVE 16*2
+ %define VARIABLE_OFFSET 16*10+8
+%else
+ %define VARIABLE_OFFSET 16*2+8
+%endif
+
+
+align 16
+mk_global crc64_jones_refl_by8, function
+crc64_jones_refl_by8:
+ endbranch
+	; uint64_t c = crc ^ 0xffffffffffffffffL;
+ not arg1
+ sub rsp, VARIABLE_OFFSET
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save the xmm registers on the stack (xmm6-xmm13 are callee-saved on win64)
+ movdqa [rsp + XMM_SAVE + 16*0], xmm6
+ movdqa [rsp + XMM_SAVE + 16*1], xmm7
+ movdqa [rsp + XMM_SAVE + 16*2], xmm8
+ movdqa [rsp + XMM_SAVE + 16*3], xmm9
+ movdqa [rsp + XMM_SAVE + 16*4], xmm10
+ movdqa [rsp + XMM_SAVE + 16*5], xmm11
+ movdqa [rsp + XMM_SAVE + 16*6], xmm12
+ movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+ ; check if smaller than 256B
+ cmp arg3, 256
+
+ ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256
+
+
+ ; load the initial crc value
+ movq xmm10, arg1 ; initial crc
+ ; receive the initial 128B data, xor the initial crc value
+ movdqu xmm0, [arg2+16*0]
+ movdqu xmm1, [arg2+16*1]
+ movdqu xmm2, [arg2+16*2]
+ movdqu xmm3, [arg2+16*3]
+ movdqu xmm4, [arg2+16*4]
+ movdqu xmm5, [arg2+16*5]
+ movdqu xmm6, [arg2+16*6]
+ movdqu xmm7, [arg2+16*7]
+
+ ; XOR the initial_crc value
+ pxor xmm0, xmm10
+ movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+ ;imm value of pclmulqdq instruction will determine which constant to use
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; we subtract 256 instead of 128 to save one instruction from the loop
+ sub arg3, 256
+
+	; at this point the buffer holds 128*x+y (0<=y<128) bytes. The _fold_128_B_loop
+	; below folds 128B at a time until only 128+y bytes of buffer remain
+
+
+ ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+ ; update the buffer pointer
+ add arg2, 128
+
+ prefetchnta [arg2+fetch_dist+0]
+ movdqu xmm9, [arg2+16*0]
+ movdqu xmm12, [arg2+16*1]
+ movdqa xmm8, xmm0
+ movdqa xmm13, xmm1
+ pclmulqdq xmm0, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm1, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm0, xmm9
+ xorps xmm0, xmm8
+ pxor xmm1, xmm12
+ xorps xmm1, xmm13
+
+ prefetchnta [arg2+fetch_dist+32]
+ movdqu xmm9, [arg2+16*2]
+ movdqu xmm12, [arg2+16*3]
+ movdqa xmm8, xmm2
+ movdqa xmm13, xmm3
+ pclmulqdq xmm2, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm3, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm2, xmm9
+ xorps xmm2, xmm8
+ pxor xmm3, xmm12
+ xorps xmm3, xmm13
+
+ prefetchnta [arg2+fetch_dist+64]
+ movdqu xmm9, [arg2+16*4]
+ movdqu xmm12, [arg2+16*5]
+ movdqa xmm8, xmm4
+ movdqa xmm13, xmm5
+ pclmulqdq xmm4, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm5, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm4, xmm9
+ xorps xmm4, xmm8
+ pxor xmm5, xmm12
+ xorps xmm5, xmm13
+
+ prefetchnta [arg2+fetch_dist+96]
+ movdqu xmm9, [arg2+16*6]
+ movdqu xmm12, [arg2+16*7]
+ movdqa xmm8, xmm6
+ movdqa xmm13, xmm7
+ pclmulqdq xmm6, xmm10, 0x10
+ pclmulqdq xmm8, xmm10 , 0x1
+ pclmulqdq xmm7, xmm10, 0x10
+ pclmulqdq xmm13, xmm10 , 0x1
+ pxor xmm6, xmm9
+ xorps xmm6, xmm8
+ pxor xmm7, xmm12
+ xorps xmm7, xmm13
+
+ sub arg3, 128
+
+ ; check if there is another 128B in the buffer to be able to fold
+ jge _fold_128_B_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ add arg2, 128
+ ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+ ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+ ; fold the 8 xmm registers to 1 xmm register with different constants
+ ; xmm0 to xmm7
+ movdqa xmm10, [rk9]
+ movdqa xmm8, xmm0
+ pclmulqdq xmm0, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm0
+ ;xmm1 to xmm7
+ movdqa xmm10, [rk11]
+ movdqa xmm8, xmm1
+ pclmulqdq xmm1, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm1
+
+ movdqa xmm10, [rk13]
+ movdqa xmm8, xmm2
+ pclmulqdq xmm2, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+ movdqa xmm10, [rk15]
+ movdqa xmm8, xmm3
+ pclmulqdq xmm3, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm3
+
+ movdqa xmm10, [rk17]
+ movdqa xmm8, xmm4
+ pclmulqdq xmm4, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm4
+
+ movdqa xmm10, [rk19]
+ movdqa xmm8, xmm5
+ pclmulqdq xmm5, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ xorps xmm7, xmm5
+ ; xmm6 to xmm7
+ movdqa xmm10, [rk1]
+ movdqa xmm8, xmm6
+ pclmulqdq xmm6, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm6
+
+
+ ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+ ; instead of a cmp instruction, we use the negative flag with the jl instruction
+ add arg3, 128-16
+ jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce. 16 bytes are in register xmm7 and the rest is in memory
+ ; we can fold 16 bytes at a time if y>=16
+ ; continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ movdqu xmm0, [arg2]
+ pxor xmm7, xmm0
+ add arg2, 16
+ sub arg3, 16
+ ; instead of a cmp instruction, we utilize the flags with the jge instruction
+ ; equivalent of: cmp arg3, 16-16
+ ; check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ ;now we have 16+z bytes left to reduce, where 0<= z < 16.
+ ;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ add arg3, 16
+ je _128_done
+	; here we handle a tail of fewer than 16 bytes.
+	; since we know there was data before the pointer, we can back the input pointer up so that exactly 16 bytes are loaded.
+	; after that, the registers need to be adjusted.
+_get_last_two_xmms:
+
+
+ movdqa xmm2, xmm7
+ movdqu xmm1, [arg2 - 16 + arg3]
+
+ ; get rid of the extra data that was loaded before
+ ; load the shift constant
+ lea rax, [pshufb_shf_table]
+ add rax, arg3
+ movdqu xmm0, [rax]
+
+
+ pshufb xmm7, xmm0
+ pxor xmm0, [mask3]
+ pshufb xmm2, xmm0
+
+ pblendvb xmm2, xmm1 ;xmm0 is implicit
+ ;;;;;;;;;;
+ movdqa xmm8, xmm7
+ pclmulqdq xmm7, xmm10, 0x1
+
+ pclmulqdq xmm8, xmm10, 0x10
+ pxor xmm7, xmm8
+ pxor xmm7, xmm2
+
+_128_done:
+ ; compute crc of a 128-bit value
+ movdqa xmm10, [rk5]
+ movdqa xmm0, xmm7
+
+ ;64b fold
+ pclmulqdq xmm7, xmm10, 0
+ psrldq xmm0, 8
+ pxor xmm7, xmm0
+
+ ;barrett reduction
+_barrett:
+ movdqa xmm1, xmm7
+ movdqa xmm10, [rk7]
+
+ pclmulqdq xmm7, xmm10, 0
+ movdqa xmm2, xmm7
+ pclmulqdq xmm7, xmm10, 0x10
+ pslldq xmm2, 8
+ pxor xmm7, xmm2
+ pxor xmm7, xmm1
+ pextrq rax, xmm7, 1
+
+_cleanup:
+	; return c ^ 0xffffffffffffffffL;
+ not rax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+ movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+ movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+ movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+ movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+ movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+ movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+ movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+ add rsp, VARIABLE_OFFSET
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+ ; check if there is enough buffer to be able to fold 16B at a time
+ cmp arg3, 32
+ jl _less_than_32
+
+ ; if there is, load the constants
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+ movq xmm0, arg1 ; get the initial crc value
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0
+
+ ; update the buffer pointer
+ add arg2, 16
+
+ ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+ sub arg3, 32
+
+ jmp _16B_reduction_loop
+
+align 16
+_less_than_32:
+ ; mov initial crc to the return value. this is necessary for zero-length buffers.
+ mov rax, arg1
+ test arg3, arg3
+ je _cleanup
+
+ movq xmm0, arg1 ; get the initial crc value
+
+ cmp arg3, 16
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu xmm7, [arg2] ; load the plaintext
+ pxor xmm7, xmm0 ; xor the initial crc value
+ add arg2, 16
+ sub arg3, 16
+ movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+	; use stack space to load fewer than 16 bytes of data; zero out the 16B of memory first.
+
+ pxor xmm1, xmm1
+ mov r11, rsp
+ movdqa [r11], xmm1
+
+ ; backup the counter value
+ mov r9, arg3
+ cmp arg3, 8
+ jl _less_than_8_left
+
+ ; load 8 Bytes
+ mov rax, [arg2]
+ mov [r11], rax
+ add r11, 8
+ sub arg3, 8
+ add arg2, 8
+_less_than_8_left:
+
+ cmp arg3, 4
+ jl _less_than_4_left
+
+ ; load 4 Bytes
+ mov eax, [arg2]
+ mov [r11], eax
+ add r11, 4
+ sub arg3, 4
+ add arg2, 4
+_less_than_4_left:
+
+ cmp arg3, 2
+ jl _less_than_2_left
+
+ ; load 2 Bytes
+ mov ax, [arg2]
+ mov [r11], ax
+ add r11, 2
+ sub arg3, 2
+ add arg2, 2
+_less_than_2_left:
+ cmp arg3, 1
+ jl _zero_left
+
+ ; load 1 Byte
+ mov al, [arg2]
+ mov [r11], al
+
+_zero_left:
+ movdqa xmm7, [rsp]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ lea rax,[pshufb_shf_table]
+
+ cmp r9, 8
+ jl _end_1to7
+
+_end_8to15:
+ movdqu xmm0, [rax + r9]
+ pshufb xmm7,xmm0
+ jmp _128_done
+
+_end_1to7:
+ ; Left shift (8-length) bytes in XMM
+ movdqu xmm0, [rax + r9 + 8]
+ pshufb xmm7,xmm0
+
+ jmp _barrett
+
+align 16
+_exact_16_left:
+ movdqu xmm7, [arg2]
+ pxor xmm7, xmm0 ; xor the initial crc value
+
+ jmp _128_done
+
+section .data
+
+; precomputed constants
+align 16
+; rk7 = floor(2^128/Q)
+; rk8 = Q
+rk1:
+DQ 0x381d0015c96f4444
+rk2:
+DQ 0xd9d7be7d505da32c
+rk3:
+DQ 0x768361524d29ed0b
+rk4:
+DQ 0xcc26fa7c57f8054c
+rk5:
+DQ 0x381d0015c96f4444
+rk6:
+DQ 0x0000000000000000
+rk7:
+DQ 0x3e6cfa329aef9f77
+rk8:
+DQ 0x2b5926535897936a
+rk9:
+DQ 0x5bc94ba8e2087636
+rk10:
+DQ 0x6cf09c8f37710b75
+rk11:
+DQ 0x3885fd59e440d95a
+rk12:
+DQ 0xbccba3936411fb7e
+rk13:
+DQ 0xe4dd0d81cbfce585
+rk14:
+DQ 0xb715e37b96ed8633
+rk15:
+DQ 0xf49784a634f014e4
+rk16:
+DQ 0xaf86efb16d9ab4fb
+rk17:
+DQ 0x7b3211a760160db8
+rk18:
+DQ 0xa062b2319d66692f
+rk19:
+DQ 0xef3d1d18ed889ed2
+rk20:
+DQ 0x6ba4d760ab38201e
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction
+; different alignments result in values as shown:
+; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+
+mask:
+dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2:
+dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3:
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_jones_refl_by8, 01, 00, 0029
diff --git a/src/isa-l/crc/crc64_multibinary.asm b/src/isa-l/crc/crc64_multibinary.asm
new file mode 100644
index 000000000..3e06a0ecb
--- /dev/null
+++ b/src/isa-l/crc/crc64_multibinary.asm
@@ -0,0 +1,92 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; uint64_t crc64_func(uint64_t init_crc, const unsigned char *buf, uint64_t len);
+;;;
+
+default rel
+[bits 64]
+
+%include "reg_sizes.asm"
+
+extern crc64_ecma_refl_by8
+extern crc64_ecma_refl_base
+
+extern crc64_ecma_norm_by8
+extern crc64_ecma_norm_base
+
+extern crc64_iso_refl_by8
+extern crc64_iso_refl_base
+
+extern crc64_iso_norm_by8
+extern crc64_iso_norm_base
+
+extern crc64_jones_refl_by8
+extern crc64_jones_refl_base
+
+extern crc64_jones_norm_by8
+extern crc64_jones_norm_base
+
+%if (AS_FEATURE_LEVEL) >= 10
+extern crc64_iso_refl_by16_10
+extern crc64_iso_norm_by16_10
+extern crc64_jones_refl_by16_10
+extern crc64_jones_norm_by16_10
+extern crc64_ecma_refl_by16_10
+extern crc64_ecma_norm_by16_10
+%endif
+
+section .text
+
+%include "multibinary.asm"
+
+mbin_interface crc64_ecma_refl
+mbin_dispatch_init7 crc64_ecma_refl, crc64_ecma_refl_base, crc64_ecma_refl_by8, crc64_ecma_refl_by8, crc64_ecma_refl_by8, crc64_ecma_refl_by8, crc64_ecma_refl_by16_10
+mbin_interface crc64_ecma_norm
+mbin_dispatch_init7 crc64_ecma_norm, crc64_ecma_norm_base, crc64_ecma_norm_by8, crc64_ecma_norm_by8, crc64_ecma_norm_by8, crc64_ecma_norm_by8, crc64_ecma_norm_by16_10
+
+mbin_interface crc64_iso_refl
+mbin_dispatch_init7 crc64_iso_refl, crc64_iso_refl_base, crc64_iso_refl_by8, crc64_iso_refl_by8, crc64_iso_refl_by8, crc64_iso_refl_by8, crc64_iso_refl_by16_10
+mbin_interface crc64_iso_norm
+mbin_dispatch_init7 crc64_iso_norm, crc64_iso_norm_base, crc64_iso_norm_by8, crc64_iso_norm_by8, crc64_iso_norm_by8, crc64_iso_norm_by8, crc64_iso_norm_by16_10
+
+mbin_interface crc64_jones_refl
+mbin_dispatch_init7 crc64_jones_refl, crc64_jones_refl_base, crc64_jones_refl_by8, crc64_jones_refl_by8, crc64_jones_refl_by8, crc64_jones_refl_by8, crc64_jones_refl_by16_10
+mbin_interface crc64_jones_norm
+mbin_dispatch_init7 crc64_jones_norm, crc64_jones_norm_base, crc64_jones_norm_by8, crc64_jones_norm_by8, crc64_jones_norm_by8, crc64_jones_norm_by8, crc64_jones_norm_by16_10
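+
+; The mbin_interface/mbin_dispatch_init7 pairs above publish function pointers
+; that are resolved on first call: the _base C version is the fallback, the
+; _by8 PCLMULQDQ version covers SSE/AVX-class CPUs, and the _by16_10 version
+; is selected when the newer vector features it requires are available (a
+; summary of the multibinary convention; see multibinary.asm).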
+
+;;; func core, ver, snum
+slversion crc64_ecma_refl, 00, 00, 001b
+slversion crc64_ecma_norm, 00, 00, 0018
+slversion crc64_iso_refl, 00, 00, 0021
+slversion crc64_iso_norm, 00, 00, 001e
+slversion crc64_jones_refl, 00, 00, 0027
+slversion crc64_jones_norm, 00, 00, 0024
diff --git a/src/isa-l/crc/crc64_ref.h b/src/isa-l/crc/crc64_ref.h
new file mode 100644
index 000000000..b30f63866
--- /dev/null
+++ b/src/isa-l/crc/crc64_ref.h
@@ -0,0 +1,148 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _CRC64_REF_H
+#define _CRC64_REF_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "crc64.h"
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+#define MAX_ITER 8
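+
+/* MAX_ITER == 8: the bit-at-a-time loops below perform one polynomial-division
+ * step per bit of each input byte. */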
+
+// crc64_ecma reference function, slow crc64 from the definition.
+static inline uint64_t crc64_ecma_refl_ref(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint64_t poly = 0xC96C5795D7870F42ULL; // ECMA-182 standard reflected
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ (uint64_t) buf[i];
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x1ULL ? poly : 0) ^ (rem >> 1);
+ }
+ }
+ return ~rem;
+}
+
+static inline uint64_t crc64_ecma_norm_ref(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint64_t poly = 0x42F0E1EBA9EA3693ULL; // ECMA-182 standard
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ ((uint64_t) buf[i] << 56);
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x8000000000000000ULL ? poly : 0) ^ (rem << 1);
+ }
+ }
+ return ~rem;
+}
+
+// crc64_iso reference function, slow crc64 from the definition.
+static inline uint64_t crc64_iso_refl_ref(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint64_t poly = 0xD800000000000000ULL; // ISO standard reflected
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ (uint64_t) buf[i];
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x1ULL ? poly : 0) ^ (rem >> 1);
+ }
+ }
+ return ~rem;
+}
+
+static inline uint64_t crc64_iso_norm_ref(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint64_t poly = 0x000000000000001BULL; // ISO standard
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ ((uint64_t) buf[i] << 56);
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x8000000000000000ULL ? poly : 0) ^ (rem << 1);
+ }
+ }
+ return ~rem;
+}
+
+// crc64_jones reference function, slow crc64 from the definition.
+static inline uint64_t crc64_jones_refl_ref(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint64_t poly = 0x95ac9329ac4bc9b5ULL; // Jones coefficients reflected
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ (uint64_t) buf[i];
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x1ULL ? poly : 0) ^ (rem >> 1);
+ }
+ }
+ return ~rem;
+}
+
+static inline uint64_t crc64_jones_norm_ref(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint64_t poly = 0xad93d23594c935a9ULL; // Jones coefficients
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ ((uint64_t) buf[i] << 56);
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x8000000000000000ULL ? poly : 0) ^ (rem << 1);
+ }
+ }
+ return ~rem;
+}
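+
+/* Illustrative self-check (a hedged sketch, not part of the original header):
+ * with a zero seed, crc64_ecma_refl_ref implements the standard CRC-64/XZ
+ * parameters, whose check value for the 9-byte string "123456789" is
+ * 0x995dc9bbdf1939fa:
+ *
+ *     static const uint8_t msg[9] = "123456789";
+ *     uint64_t c = crc64_ecma_refl_ref(0, msg, sizeof(msg));
+ *     // expect c == 0x995dc9bbdf1939fa
+ */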
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/isa-l/crc/crc_base.c b/src/isa-l/crc/crc_base.c
new file mode 100644
index 000000000..d1eb2d22e
--- /dev/null
+++ b/src/isa-l/crc/crc_base.c
@@ -0,0 +1,351 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "crc.h"
+
+static const uint16_t crc16tab[256] = {
+ 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+ 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+ 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+ 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+ 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+ 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+ 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+ 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+ 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+ 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+ 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+ 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+ 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+ 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+ 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+ 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+ 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+ 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+ 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+ 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+ 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+ 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+ 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+ 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+ 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+ 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+ 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+ 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+ 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+ 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+ 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+ 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
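+
+/* Each entry crc16tab[i] is the CRC-16/T10-DIF remainder (polynomial 0x8BB7)
+ * of the single byte i, e.g. crc16tab[1] == 0x8BB7. A hedged sketch of how
+ * one such entry can be generated (i is the table index):
+ *
+ *     uint16_t e = (uint16_t)(i << 8);
+ *     for (int b = 0; b < 8; b++)
+ *             e = (e & 0x8000) ? (e << 1) ^ 0x8BB7 : e << 1;
+ */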
+
+static const uint32_t crc32_table_iscsi_refl[256] = {
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+};
+
+static const uint32_t crc32_table_ieee_norm[256] = {
+ 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
+ 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
+ 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
+ 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
+ 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
+ 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
+ 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
+ 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
+ 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
+ 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
+ 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
+ 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
+ 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
+ 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
+ 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
+ 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
+ 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
+ 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
+ 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
+ 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
+ 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
+ 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
+ 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
+ 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
+ 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
+ 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
+ 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
+ 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
+ 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
+ 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
+ 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
+ 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
+ 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
+ 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
+ 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
+ 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
+ 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
+ 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
+ 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
+ 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
+ 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
+ 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+ 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
+ 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
+ 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
+ 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
+ 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
+ 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
+ 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
+ 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
+ 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
+ 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
+ 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
+ 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
+ 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
+ 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
+ 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
+ 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
+ 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
+ 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
+ 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
+ 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
+ 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
+ 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
+};
+
+static const uint32_t crc32_table_gzip_refl[256] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+uint16_t crc16_t10dif_base(uint16_t seed, uint8_t * buf, uint64_t len)
+{
+	uint64_t i;
+ uint16_t crc = seed;
+
+ for (i = 0; i < len; i++)
+ crc = (crc << 8) ^ crc16tab[((crc >> 8) ^ *buf++) & 0x00FF];
+
+ return crc;
+}
+
+uint16_t crc16_t10dif_copy_base(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len)
+{
+	uint64_t i;
+ uint16_t crc = seed;
+
+ for (i = 0; i < len; i++) {
+ crc = (crc << 8) ^ crc16tab[((crc >> 8) ^ *src) & 0x00FF];
+ *dst++ = *src++;
+ }
+
+ return crc;
+}
+
+unsigned int crc32_iscsi_base(unsigned char *buffer, int len, unsigned int crc_init)
+{
+ unsigned int crc;
+ unsigned char *p_buf;
+ unsigned char *p_end = buffer + len;
+
+ p_buf = buffer;
+ crc = crc_init;
+
+ while (p_buf < p_end) {
+ crc = (crc >> 8) ^ crc32_table_iscsi_refl[(crc & 0x000000FF) ^ *p_buf++];
+ }
+ return crc;
+}
+
+uint32_t crc32_ieee_base(uint32_t seed, uint8_t * buf, uint64_t len)
+{
+ unsigned int crc = ~seed;
+
+ while (len--) {
+ crc = (crc << 8) ^ crc32_table_ieee_norm[((crc >> 24) ^ *buf) & 255];
+ buf++;
+ }
+
+ return ~crc;
+}
+
+uint32_t crc32_gzip_refl_base(uint32_t seed, uint8_t * buf, uint64_t len)
+{
+ unsigned int crc;
+ unsigned char *p_buf;
+ unsigned char *p_end = buf + len;
+
+ p_buf = (unsigned char *)buf;
+ crc = ~seed;
+
+ while (p_buf < p_end) {
+ crc = (crc >> 8) ^ crc32_table_gzip_refl[(crc & 0x000000FF) ^ *p_buf++];
+ }
+
+ return ~crc;
+}
+
+struct slver {
+ unsigned short snum;
+ unsigned char ver;
+ unsigned char core;
+};
+
+struct slver crc32_iscsi_base_slver_0001011d;
+struct slver crc32_iscsi_base_slver = { 0x011d, 0x02, 0x00 };
+
+struct slver crc16_t10dif_base_slver_0001011e;
+struct slver crc16_t10dif_base_slver = { 0x011e, 0x02, 0x00 };
+
+struct slver crc32_ieee_base_slver_0001011f;
+struct slver crc32_ieee_base_slver = { 0x011f, 0x02, 0x00 };
+
+struct slver crc32_gzip_refl_base_slver_0000002b;
+struct slver crc32_gzip_refl_base_slver = { 0x002b, 0x00, 0x00 };
diff --git a/src/isa-l/crc/crc_base_aliases.c b/src/isa-l/crc/crc_base_aliases.c
new file mode 100644
index 000000000..0ffc62f96
--- /dev/null
+++ b/src/isa-l/crc/crc_base_aliases.c
@@ -0,0 +1,87 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "crc.h"
+#include "crc64.h"
+#include <stdint.h>
+
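+/* On builds without an optimized multibinary dispatcher, the public CRC
+ * entry points below alias directly to the portable base implementations. */
+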
+unsigned int crc32_iscsi(unsigned char *buffer, int len, unsigned int crc_init)
+{
+ return crc32_iscsi_base(buffer, len, crc_init);
+}
+
+uint16_t crc16_t10dif(uint16_t seed, const unsigned char *buf, uint64_t len)
+{
+ return crc16_t10dif_base(seed, (uint8_t *) buf, len);
+}
+
+uint16_t crc16_t10dif_copy(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len)
+{
+ return crc16_t10dif_copy_base(seed, dst, src, len);
+}
+
+uint32_t crc32_ieee(uint32_t seed, const unsigned char *buf, uint64_t len)
+{
+ return crc32_ieee_base(seed, (uint8_t *) buf, len);
+}
+
+uint32_t crc32_gzip_refl(uint32_t seed, const unsigned char *buf, uint64_t len)
+{
+ return crc32_gzip_refl_base(seed, (uint8_t *) buf, len);
+}
+
+uint64_t crc64_ecma_refl(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ return crc64_ecma_refl_base(seed, buf, len);
+}
+
+uint64_t crc64_ecma_norm(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ return crc64_ecma_norm_base(seed, buf, len);
+}
+
+uint64_t crc64_iso_refl(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ return crc64_iso_refl_base(seed, buf, len);
+}
+
+uint64_t crc64_iso_norm(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ return crc64_iso_norm_base(seed, buf, len);
+}
+
+uint64_t crc64_jones_refl(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ return crc64_jones_refl_base(seed, buf, len);
+}
+
+uint64_t crc64_jones_norm(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+ return crc64_jones_norm_base(seed, buf, len);
+}
diff --git a/src/isa-l/crc/crc_multibinary.asm b/src/isa-l/crc/crc_multibinary.asm
new file mode 100644
index 000000000..a28a468fc
--- /dev/null
+++ b/src/isa-l/crc/crc_multibinary.asm
@@ -0,0 +1,328 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+default rel
+[bits 64]
+
+%include "reg_sizes.asm"
+
+extern crc32_iscsi_00
+extern crc32_iscsi_01
+extern crc32_iscsi_base
+
+extern crc32_ieee_01
+extern crc32_ieee_by4 ;; Optimized for SLM
+extern crc32_ieee_02
+extern crc32_ieee_base
+
+extern crc16_t10dif_01
+extern crc16_t10dif_by4 ;; Optimized for SLM
+extern crc16_t10dif_02
+extern crc16_t10dif_base
+
+extern crc32_gzip_refl_by8
+extern crc32_gzip_refl_by8_02
+extern crc32_gzip_refl_base
+
+extern crc16_t10dif_copy_by4
+extern crc16_t10dif_copy_by4_02
+extern crc16_t10dif_copy_base
+
+%if (AS_FEATURE_LEVEL) >= 10
+extern crc32_gzip_refl_by16_10
+extern crc32_ieee_by16_10
+extern crc32_iscsi_by16_10
+extern crc16_t10dif_by16_10
+%endif
+
+%include "multibinary.asm"
+
+section .data
+;;; *_mbinit are the initial values for *_dispatched, which is updated on the
+;;; first call. Therefore, *_dispatch_init is only executed on the first call.
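+;;;
+;;; A rough C sketch of this lazy-dispatch pattern (illustrative only):
+;;;   static crc_fn *dispatched = mbinit;   /* first call lands in mbinit */
+;;;   crc32_iscsi(...) { jump *dispatched; } /* one indirect jump per call */
+;;; mbinit calls dispatch_init, which probes CPUID and stores the best
+;;; implementation back into *_dispatched; later calls jump straight there.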
+
+crc32_iscsi_dispatched:
+ dq crc32_iscsi_mbinit
+
+crc32_ieee_dispatched:
+ dq crc32_ieee_mbinit
+
+crc16_t10dif_dispatched:
+ dq crc16_t10dif_mbinit
+
+section .text
+;;;;
+; crc32_iscsi multibinary function
+;;;;
+mk_global crc32_iscsi, function
+crc32_iscsi_mbinit:
+ endbranch
+ call crc32_iscsi_dispatch_init
+crc32_iscsi:
+ endbranch
+ jmp qword [crc32_iscsi_dispatched]
+
+crc32_iscsi_dispatch_init:
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ push rsi
+ push rdi
+ lea rsi, [crc32_iscsi_base WRT_OPT] ; Default
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE4_2
+ jz .crc_iscsi_init_done ; use iscsi_base
+ lea rsi, [crc32_iscsi_00 WRT_OPT]
+ test ecx, FLAG_CPUID1_ECX_CLMUL
+	jz	.crc_iscsi_init_done	; use iscsi_00
+ lea rsi, [crc32_iscsi_01 WRT_OPT]
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je .crc_iscsi_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+	mov	edi, eax		; save xgetbv.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne .crc_iscsi_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je .crc_iscsi_init_done
+	;; AVX present; iscsi has no separate AVX/02 version, check AVX512 below
+
+%if AS_FEATURE_LEVEL >= 10
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je .crc_iscsi_init_done ; No AVX2 possible
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne .crc_iscsi_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ jne .crc_iscsi_init_done
+
+ and ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ lea rbx, [crc32_iscsi_by16_10 WRT_OPT] ; AVX512/10 opt
+ cmove rsi, rbx
+%endif
+
+.crc_iscsi_init_done:
+ mov [crc32_iscsi_dispatched], rsi
+ pop rdi
+ pop rsi
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+ ret
+
+;;;;
+; crc32_ieee multibinary function
+;;;;
+mk_global crc32_ieee, function
+crc32_ieee_mbinit:
+ endbranch
+ call crc32_ieee_dispatch_init
+crc32_ieee:
+ endbranch
+ jmp qword [crc32_ieee_dispatched]
+
+crc32_ieee_dispatch_init:
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ push rsi
+ push rdi
+ lea rsi, [crc32_ieee_base WRT_OPT] ; Default
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE3
+ jz .crc_ieee_init_done ; use ieee_base
+ test ecx, FLAG_CPUID1_ECX_CLMUL
+ jz .crc_ieee_init_done ; use ieee_base
+ lea rsi, [crc32_ieee_01 WRT_OPT]
+
+ ;; Extra Avoton test
+ lea rdx, [crc32_ieee_by4 WRT_OPT]
+ and eax, FLAG_CPUID1_EAX_STEP_MASK
+ cmp eax, FLAG_CPUID1_EAX_AVOTON
+ cmove rsi, rdx
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je .crc_ieee_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+	mov	edi, eax		; save xgetbv.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne .crc_ieee_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je .crc_ieee_init_done
+ lea rsi, [crc32_ieee_02 WRT_OPT] ; AVX/02 opt
+
+%if AS_FEATURE_LEVEL >= 10
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je .crc_ieee_init_done ; No AVX2 possible
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne .crc_ieee_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ jne .crc_ieee_init_done
+
+ and ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ lea rbx, [crc32_ieee_by16_10 WRT_OPT] ; AVX512/10 opt
+ cmove rsi, rbx
+%endif
+
+.crc_ieee_init_done:
+ mov [crc32_ieee_dispatched], rsi
+ pop rdi
+ pop rsi
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+ ret
+
+;;;;
+; crc16_t10dif multibinary function
+;;;;
+mk_global crc16_t10dif, function
+crc16_t10dif_mbinit:
+ endbranch
+ call crc16_t10dif_dispatch_init
+crc16_t10dif:
+ endbranch
+ jmp qword [crc16_t10dif_dispatched]
+
+crc16_t10dif_dispatch_init:
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ push rsi
+ push rdi
+ lea rsi, [crc16_t10dif_base WRT_OPT] ; Default
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE3
+ jz .t10dif_init_done ; use t10dif_base
+ test ecx, FLAG_CPUID1_ECX_CLMUL
+ jz .t10dif_init_done ; use t10dif_base
+ lea rsi, [crc16_t10dif_01 WRT_OPT]
+
+ ;; Extra Avoton test
+ lea rdx, [crc16_t10dif_by4 WRT_OPT]
+ and eax, FLAG_CPUID1_EAX_STEP_MASK
+ cmp eax, FLAG_CPUID1_EAX_AVOTON
+ cmove rsi, rdx
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je .t10dif_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+	mov	edi, eax		; save xgetbv.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne .t10dif_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je .t10dif_init_done
+ lea rsi, [crc16_t10dif_02 WRT_OPT] ; AVX/02 opt
+
+%if AS_FEATURE_LEVEL >= 10
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je .t10dif_init_done ; No AVX2 possible
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne .t10dif_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ jne .t10dif_init_done
+
+ and ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ lea rbx, [crc16_t10dif_by16_10 WRT_OPT] ; AVX512/10 opt
+ cmove rsi, rbx
+%endif
+
+.t10dif_init_done:
+ mov [crc16_t10dif_dispatched], rsi
+ pop rdi
+ pop rsi
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+ ret
+
+mbin_interface crc32_gzip_refl
+mbin_dispatch_init_clmul crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_by8, crc32_gzip_refl_by8_02, crc32_gzip_refl_by16_10
+
+mbin_interface crc16_t10dif_copy
+mbin_dispatch_init_clmul crc16_t10dif_copy, crc16_t10dif_copy_base, crc16_t10dif_copy_by4, crc16_t10dif_copy_by4_02, crc16_t10dif_copy_by4_02
+
+;;; func core, ver, snum
+slversion crc16_t10dif, 00, 03, 011a
+slversion crc32_ieee, 00, 03, 011b
+slversion crc32_iscsi, 00, 03, 011c
+slversion crc32_gzip_refl, 00, 00, 002a
diff --git a/src/isa-l/crc/crc_ref.h b/src/isa-l/crc/crc_ref.h
new file mode 100644
index 000000000..e97a60b5e
--- /dev/null
+++ b/src/isa-l/crc/crc_ref.h
@@ -0,0 +1,140 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _CRC_REF_H
+#define _CRC_REF_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "crc.h"
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+#define MAX_ITER 8
+
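+// The *_ref functions below compute each CRC bit-by-bit from its polynomial
+// definition, iterating MAX_ITER (8) bit steps per input byte. They are slow
+// but serve as a correctness reference for the optimized implementations.
+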
+// iSCSI CRC reference function
+static inline unsigned int crc32_iscsi_ref(unsigned char *buffer, int len, unsigned int crc_init)
+{
+ uint64_t rem = crc_init;
+ int i, j;
+
+	uint32_t poly = 0x82F63B78;	// reflected Castagnoli polynomial (CRC32C)
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ (buffer[i]);
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x1ULL) ? (rem >> 1) ^ poly : (rem >> 1);
+ }
+ }
+ return rem;
+}
+
+// crc16_t10dif reference function, slow crc16 from the definition.
+static inline uint16_t crc16_t10dif_ref(uint16_t seed, uint8_t * buf, uint64_t len)
+{
+ size_t rem = seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint16_t poly = 0x8bb7; // t10dif standard
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ (buf[i] << 8);
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = rem << 1;
+ rem = (rem & 0x10000) ? rem ^ poly : rem;
+ }
+ }
+ return rem;
+}
+
+// crc16_t10dif_copy reference function, slow crc16 from the definition that
+// also copies src to dst.
+static inline uint16_t crc16_t10dif_copy_ref(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len)
+{
+ size_t rem = seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint16_t poly = 0x8bb7; // t10dif standard
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ (src[i] << 8);
+ dst[i] = src[i];
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = rem << 1;
+ rem = (rem & 0x10000) ? rem ^ poly : rem;
+ }
+ }
+ return rem;
+}
+
+// crc32_ieee reference function, slow crc32 from the definition.
+static inline uint32_t crc32_ieee_ref(uint32_t seed, uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	unsigned int j;
+
+ uint32_t poly = 0x04C11DB7; // IEEE standard
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ ((uint64_t) buf[i] << 24);
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = rem << 1;
+ rem = (rem & 0x100000000ULL) ? rem ^ poly : rem;
+ }
+ }
+ return ~rem;
+}
+
+// crc32_gzip_refl reference function, slow crc32 from the definition.
+// See crc.h for details on the differences between crc32_gzip_refl and
+// crc32_ieee.
+static inline uint32_t crc32_gzip_refl_ref(uint32_t seed, uint8_t * buf, uint64_t len)
+{
+ uint64_t rem = ~seed;
+	uint64_t i;
+	int j;
+
+	uint32_t poly = 0xEDB88320;	// IEEE standard polynomial, bit-reflected
+
+ for (i = 0; i < len; i++) {
+ rem = rem ^ (buf[i]);
+ for (j = 0; j < MAX_ITER; j++) {
+ rem = (rem & 0x1ULL) ? (rem >> 1) ^ poly : (rem >> 1);
+ }
+ }
+ return ~rem;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/isa-l/crc/crc_simple_test.c b/src/isa-l/crc/crc_simple_test.c
new file mode 100644
index 000000000..4799f8745
--- /dev/null
+++ b/src/isa-l/crc/crc_simple_test.c
@@ -0,0 +1,64 @@
+/**********************************************************************
+ Copyright(c) 2011-2013 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdint.h>
+#include "crc.h"
+
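+/* Known-answer test: the CRCs of the byte sequence 0..47, computed with the
+ * fixed seeds below, must match the precomputed expected values. */
+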
+const uint16_t init_crc_16 = 0x1234;
+const uint16_t t10_dif_expected = 0x60b3;
+const uint32_t init_crc_32 = 0x12345678;
+const uint32_t ieee_expected = 0x2ceadbe3;
+
+int main(void)
+{
+ unsigned char p_buf[48];
+ uint16_t t10_dif_computed;
+ uint32_t ieee_computed;
+ int i;
+
+ for (i = 0; i < 48; i++)
+ p_buf[i] = i;
+
+ t10_dif_computed = crc16_t10dif(init_crc_16, p_buf, 48);
+
+ if (t10_dif_computed != t10_dif_expected)
+ printf("WRONG CRC-16(T10 DIF) value\n");
+ else
+ printf("CORRECT CRC-16(T10 DIF) value\n");
+
+ ieee_computed = crc32_ieee(init_crc_32, p_buf, 48);
+
+ if (ieee_computed != ieee_expected)
+ printf("WRONG CRC-32(IEEE) value\n");
+ else
+ printf("CORRECT CRC-32(IEEE) value\n");
+
+ return 0;
+}
diff --git a/src/isa-l/doc/build.md b/src/isa-l/doc/build.md
new file mode 100644
index 000000000..db2c4f0d2
--- /dev/null
+++ b/src/isa-l/doc/build.md
@@ -0,0 +1,46 @@
+# ISA-L Build Details
+
+For x86-64 builds it is highly recommended to get an up-to-date version of
+[nasm] that understands the latest instruction sets. Building with an older
+version is usually possible, but the library may lack some of the fastest
+function versions.
+
+## Windows Build Environment Details
+
+The Windows dynamic and static libraries can be built with the nmake tool on
+the Windows command line when the appropriate paths and tools are set up as
+follows.
+
+### Download nasm and add it to the path
+
+Download and install [nasm] and add its location to the path.
+
+ set PATH=%PATH%;C:\Program Files\NASM
+
+### Setup compiler environment
+
+Install compiler and run environment setup script.
+
+Compilers for Windows usually have a batch file, typically `vcvarsall.bat` or
+`compilervars.bat`, that sets up environment variables for the command line,
+or a link to run these. For the Visual Studio Community edition this may be as
+follows.
+
+ C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat x64
+
+For the Intel compiler the path is typically as follows, where yyyy, x, and
+zzz represent the version.
+
+ C:\Program Files (x86)\IntelSWTools\system_studio_for_windows_yyyy.x.zzz\compilers_and_libraries_yyyy\bin\compilervars.bat intel64
+
+### Build ISA-L libs and copy to appropriate place
+
+Run `nmake /f Makefile.nmake`
+
+This should build isa-l.dll, isa-l.lib, and isa-l_static.lib. You may want to
+copy the libraries to a system directory in the dynamic linking path, such as
+`C:\windows\system32`, or to a project directory.
+
+To build a simple program with the static library:
+
+ cl /Fe: test.exe test.c isa-l_static.lib
+
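+A minimal `test.c` for this check could look like the following sketch
+(assuming isa-l's `crc.h` is on the include path):
+
+    #include <stdio.h>
+    #include "crc.h"
+
+    int main(void)
+    {
+        unsigned char buf[64] = { 0 };
+        /* crc32_ieee(seed, buf, len) is declared in isa-l's crc.h */
+        printf("crc32_ieee: 0x%x\n", crc32_ieee(0, buf, sizeof(buf)));
+        return 0;
+    }
+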
+[nasm]: https://www.nasm.us
diff --git a/src/isa-l/doc/test.md b/src/isa-l/doc/test.md
new file mode 100644
index 000000000..e3a4ccda7
--- /dev/null
+++ b/src/isa-l/doc/test.md
@@ -0,0 +1,49 @@
+# ISA-L Testing
+
+Tests are divided into check tests, unit tests, and fuzz tests. Check tests,
+built with `make check`, should have no additional dependencies. Other unit
+tests, built with `make test`, may have additional dependencies in order to
+compare the output of ISA-L to other standard libraries and ensure
+compatibility. Fuzz tests are meant to be run with a fuzzing tool such as
+[AFL] or [llvm libFuzzer], which direct the input data based on coverage.
+There are a number of scripts in the /tools directory to help automate
+running the tests.
+
+## Test check
+
+`./tools/test_autorun.sh` is a helper script for kicking off check tests,
+which typically run for a few minutes, or extended tests that can run much
+longer. The command `test_autorun.sh check` builds and runs all check tests
+with autotools and runs other short tests to verify that check tests, unit
+tests, examples, install, executable stack, and format are correct. Each run
+of `test_autorun.sh` builds tests with a new random test seed, so each run is
+unique to the seed but deterministic for debugging. Tests are also built with
+sanitizers and Electric Fence if available.
+
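+For example, to kick off the short check pass described above:
+
+    ./tools/test_autorun.sh check
+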
+## Extended tests
+
+Extended tests are initiated with the command `./tools/test_autorun.sh
+ext`. These build and run check tests, unit tests, and other utilities that
+can take much longer than check tests alone. This includes special compression
+tools and some cross targets, such as the no-arch build of base functions only
+and the mingw build, if tools are available.
+
+## Fuzz testing
+
+`./tools/test_fuzz.sh` is a helper script to set up, build, and run the
+ISA-L inflate fuzz tests under multiple fuzz tools. Fuzzing with
+[llvm libFuzzer] requires clang compiler tools with `-fsanitize=fuzzer` or
+`libFuzzer` installed. You can invoke the default fuzz tests under llvm with
+
+ ./tools/test_fuzz.sh -e checked
+
+To use [AFL], install the tools, set up the system for `afl-fuzz`, and run
+
+ ./tools/test_fuzz.sh -e checked --afl 1 --llvm -1 -d 1
+
+This uses internal vectors as a seed. You can also specify a sample file to
+use as a seed instead with `-f <file>`. One of three fuzz tests can be
+invoked: checked, simple, or round_trip.
+
+[llvm libFuzzer]: https://llvm.org/docs/LibFuzzer.html
+[AFL]: https://github.com/google/AFL
diff --git a/src/isa-l/erasure_code/Makefile.am b/src/isa-l/erasure_code/Makefile.am
new file mode 100644
index 000000000..f1d0d1d6a
--- /dev/null
+++ b/src/isa-l/erasure_code/Makefile.am
@@ -0,0 +1,153 @@
+########################################################################
+# Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+include erasure_code/aarch64/Makefile.am
+
+include erasure_code/ppc64le/Makefile.am
+
+lsrc += erasure_code/ec_base.c
+
+lsrc_base_aliases += erasure_code/ec_base_aliases.c
+lsrc_x86_64 += \
+ erasure_code/ec_highlevel_func.c \
+ erasure_code/gf_vect_mul_sse.asm \
+ erasure_code/gf_vect_mul_avx.asm \
+ erasure_code/gf_vect_dot_prod_sse.asm \
+ erasure_code/gf_vect_dot_prod_avx.asm \
+ erasure_code/gf_vect_dot_prod_avx2.asm \
+ erasure_code/gf_2vect_dot_prod_sse.asm \
+ erasure_code/gf_3vect_dot_prod_sse.asm \
+ erasure_code/gf_4vect_dot_prod_sse.asm \
+ erasure_code/gf_5vect_dot_prod_sse.asm \
+ erasure_code/gf_6vect_dot_prod_sse.asm \
+ erasure_code/gf_2vect_dot_prod_avx.asm \
+ erasure_code/gf_3vect_dot_prod_avx.asm \
+ erasure_code/gf_4vect_dot_prod_avx.asm \
+ erasure_code/gf_5vect_dot_prod_avx.asm \
+ erasure_code/gf_6vect_dot_prod_avx.asm \
+ erasure_code/gf_2vect_dot_prod_avx2.asm \
+ erasure_code/gf_3vect_dot_prod_avx2.asm \
+ erasure_code/gf_4vect_dot_prod_avx2.asm \
+ erasure_code/gf_5vect_dot_prod_avx2.asm \
+ erasure_code/gf_6vect_dot_prod_avx2.asm \
+ erasure_code/gf_vect_mad_sse.asm \
+ erasure_code/gf_2vect_mad_sse.asm \
+ erasure_code/gf_3vect_mad_sse.asm \
+ erasure_code/gf_4vect_mad_sse.asm \
+ erasure_code/gf_5vect_mad_sse.asm \
+ erasure_code/gf_6vect_mad_sse.asm \
+ erasure_code/gf_vect_mad_avx.asm \
+ erasure_code/gf_2vect_mad_avx.asm \
+ erasure_code/gf_3vect_mad_avx.asm \
+ erasure_code/gf_4vect_mad_avx.asm \
+ erasure_code/gf_5vect_mad_avx.asm \
+ erasure_code/gf_6vect_mad_avx.asm \
+ erasure_code/gf_vect_mad_avx2.asm \
+ erasure_code/gf_2vect_mad_avx2.asm \
+ erasure_code/gf_3vect_mad_avx2.asm \
+ erasure_code/gf_4vect_mad_avx2.asm \
+ erasure_code/gf_5vect_mad_avx2.asm \
+ erasure_code/gf_6vect_mad_avx2.asm \
+ erasure_code/ec_multibinary.asm
+
+#if HAVE_AVX512
+lsrc_x86_64 += \
+ erasure_code/gf_vect_dot_prod_avx512.asm \
+ erasure_code/gf_2vect_dot_prod_avx512.asm \
+ erasure_code/gf_3vect_dot_prod_avx512.asm \
+ erasure_code/gf_4vect_dot_prod_avx512.asm \
+ erasure_code/gf_5vect_dot_prod_avx512.asm \
+ erasure_code/gf_6vect_dot_prod_avx512.asm \
+ erasure_code/gf_vect_mad_avx512.asm \
+ erasure_code/gf_2vect_mad_avx512.asm \
+ erasure_code/gf_3vect_mad_avx512.asm \
+ erasure_code/gf_4vect_mad_avx512.asm \
+ erasure_code/gf_5vect_mad_avx512.asm \
+ erasure_code/gf_6vect_mad_avx512.asm
+
+lsrc_x86_32 += \
+ erasure_code/ec_highlevel_func.c \
+ erasure_code/ec_multibinary.asm \
+ erasure_code/gf_vect_dot_prod_avx.asm \
+ erasure_code/gf_2vect_dot_prod_avx.asm \
+ erasure_code/gf_3vect_dot_prod_avx.asm \
+ erasure_code/gf_4vect_dot_prod_avx.asm \
+ erasure_code/gf_vect_dot_prod_sse.asm \
+ erasure_code/gf_2vect_dot_prod_sse.asm \
+ erasure_code/gf_3vect_dot_prod_sse.asm \
+ erasure_code/gf_4vect_dot_prod_sse.asm \
+ erasure_code/gf_vect_dot_prod_avx2.asm \
+ erasure_code/gf_2vect_dot_prod_avx2.asm \
+ erasure_code/gf_3vect_dot_prod_avx2.asm \
+ erasure_code/gf_4vect_dot_prod_avx2.asm
+
+unit_tests32 += erasure_code/erasure_code_base_test \
+ erasure_code/erasure_code_test \
+ erasure_code/gf_vect_mul_test \
+ erasure_code/gf_vect_mul_base_test \
+ erasure_code/gf_vect_dot_prod_base_test \
+ erasure_code/gf_vect_dot_prod_test
+
+perf_tests32 += erasure_code/gf_vect_mul_perf \
+ erasure_code/gf_vect_dot_prod_perf \
+ erasure_code/erasure_code_perf \
+ erasure_code/erasure_code_base_perf \
+ erasure_code/gf_vect_dot_prod_1tbl
+
+src_include += -I $(srcdir)/erasure_code
+extern_hdrs += include/erasure_code.h \
+ include/gf_vect_mul.h
+
+other_src += erasure_code/ec_base.h \
+ include/multibinary.asm \
+ include/reg_sizes.asm
+
+check_tests += erasure_code/gf_vect_mul_test \
+ erasure_code/erasure_code_test \
+ erasure_code/gf_inverse_test \
+ erasure_code/erasure_code_update_test
+
+unit_tests += \
+ erasure_code/gf_vect_mul_base_test \
+ erasure_code/gf_vect_dot_prod_base_test \
+ erasure_code/gf_vect_dot_prod_test \
+ erasure_code/gf_vect_mad_test \
+ erasure_code/erasure_code_base_test
+
+perf_tests += erasure_code/gf_vect_mul_perf \
+ erasure_code/gf_vect_dot_prod_perf \
+ erasure_code/gf_vect_dot_prod_1tbl \
+ erasure_code/erasure_code_perf \
+ erasure_code/erasure_code_base_perf \
+ erasure_code/erasure_code_update_perf
+
+other_tests += erasure_code/gen_rs_matrix_limits
+
+other_src += include/test.h \
+ include/types.h
diff --git a/src/isa-l/erasure_code/aarch64/Makefile.am b/src/isa-l/erasure_code/aarch64/Makefile.am
new file mode 100644
index 000000000..94bb5a139
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/Makefile.am
@@ -0,0 +1,45 @@
+##################################################################
+# Copyright (c) 2019 Huawei Technologies Co., Ltd.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Huawei Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+lsrc_aarch64 += \
+ erasure_code/aarch64/ec_aarch64_highlevel_func.c \
+ erasure_code/aarch64/ec_aarch64_dispatcher.c \
+ erasure_code/aarch64/gf_vect_dot_prod_neon.S \
+ erasure_code/aarch64/gf_2vect_dot_prod_neon.S \
+ erasure_code/aarch64/gf_3vect_dot_prod_neon.S \
+ erasure_code/aarch64/gf_4vect_dot_prod_neon.S \
+ erasure_code/aarch64/gf_5vect_dot_prod_neon.S \
+ erasure_code/aarch64/gf_vect_mad_neon.S \
+ erasure_code/aarch64/gf_2vect_mad_neon.S \
+ erasure_code/aarch64/gf_3vect_mad_neon.S \
+ erasure_code/aarch64/gf_4vect_mad_neon.S \
+ erasure_code/aarch64/gf_5vect_mad_neon.S \
+ erasure_code/aarch64/gf_6vect_mad_neon.S \
+ erasure_code/aarch64/gf_vect_mul_neon.S \
+ erasure_code/aarch64/ec_multibinary_arm.S
diff --git a/src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c b/src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c
new file mode 100644
index 000000000..ba6634785
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c
@@ -0,0 +1,69 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
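+/* Runtime dispatch: pick the NEON implementation when the kernel reports
+ * Advanced SIMD support via getauxval(AT_HWCAP), else use the portable
+ * base version. */
+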
+DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(gf_vect_dot_prod_neon);
+ return PROVIDER_BASIC(gf_vect_dot_prod);
+}
+
+DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(gf_vect_mad_neon);
+ return PROVIDER_BASIC(gf_vect_mad);
+}
+
+DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(ec_encode_data_neon);
+ return PROVIDER_BASIC(ec_encode_data);
+}
+
+DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(ec_encode_data_update_neon);
+ return PROVIDER_BASIC(ec_encode_data_update);
+}
+
+DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(gf_vect_mul_neon);
+ return PROVIDER_BASIC(gf_vect_mul);
+}
diff --git a/src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c b/src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c
new file mode 100644
index 000000000..dd23702ce
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c
@@ -0,0 +1,127 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include "erasure_code.h"
+
+/* external function declarations */
+extern void gf_vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest);
+extern void gf_2vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+extern void gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+extern void gf_4vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+extern void gf_5vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+extern void gf_vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char *dest);
+extern void gf_2vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_3vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_4vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_6vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+
+void ec_encode_data_neon(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
+ unsigned char **coding)
+{
+ if (len < 16) {
+ ec_encode_data_base(len, k, rows, g_tbls, data, coding);
+ return;
+ }
+
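+	/* Encode up to five parity rows per pass; g_tbls holds 32 bytes of
+	 * nibble lookup tables per (row, input) pair, hence the 5 * k * 32
+	 * advance between passes. */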
+ while (rows > 5) {
+ gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
+ g_tbls += 5 * k * 32;
+ coding += 5;
+ rows -= 5;
+ }
+ switch (rows) {
+ case 5:
+ gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_dot_prod_neon(len, k, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_dot_prod_neon(len, k, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_dot_prod_neon(len, k, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_dot_prod_neon(len, k, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ default:
+ break;
+ }
+}
+
+void ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding)
+{
+ if (len < 16) {
+ ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
+ return;
+ }
+ while (rows > 6) {
+ gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 6:
+ gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 5:
+ gf_5vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_mad_neon(len, k, vec_i, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
diff --git a/src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S b/src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S
new file mode 100644
index 000000000..0b75a4902
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S
@@ -0,0 +1,36 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+mbin_interface ec_encode_data
+mbin_interface gf_vect_mul
+mbin_interface gf_vect_dot_prod
+mbin_interface gf_vect_mad
+mbin_interface ec_encode_data_update
diff --git a/src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S
new file mode 100644
index 000000000..33a28501d
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S
@@ -0,0 +1,399 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_2vect_dot_prod_neon
+.type gf_2vect_dot_prod_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_tbl .req x2
+x_src .req x3
+x_dest .req x4
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_vec_i .req x5
+x_ptr .req x6
+x_pos .req x7
+x_tmp .req x8
+x_tbl1 .req x9
+x_tbl2 .req x10
+x_dest1 .req x11
+x_dest2 .req x12
+
+/* vectors */
+v_gft1_lo .req v0
+v_gft1_hi .req v1
+v_gft2_lo .req v2
+v_gft2_hi .req v3
+q_gft1_lo .req q0
+q_gft1_hi .req q1
+q_gft2_lo .req q2
+q_gft2_hi .req q3
+
+v_mask0f .req v4
+q_mask0f .req q4
+
+v_tmp1_lo .req v5
+v_tmp1_hi .req v6
+v_tmp1 .req v7
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+v_data_4 .req v12
+v_data_5 .req v13
+v_data_6 .req v14
+v_data_7 .req v15
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+q_data_4 .req q12
+q_data_5 .req q13
+q_data_6 .req q14
+q_data_7 .req q15
+
+v_p1_0 .req v16
+v_p1_1 .req v17
+v_p1_2 .req v18
+v_p1_3 .req v19
+v_p1_4 .req v20
+v_p1_5 .req v21
+v_p1_6 .req v22
+v_p1_7 .req v23
+v_p2_0 .req v24
+v_p2_1 .req v25
+v_p2_2 .req v26
+v_p2_3 .req v27
+v_p2_4 .req v28
+v_p2_5 .req v29
+v_p2_6 .req v30
+v_p2_7 .req v31
+
+q_p1_0 .req q16
+q_p1_1 .req q17
+q_p1_2 .req q18
+q_p1_3 .req q19
+q_p1_4 .req q20
+q_p1_5 .req q21
+q_p1_6 .req q22
+q_p1_7 .req q23
+q_p2_0 .req q24
+q_p2_1 .req q25
+q_p2_2 .req q26
+q_p2_3 .req q27
+q_p2_4 .req q28
+q_p2_5 .req q29
+q_p2_6 .req q30
+q_p2_7 .req q31
+
+v_p1 .req v_p1_0
+q_p1 .req q_p1_0
+v_p2 .req v_p2_0
+q_p2 .req q_p2_0
+v_data .req v_p1_1
+q_data .req q_p1_1
+v_data_lo .req v_p1_2
+v_data_hi .req v_p1_3
+
+gf_2vect_dot_prod_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ mov x_pos, #0
+ lsl x_vec, x_vec, #3
+ ldr x_dest1, [x_dest, #8*0]
+ ldr x_dest2, [x_dest, #8*1]
+
+.Lloop128_init:
+ /* less than 128 bytes, goto Lloop16_init */
+ cmp x_len, #128
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_len, x_len, #128
+
+.Lloop128:
+ movi v_p1_0.16b, #0
+ movi v_p1_1.16b, #0
+ movi v_p1_2.16b, #0
+ movi v_p1_3.16b, #0
+ movi v_p1_4.16b, #0
+ movi v_p1_5.16b, #0
+ movi v_p1_6.16b, #0
+ movi v_p1_7.16b, #0
+
+ movi v_p2_0.16b, #0
+ movi v_p2_1.16b, #0
+ movi v_p2_2.16b, #0
+ movi v_p2_3.16b, #0
+ movi v_p2_4.16b, #0
+ movi v_p2_5.16b, #0
+ movi v_p2_6.16b, #0
+ movi v_p2_7.16b, #0
+
+ mov x_tbl1, x_tbl
+ add x_tbl2, x_tbl, x_vec, lsl #2
+ mov x_vec_i, #0
+
+.Lloop128_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ add x_vec_i, x_vec_i, #8
+ add x_ptr, x_ptr, x_pos
+
+ ldp q_data_0, q_data_1, [x_ptr], #32
+ ldp q_data_2, q_data_3, [x_ptr], #32
+
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+ ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+ ldp q_data_4, q_data_5, [x_ptr], #32
+ ldp q_data_6, q_data_7, [x_ptr], #32
+ prfm pldl1strm, [x_ptr]
+ prfm pldl1keep, [x_tbl1]
+ prfm pldl1keep, [x_tbl2]
+
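+	/* GF(2^8) multiply via 4-bit table lookups: split each byte into low
+	 * and high nibbles, tbl-look each up in a 16-entry table, and XOR the
+	 * two partial products into the running accumulators. */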
+ /* data_0 */
+ and v_tmp1.16b, v_data_0.16b, v_mask0f.16b
+ ushr v_data_0.16b, v_data_0.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_0.16b
+ eor v_p1_0.16b, v_tmp1_lo.16b, v_p1_0.16b
+ eor v_p1_0.16b, v_p1_0.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_0.16b
+ eor v_p2_0.16b, v_tmp1_lo.16b, v_p2_0.16b
+ eor v_p2_0.16b, v_p2_0.16b, v_tmp1_hi.16b
+
+ /* data_1 */
+ and v_tmp1.16b, v_data_1.16b, v_mask0f.16b
+ ushr v_data_1.16b, v_data_1.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_1.16b
+ eor v_p1_1.16b, v_tmp1_lo.16b, v_p1_1.16b
+ eor v_p1_1.16b, v_p1_1.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_1.16b
+ eor v_p2_1.16b, v_tmp1_lo.16b, v_p2_1.16b
+ eor v_p2_1.16b, v_p2_1.16b, v_tmp1_hi.16b
+
+ /* data_2 */
+ and v_tmp1.16b, v_data_2.16b, v_mask0f.16b
+ ushr v_data_2.16b, v_data_2.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2.16b
+ eor v_p1_2.16b, v_tmp1_lo.16b, v_p1_2.16b
+ eor v_p1_2.16b, v_p1_2.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_2.16b
+ eor v_p2_2.16b, v_tmp1_lo.16b, v_p2_2.16b
+ eor v_p2_2.16b, v_p2_2.16b, v_tmp1_hi.16b
+
+ /* data_3 */
+ and v_tmp1.16b, v_data_3.16b, v_mask0f.16b
+ ushr v_data_3.16b, v_data_3.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_3.16b
+ eor v_p1_3.16b, v_tmp1_lo.16b, v_p1_3.16b
+ eor v_p1_3.16b, v_p1_3.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_3.16b
+ eor v_p2_3.16b, v_tmp1_lo.16b, v_p2_3.16b
+ eor v_p2_3.16b, v_p2_3.16b, v_tmp1_hi.16b
+
+ /* data_4 */
+ and v_tmp1.16b, v_data_4.16b, v_mask0f.16b
+ ushr v_data_4.16b, v_data_4.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_4.16b
+ eor v_p1_4.16b, v_tmp1_lo.16b, v_p1_4.16b
+ eor v_p1_4.16b, v_p1_4.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_4.16b
+ eor v_p2_4.16b, v_tmp1_lo.16b, v_p2_4.16b
+ eor v_p2_4.16b, v_p2_4.16b, v_tmp1_hi.16b
+
+ /* data_5 */
+ and v_tmp1.16b, v_data_5.16b, v_mask0f.16b
+ ushr v_data_5.16b, v_data_5.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_5.16b
+ eor v_p1_5.16b, v_tmp1_lo.16b, v_p1_5.16b
+ eor v_p1_5.16b, v_p1_5.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_5.16b
+ eor v_p2_5.16b, v_tmp1_lo.16b, v_p2_5.16b
+ eor v_p2_5.16b, v_p2_5.16b, v_tmp1_hi.16b
+
+ /* data_6 */
+ and v_tmp1.16b, v_data_6.16b, v_mask0f.16b
+ ushr v_data_6.16b, v_data_6.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_6.16b
+ eor v_p1_6.16b, v_tmp1_lo.16b, v_p1_6.16b
+ eor v_p1_6.16b, v_p1_6.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_6.16b
+ eor v_p2_6.16b, v_tmp1_lo.16b, v_p2_6.16b
+ eor v_p2_6.16b, v_p2_6.16b, v_tmp1_hi.16b
+
+ /* data_7 */
+ and v_tmp1.16b, v_data_7.16b, v_mask0f.16b
+ ushr v_data_7.16b, v_data_7.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_7.16b
+ eor v_p1_7.16b, v_tmp1_lo.16b, v_p1_7.16b
+ eor v_p1_7.16b, v_p1_7.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_7.16b
+ eor v_p2_7.16b, v_tmp1_lo.16b, v_p2_7.16b
+ eor v_p2_7.16b, v_p2_7.16b, v_tmp1_hi.16b
+
+ cmp x_vec_i, x_vec
+ blt .Lloop128_vects
+
+.Lloop128_vects_end:
+ add x_ptr, x_dest1, x_pos
+ stp q_p1_0, q_p1_1, [x_ptr], #32
+ stp q_p1_2, q_p1_3, [x_ptr], #32
+ stp q_p1_4, q_p1_5, [x_ptr], #32
+ stp q_p1_6, q_p1_7, [x_ptr]
+
+ add x_ptr, x_dest2, x_pos
+ stp q_p2_0, q_p2_1, [x_ptr], #32
+ stp q_p2_2, q_p2_3, [x_ptr], #32
+ stp q_p2_4, q_p2_5, [x_ptr], #32
+ stp q_p2_6, q_p2_7, [x_ptr]
+
+ add x_pos, x_pos, #128
+ cmp x_pos, x_len
+ ble .Lloop128
+
+.Lloop128_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ add x_len, x_len, #128
+ cmp x_pos, x_len
+ beq .return_pass
+
+.Lloop16_init:
+ sub x_len, x_len, #16
+ cmp x_pos, x_len
+ bgt .lessthan16_init
+
+.Lloop16:
+ movi v_p1.16b, #0
+ movi v_p2.16b, #0
+ mov x_tbl1, x_tbl
+ add x_tbl2, x_tbl, x_vec, lsl #2
+ mov x_vec_i, #0
+
+.Lloop16_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ ldr q_data, [x_ptr, x_pos]
+ add x_vec_i, x_vec_i, #8
+
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+ ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ eor v_p1.16b, v_tmp1_lo.16b, v_p1.16b
+ eor v_p1.16b, v_p1.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ eor v_p2.16b, v_tmp1_lo.16b, v_p2.16b
+ eor v_p2.16b, v_p2.16b, v_tmp1_hi.16b
+
+ cmp x_vec_i, x_vec
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ str q_p1, [x_dest1, x_pos]
+ str q_p2, [x_dest2, x_pos]
+ add x_pos, x_pos, #16
+ cmp x_pos, x_len
+ ble .Lloop16
+
+.Lloop16_end:
+ sub x_tmp, x_pos, x_len
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16_init:
+ mov x_pos, x_len
+ b .Lloop16
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S
new file mode 100644
index 000000000..b8a8cadc6
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S
@@ -0,0 +1,402 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+.text
+
+.global gf_2vect_mad_neon
+.type gf_2vect_mad_neon, %function
+
+
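+/*
+ * Expected C prototype (per the isa-l erasure_code API; shown for
+ * reference, not part of the original file):
+ * void gf_2vect_mad_neon(int len, int vec, int vec_i,
+ *                        unsigned char *gftbls, unsigned char *src,
+ *                        unsigned char **dest);
+ * Multiply-accumulates one source buffer into two parity buffers.
+ */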
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_vec_i .req x2
+x_tbl .req x3
+x_src .req x4
+x_dest .req x5
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_src_end .req x6
+x_dest1 .req x7
+x_dest2 .req x8
+x_tmp .req x9
+x_tbl1 .req x10
+x_tbl2 .req x11
+x_const .req x12
+
+/* vectors */
+v_mask0f .req v0
+v_tmp_lo .req v1
+v_tmp_hi .req v2
+v_tmp .req v3
+q_tmp .req q3
+
+v_gft1_lo .req v4
+v_gft1_hi .req v5
+v_gft2_lo .req v6
+v_gft2_hi .req v7
+q_gft1_lo .req q4
+q_gft1_hi .req q5
+q_gft2_lo .req q6
+q_gft2_hi .req q7
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+v_data_4 .req v12
+v_data_5 .req v13
+v_data_6 .req v14
+v_data_7 .req v15
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+q_data_4 .req q12
+q_data_5 .req q13
+q_data_6 .req q14
+q_data_7 .req q15
+
+v_data_0_lo .req v16
+v_data_1_lo .req v17
+v_data_2_lo .req v18
+v_data_3_lo .req v19
+v_data_4_lo .req v20
+v_data_5_lo .req v21
+v_data_6_lo .req v22
+v_data_7_lo .req v23
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+v_data_4_hi .req v_data_4
+v_data_5_hi .req v_data_5
+v_data_6_hi .req v_data_6
+v_data_7_hi .req v_data_7
+
+v_d0 .req v24
+v_d1 .req v25
+v_d2 .req v26
+v_d3 .req v27
+v_d4 .req v28
+v_d5 .req v29
+v_d6 .req v30
+v_d7 .req v31
+q_d0 .req q24
+q_d1 .req q25
+q_d2 .req q26
+q_d3 .req q27
+q_d4 .req q28
+q_d5 .req q29
+q_d6 .req q30
+q_d7 .req q31
+
+v_data .req v16
+q_data .req q16
+v_data_lo .req v17
+v_data_hi .req v18
+
+
+gf_2vect_mad_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ lsl x_vec_i, x_vec_i, #5
+ lsl x_vec, x_vec, #5
+ add x_tbl1, x_tbl, x_vec_i
+ add x_tbl2, x_tbl1, x_vec
+ add x_src_end, x_src, x_len
+
+ ldr x_dest1, [x_dest]
+ ldr x_dest2, [x_dest, #8]
+ ldr q_gft1_lo, [x_tbl1]
+ ldr q_gft1_hi, [x_tbl1, #16]
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
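+	/* each (src, dest) pair owns a 32-byte gftbls entry: a 16-byte
+	 * low-nibble table followed by a 16-byte high-nibble table,
+	 * hence the lsl #5 scaling of x_vec and x_vec_i above */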
+
+.Lloop128_init:
+ /* less than 128 bytes, goto Lloop16_init */
+ cmp x_len, #128
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
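+	/* AAPCS64 makes v8-v15 (as d8-d15) callee-saved, and the
+	 * 128-byte loop uses them as data registers, so spill them
+	 * first */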
+
+ sub x_src_end, x_src_end, #128
+
+.Lloop128:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3, [x_src, #16*3]
+ ldr q_data_4, [x_src, #16*4]
+ ldr q_data_5, [x_src, #16*5]
+ ldr q_data_6, [x_src, #16*6]
+ ldr q_data_7, [x_src, #16*7]
+
+ ldr q_d0, [x_dest1, #16*0]
+ ldr q_d1, [x_dest1, #16*1]
+ ldr q_d2, [x_dest1, #16*2]
+ ldr q_d3, [x_dest1, #16*3]
+ ldr q_d4, [x_dest1, #16*4]
+ ldr q_d5, [x_dest1, #16*5]
+ ldr q_d6, [x_dest1, #16*6]
+ ldr q_d7, [x_dest1, #16*7]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+ and v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b
+ and v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b
+ and v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b
+ and v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+ ushr v_data_4_hi.16b, v_data_4.16b, #4
+ ushr v_data_5_hi.16b, v_data_5.16b, #4
+ ushr v_data_6_hi.16b, v_data_6.16b, #4
+ ushr v_data_7_hi.16b, v_data_7.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ eor v_d0.16b, v_tmp_lo.16b, v_d0.16b
+ eor v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ eor v_d1.16b, v_tmp_lo.16b, v_d1.16b
+ eor v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ eor v_d2.16b, v_tmp_lo.16b, v_d2.16b
+ eor v_d2.16b, v_d2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+ eor v_d3.16b, v_tmp_lo.16b, v_d3.16b
+ eor v_d3.16b, v_d3.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b
+ eor v_d4.16b, v_tmp_lo.16b, v_d4.16b
+ eor v_d4.16b, v_d4.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b
+ eor v_d5.16b, v_tmp_lo.16b, v_d5.16b
+ eor v_d5.16b, v_d5.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b
+ eor v_d6.16b, v_tmp_lo.16b, v_d6.16b
+ eor v_d6.16b, v_d6.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b
+ eor v_d7.16b, v_tmp_lo.16b, v_d7.16b
+ eor v_d7.16b, v_d7.16b, v_tmp_hi.16b
+
+ str q_d0, [x_dest1, #16*0]
+ str q_d1, [x_dest1, #16*1]
+ str q_d2, [x_dest1, #16*2]
+ str q_d3, [x_dest1, #16*3]
+ str q_d4, [x_dest1, #16*4]
+ str q_d5, [x_dest1, #16*5]
+ str q_d6, [x_dest1, #16*6]
+ str q_d7, [x_dest1, #16*7]
+
+ ldr q_d0, [x_dest2, #16*0]
+ ldr q_d1, [x_dest2, #16*1]
+ ldr q_d2, [x_dest2, #16*2]
+ ldr q_d3, [x_dest2, #16*3]
+ ldr q_d4, [x_dest2, #16*4]
+ ldr q_d5, [x_dest2, #16*5]
+ ldr q_d6, [x_dest2, #16*6]
+ ldr q_d7, [x_dest2, #16*7]
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
+ eor v_d0.16b, v_tmp_lo.16b, v_d0.16b
+ eor v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
+ eor v_d1.16b, v_tmp_lo.16b, v_d1.16b
+ eor v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
+ eor v_d2.16b, v_tmp_lo.16b, v_d2.16b
+ eor v_d2.16b, v_d2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
+ eor v_d3.16b, v_tmp_lo.16b, v_d3.16b
+ eor v_d3.16b, v_d3.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_4_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_4_hi.16b
+ eor v_d4.16b, v_tmp_lo.16b, v_d4.16b
+ eor v_d4.16b, v_d4.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_5_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_5_hi.16b
+ eor v_d5.16b, v_tmp_lo.16b, v_d5.16b
+ eor v_d5.16b, v_d5.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_6_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_6_hi.16b
+ eor v_d6.16b, v_tmp_lo.16b, v_d6.16b
+ eor v_d6.16b, v_d6.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_7_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_7_hi.16b
+ eor v_d7.16b, v_tmp_lo.16b, v_d7.16b
+ eor v_d7.16b, v_d7.16b, v_tmp_hi.16b
+
+ str q_d0, [x_dest2, #16*0]
+ str q_d1, [x_dest2, #16*1]
+ str q_d2, [x_dest2, #16*2]
+ str q_d3, [x_dest2, #16*3]
+ str q_d4, [x_dest2, #16*4]
+ str q_d5, [x_dest2, #16*5]
+ str q_d6, [x_dest2, #16*6]
+ str q_d7, [x_dest2, #16*7]
+
+ add x_src, x_src, #128
+ add x_dest1, x_dest1, #128
+ add x_dest2, x_dest2, #128
+ cmp x_src, x_src_end
+ bls .Lloop128
+
+.Lloop128_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+ add x_src_end, x_src_end, #128
+
+.Lloop16_init:
+ sub x_src_end, x_src_end, #16
+ cmp x_src, x_src_end
+ bhi .lessthan16_init
+
+.Lloop16:
+ ldr q_data, [x_src]
+
+ ldr q_d0, [x_dest1]
+ ldr q_d1, [x_dest2]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_d0.16b, v_tmp_lo.16b, v_d0.16b
+ eor v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_d1.16b, v_tmp_lo.16b, v_d1.16b
+ eor v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+ str q_d0, [x_dest1]
+ str q_d1, [x_dest2]
+
+ add x_dest1, x_dest1, #16
+ add x_dest2, x_dest2, #16
+ add x_src, x_src, #16
+ cmp x_src, x_src_end
+ bls .Lloop16
+
+.lessthan16_init:
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16:
+ mov x_src, x_src_end
+ sub x_dest1, x_dest1, x_tmp
+ sub x_dest2, x_dest2, x_tmp
+
+ adrp x_const, const_tbl
+ add x_const, x_const, :lo12:const_tbl
+ sub x_const, x_const, x_tmp
+ ldr q_tmp, [x_const, #16]
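+	/* q_tmp is a byte mask taken from a sliding window over
+	 * const_tbl: 0x00 for the x_tmp bytes already handled by
+	 * .Lloop16, 0xff for the remaining tail bytes, so the masked
+	 * products below only update unprocessed bytes */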
+
+ ldr q_data, [x_src]
+ ldr q_d0, [x_dest1]
+ ldr q_d1, [x_dest2]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+ str q_d0, [x_dest1]
+ str q_d1, [x_dest2]
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
+
+.section .rodata
+.balign 8
+const_tbl:
+ .dword 0x0000000000000000, 0x0000000000000000
+ .dword 0xffffffffffffffff, 0xffffffffffffffff
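+/* const_tbl is 16 zero bytes followed by 16 0xff bytes; the 16-byte load at
+ * (const_tbl + 16 - x_tmp) above therefore yields x_tmp leading zero bytes
+ * and trailing 0xff bytes, masking off the already-processed head of the
+ * final block. */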
diff --git a/src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S
new file mode 100644
index 000000000..becca90e2
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S
@@ -0,0 +1,358 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_3vect_dot_prod_neon
+.type gf_3vect_dot_prod_neon, %function
+
+
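+/*
+ * Expected C prototype (per the isa-l erasure_code API; shown for
+ * reference, not part of the original file):
+ * void gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+ *                             unsigned char **src, unsigned char **dest);
+ * Computes three GF(2^8) dot products across vlen source buffers.
+ */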
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_tbl .req x2
+x_src .req x3
+x_dest .req x4
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_vec_i .req x5
+x_ptr .req x6
+x_pos .req x7
+x_tmp .req x8
+x_dest1 .req x9
+x_tbl1 .req x10
+x_dest2 .req x11
+x_tbl2 .req x12
+x_dest3 .req x13
+x_tbl3 .req x14
+
+/* vectors */
+v_gft1_lo .req v0
+v_gft1_hi .req v1
+v_gft2_lo .req v2
+v_gft2_hi .req v3
+v_gft3_lo .req v4
+v_gft3_hi .req v5
+q_gft1_lo .req q0
+q_gft1_hi .req q1
+q_gft2_lo .req q2
+q_gft2_hi .req q3
+q_gft3_lo .req q4
+q_gft3_hi .req q5
+
+v_mask0f .req v6
+q_mask0f .req q6
+v_tmp1 .req v7
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+
+v_tmp1_lo .req v12
+v_tmp1_hi .req v13
+
+v_p1_0 .req v20
+v_p1_1 .req v21
+v_p1_2 .req v22
+v_p1_3 .req v23
+v_p2_0 .req v24
+v_p2_1 .req v25
+v_p2_2 .req v26
+v_p2_3 .req v27
+v_p3_0 .req v28
+v_p3_1 .req v29
+v_p3_2 .req v30
+v_p3_3 .req v31
+
+q_p1_0 .req q20
+q_p1_1 .req q21
+q_p1_2 .req q22
+q_p1_3 .req q23
+q_p2_0 .req q24
+q_p2_1 .req q25
+q_p2_2 .req q26
+q_p2_3 .req q27
+q_p3_0 .req q28
+q_p3_1 .req q29
+q_p3_2 .req q30
+q_p3_3 .req q31
+
+v_data .req v_p1_1
+q_data .req q_p1_1
+v_data_lo .req v_p1_2
+v_data_hi .req v_p1_3
+
+
+gf_3vect_dot_prod_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ mov x_pos, #0
+ lsl x_vec, x_vec, #3
+ ldr x_dest1, [x_dest, #8*0]
+ ldr x_dest2, [x_dest, #8*1]
+ ldr x_dest3, [x_dest, #8*2]
+
+.Lloop64_init:
+ /* less than 64 bytes, goto Lloop16_init */
+ cmp x_len, #64
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_len, x_len, #64
+
+.Lloop64:
+ movi v_p1_0.16b, #0
+ movi v_p1_1.16b, #0
+ movi v_p1_2.16b, #0
+ movi v_p1_3.16b, #0
+ movi v_p2_0.16b, #0
+ movi v_p2_1.16b, #0
+ movi v_p2_2.16b, #0
+ movi v_p2_3.16b, #0
+ movi v_p3_0.16b, #0
+ movi v_p3_1.16b, #0
+ movi v_p3_2.16b, #0
+ movi v_p3_3.16b, #0
+
+ mov x_tbl1, x_tbl
+ add x_tbl2, x_tbl1, x_vec, lsl #2
+ add x_tbl3, x_tbl2, x_vec, lsl #2
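+	/* x_vec was pre-scaled to vec*8, so "x_vec, lsl #2" is vec*32:
+	 * one dest's worth of 32-byte lookup-table entries */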
+ mov x_vec_i, #0
+
+.Lloop64_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ add x_vec_i, x_vec_i, #8
+ add x_ptr, x_ptr, x_pos
+
+ ldr q_data_0, [x_ptr], #16
+ ldr q_data_1, [x_ptr], #16
+
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+ ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+ ldp q_gft3_lo, q_gft3_hi, [x_tbl3], #32
+
+ ldr q_data_2, [x_ptr], #16
+ ldr q_data_3, [x_ptr], #16
+ prfm pldl1strm, [x_ptr]
+ prfm pldl1keep, [x_tbl1]
+ prfm pldl1keep, [x_tbl2]
+ prfm pldl1keep, [x_tbl3]
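+	/* stream the next source block and keep the upcoming table
+	 * entries resident in L1 */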
+
+ /* data_0 */
+ and v_tmp1.16b, v_data_0.16b, v_mask0f.16b
+ ushr v_data_0.16b, v_data_0.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_0.16b
+ eor v_p1_0.16b, v_tmp1_lo.16b, v_p1_0.16b
+ eor v_p1_0.16b, v_p1_0.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_0.16b
+ eor v_p2_0.16b, v_tmp1_lo.16b, v_p2_0.16b
+ eor v_p2_0.16b, v_p2_0.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_0.16b
+ eor v_p3_0.16b, v_tmp1_lo.16b, v_p3_0.16b
+ eor v_p3_0.16b, v_p3_0.16b, v_tmp1_hi.16b
+
+ /* data_1 */
+ and v_tmp1.16b, v_data_1.16b, v_mask0f.16b
+ ushr v_data_1.16b, v_data_1.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_1.16b
+ eor v_p1_1.16b, v_tmp1_lo.16b, v_p1_1.16b
+ eor v_p1_1.16b, v_p1_1.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_1.16b
+ eor v_p2_1.16b, v_tmp1_lo.16b, v_p2_1.16b
+ eor v_p2_1.16b, v_p2_1.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_1.16b
+ eor v_p3_1.16b, v_tmp1_lo.16b, v_p3_1.16b
+ eor v_p3_1.16b, v_p3_1.16b, v_tmp1_hi.16b
+
+ /* data_2 */
+ and v_tmp1.16b, v_data_2.16b, v_mask0f.16b
+ ushr v_data_2.16b, v_data_2.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2.16b
+ eor v_p1_2.16b, v_tmp1_lo.16b, v_p1_2.16b
+ eor v_p1_2.16b, v_p1_2.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_2.16b
+ eor v_p2_2.16b, v_tmp1_lo.16b, v_p2_2.16b
+ eor v_p2_2.16b, v_p2_2.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_2.16b
+ eor v_p3_2.16b, v_tmp1_lo.16b, v_p3_2.16b
+ eor v_p3_2.16b, v_p3_2.16b, v_tmp1_hi.16b
+
+ /* data_3 */
+ and v_tmp1.16b, v_data_3.16b, v_mask0f.16b
+ ushr v_data_3.16b, v_data_3.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_3.16b
+ eor v_p1_3.16b, v_tmp1_lo.16b, v_p1_3.16b
+ eor v_p1_3.16b, v_p1_3.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_3.16b
+ eor v_p2_3.16b, v_tmp1_lo.16b, v_p2_3.16b
+ eor v_p2_3.16b, v_p2_3.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_3.16b
+ eor v_p3_3.16b, v_tmp1_lo.16b, v_p3_3.16b
+ eor v_p3_3.16b, v_p3_3.16b, v_tmp1_hi.16b
+
+ cmp x_vec_i, x_vec
+ blt .Lloop64_vects
+
+.Lloop64_vects_end:
+ add x_ptr, x_dest1, x_pos
+ stp q_p1_0, q_p1_1, [x_ptr], #32
+ stp q_p1_2, q_p1_3, [x_ptr]
+
+ add x_ptr, x_dest2, x_pos
+ stp q_p2_0, q_p2_1, [x_ptr], #32
+ stp q_p2_2, q_p2_3, [x_ptr]
+
+ add x_ptr, x_dest3, x_pos
+ stp q_p3_0, q_p3_1, [x_ptr], #32
+ stp q_p3_2, q_p3_3, [x_ptr]
+
+ add x_pos, x_pos, #64
+ cmp x_pos, x_len
+ ble .Lloop64
+
+.Lloop64_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ add x_len, x_len, #64
+ cmp x_pos, x_len
+ beq .return_pass
+
+.Lloop16_init:
+ sub x_len, x_len, #16
+ cmp x_pos, x_len
+ bgt .lessthan16_init
+
+.Lloop16:
+ movi v_p1_0.16b, #0
+ movi v_p2_0.16b, #0
+ movi v_p3_0.16b, #0
+ mov x_tbl1, x_tbl
+ add x_tbl2, x_tbl1, x_vec, lsl #2
+ add x_tbl3, x_tbl2, x_vec, lsl #2
+ mov x_vec_i, #0
+
+.Lloop16_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ add x_vec_i, x_vec_i, #8
+ ldr q_data, [x_ptr, x_pos]
+
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+ ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+ ldp q_gft3_lo, q_gft3_hi, [x_tbl3], #32
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_gft1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_gft1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ tbl v_gft2_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_gft2_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ tbl v_gft3_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_gft3_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+
+ eor v_p1_0.16b, v_gft1_hi.16b, v_p1_0.16b
+ eor v_p1_0.16b, v_p1_0.16b, v_gft1_lo.16b
+ eor v_p2_0.16b, v_gft2_hi.16b, v_p2_0.16b
+ eor v_p2_0.16b, v_p2_0.16b, v_gft2_lo.16b
+ eor v_p3_0.16b, v_gft3_hi.16b, v_p3_0.16b
+ eor v_p3_0.16b, v_p3_0.16b, v_gft3_lo.16b
+
+ cmp x_vec_i, x_vec
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ str q_p1_0, [x_dest1, x_pos]
+ str q_p2_0, [x_dest2, x_pos]
+ str q_p3_0, [x_dest3, x_pos]
+ add x_pos, x_pos, #16
+ cmp x_pos, x_len
+ ble .Lloop16
+
+.Lloop16_end:
+ sub x_tmp, x_pos, x_len
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16_init:
+ mov x_pos, x_len
+ b .Lloop16
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S
new file mode 100644
index 000000000..d9a3cccc4
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S
@@ -0,0 +1,382 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+.text
+
+.global gf_3vect_mad_neon
+.type gf_3vect_mad_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_vec_i .req x2
+x_tbl .req x3
+x_src .req x4
+x_dest .req x5
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_src_end .req x6
+x_dest1 .req x7
+x_dest2 .req x8
+x_dest3 .req x_dest
+x_tmp .req x10
+x_tbl1 .req x11
+x_tbl2 .req x12
+x_tbl3 .req x13
+x_const .req x14
+
+/* vectors */
+v_mask0f .req v0
+v_tmp_lo .req v1
+v_tmp_hi .req v2
+v_tmp .req v3
+q_tmp .req q3
+
+v_gft1_lo .req v4
+v_gft1_hi .req v5
+v_gft2_lo .req v6
+v_gft2_hi .req v7
+v_gft3_lo .req v16
+v_gft3_hi .req v17
+q_gft1_lo .req q4
+q_gft1_hi .req q5
+q_gft2_lo .req q6
+q_gft2_hi .req q7
+q_gft3_lo .req q16
+q_gft3_hi .req q17
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+
+v_data_0_lo .req v12
+v_data_1_lo .req v13
+v_data_2_lo .req v14
+v_data_3_lo .req v15
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+
+v_d1_0 .req v20
+v_d1_1 .req v21
+v_d1_2 .req v22
+v_d1_3 .req v23
+v_d2_0 .req v24
+v_d2_1 .req v25
+v_d2_2 .req v26
+v_d2_3 .req v27
+v_d3_0 .req v28
+v_d3_1 .req v29
+v_d3_2 .req v30
+v_d3_3 .req v31
+q_d1_0 .req q20
+q_d1_1 .req q21
+q_d1_2 .req q22
+q_d1_3 .req q23
+q_d2_0 .req q24
+q_d2_1 .req q25
+q_d2_2 .req q26
+q_d2_3 .req q27
+q_d3_0 .req q28
+q_d3_1 .req q29
+q_d3_2 .req q30
+q_d3_3 .req q31
+
+v_data .req v21
+q_data .req q21
+v_data_lo .req v22
+v_data_hi .req v23
+
+gf_3vect_mad_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ lsl x_vec_i, x_vec_i, #5
+ lsl x_vec, x_vec, #5
+ add x_tbl1, x_tbl, x_vec_i
+ add x_tbl2, x_tbl1, x_vec
+ add x_tbl3, x_tbl2, x_vec
+ add x_src_end, x_src, x_len
+ ldr x_dest1, [x_dest]
+ ldr x_dest2, [x_dest, #8]
+ ldr x_dest3, [x_dest, #16]
+ ldr q_gft1_lo, [x_tbl1]
+ ldr q_gft1_hi, [x_tbl1, #16]
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+ ldr q_gft3_lo, [x_tbl3]
+ ldr q_gft3_hi, [x_tbl3, #16]
+
+.Lloop64_init:
+ /* less than 64 bytes, goto Lloop16_init */
+ cmp x_len, #64
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src_end, x_src_end, #64
+
+.Lloop64:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3, [x_src, #16*3]
+ add x_src, x_src, #64
+
+ ldr q_d1_0, [x_dest1, #16*0]
+ ldr q_d1_1, [x_dest1, #16*1]
+ ldr q_d1_2, [x_dest1, #16*2]
+ ldr q_d1_3, [x_dest1, #16*3]
+
+ ldr q_d2_0, [x_dest2, #16*0]
+ ldr q_d2_1, [x_dest2, #16*1]
+ ldr q_d2_2, [x_dest2, #16*2]
+ ldr q_d2_3, [x_dest2, #16*3]
+
+ ldr q_d3_0, [x_dest3, #16*0]
+ ldr q_d3_1, [x_dest3, #16*1]
+ ldr q_d3_2, [x_dest3, #16*2]
+ ldr q_d3_3, [x_dest3, #16*3]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b
+ eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b
+ eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+ eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b
+ eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
+ eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b
+ eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
+ eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b
+ eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
+ eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b
+ eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b
+ eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b
+ eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b
+ eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b
+ eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b
+ eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b
+ eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1, #16*0]
+ str q_d1_1, [x_dest1, #16*1]
+ str q_d1_2, [x_dest1, #16*2]
+ str q_d1_3, [x_dest1, #16*3]
+ add x_dest1, x_dest1, #64
+
+ str q_d2_0, [x_dest2, #16*0]
+ str q_d2_1, [x_dest2, #16*1]
+ str q_d2_2, [x_dest2, #16*2]
+ str q_d2_3, [x_dest2, #16*3]
+ add x_dest2, x_dest2, #64
+
+ str q_d3_0, [x_dest3, #16*0]
+ str q_d3_1, [x_dest3, #16*1]
+ str q_d3_2, [x_dest3, #16*2]
+ str q_d3_3, [x_dest3, #16*3]
+ add x_dest3, x_dest3, #64
+
+ cmp x_src, x_src_end
+ bls .Lloop64
+
+.Lloop64_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+ add x_src_end, x_src_end, #64
+
+.Lloop16_init:
+ sub x_src_end, x_src_end, #16
+ cmp x_src, x_src_end
+ bhi .lessthan16_init
+
+.Lloop16:
+ ldr q_data, [x_src]
+
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ str q_d3_0, [x_dest3]
+
+ add x_src, x_src, #16
+ add x_dest1, x_dest1, #16
+ add x_dest2, x_dest2, #16
+ add x_dest3, x_dest3, #16
+ cmp x_src, x_src_end
+ bls .Lloop16
+
+.lessthan16_init:
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16:
+ mov x_src, x_src_end
+ sub x_dest1, x_dest1, x_tmp
+ sub x_dest2, x_dest2, x_tmp
+ sub x_dest3, x_dest3, x_tmp
+
+ adrp x_const, const_tbl
+ add x_const, x_const, :lo12:const_tbl
+ sub x_const, x_const, x_tmp
+ ldr q_tmp, [x_const, #16]
+
+ ldr q_data, [x_src]
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ str q_d3_0, [x_dest3]
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
+
+.section .rodata
+.balign 8
+const_tbl:
+ .dword 0x0000000000000000, 0x0000000000000000
+ .dword 0xffffffffffffffff, 0xffffffffffffffff
diff --git a/src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S
new file mode 100644
index 000000000..2cfe5aab0
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S
@@ -0,0 +1,421 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+.text
+
+.global gf_4vect_dot_prod_neon
+.type gf_4vect_dot_prod_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_tbl .req x2
+x_src .req x3
+x_dest .req x4
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_vec_i .req x5
+x_ptr .req x6
+x_pos .req x7
+x_tmp .req x8
+x_dest1 .req x9
+x_tbl1 .req x10
+x_dest2 .req x11
+x_tbl2 .req x12
+x_dest3 .req x13
+x_tbl3 .req x14
+x_dest4 .req x_dest
+x_tbl4 .req x15
+
+/* vectors */
+v_mask0f .req v0
+q_mask0f .req q0
+v_tmp1_lo .req v1
+v_tmp1_hi .req v2
+v_tmp1 .req v3
+q_tmp1 .req q3
+
+v_p1_0 .req v4
+v_p2_0 .req v5
+v_p3_0 .req v6
+v_p4_0 .req v7
+
+q_p1_0 .req q4
+q_p2_0 .req q5
+q_p3_0 .req q6
+q_p4_0 .req q7
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+
+v_p1_3 .req v12
+v_p2_3 .req v13
+v_p3_3 .req v14
+v_p4_3 .req v15
+q_p1_3 .req q12
+q_p2_3 .req q13
+q_p3_3 .req q14
+q_p4_3 .req q15
+
+v_gft1_lo .req v16
+v_gft1_hi .req v17
+v_gft2_lo .req v18
+v_gft2_hi .req v19
+v_gft3_lo .req v20
+v_gft3_hi .req v21
+v_gft4_lo .req v22
+v_gft4_hi .req v23
+q_gft1_lo .req q16
+q_gft1_hi .req q17
+q_gft2_lo .req q18
+q_gft2_hi .req q19
+q_gft3_lo .req q20
+q_gft3_hi .req q21
+q_gft4_lo .req q22
+q_gft4_hi .req q23
+
+v_p1_1 .req v24
+v_p1_2 .req v25
+v_p2_1 .req v26
+v_p2_2 .req v27
+v_p3_1 .req v28
+v_p3_2 .req v29
+v_p4_1 .req v30
+v_p4_2 .req v31
+
+q_p1_1 .req q24
+q_p1_2 .req q25
+q_p2_1 .req q26
+q_p2_2 .req q27
+q_p3_1 .req q28
+q_p3_2 .req q29
+q_p4_1 .req q30
+q_p4_2 .req q31
+
+v_data .req v_tmp1
+q_data .req q_tmp1
+v_data_lo .req v_tmp1_lo
+v_data_hi .req v_tmp1_hi
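+
+/* All 32 NEON registers are committed to the 4x-unrolled main loop, so the
+ * 16-byte fallback loop reuses v_tmp1 and its halves for the data vector. */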
+
+gf_4vect_dot_prod_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ mov x_pos, #0
+ lsl x_vec, x_vec, #3
+ ldr x_dest1, [x_dest, #8*0]
+ ldr x_dest2, [x_dest, #8*1]
+ ldr x_dest3, [x_dest, #8*2]
+ ldr x_dest4, [x_dest, #8*3]
+
+.Lloop64_init:
+ /* less than 64 bytes, goto Lloop16_init */
+ cmp x_len, #64
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_len, x_len, #64
+
+.Lloop64:
+ movi v_p1_0.16b, #0
+ movi v_p1_1.16b, #0
+ movi v_p1_2.16b, #0
+ movi v_p1_3.16b, #0
+ movi v_p2_0.16b, #0
+ movi v_p2_1.16b, #0
+ movi v_p2_2.16b, #0
+ movi v_p2_3.16b, #0
+ movi v_p3_0.16b, #0
+ movi v_p3_1.16b, #0
+ movi v_p3_2.16b, #0
+ movi v_p3_3.16b, #0
+ movi v_p4_0.16b, #0
+ movi v_p4_1.16b, #0
+ movi v_p4_2.16b, #0
+ movi v_p4_3.16b, #0
+
+ mov x_tbl1, x_tbl
+ add x_tbl2, x_tbl1, x_vec, lsl #2
+ add x_tbl3, x_tbl2, x_vec, lsl #2
+ add x_tbl4, x_tbl3, x_vec, lsl #2
+ mov x_vec_i, #0
+ prfm pldl1keep, [x_tbl1]
+ prfm pldl1keep, [x_tbl2]
+ prfm pldl1keep, [x_tbl3]
+ prfm pldl1keep, [x_tbl4]
+
+.Lloop64_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ add x_vec_i, x_vec_i, #8
+ add x_ptr, x_ptr, x_pos
+
+ ldr q_data_0, [x_ptr], #16
+ ldr q_data_1, [x_ptr], #16
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+ ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+ ldp q_gft3_lo, q_gft3_hi, [x_tbl3], #32
+ ldp q_gft4_lo, q_gft4_hi, [x_tbl4], #32
+ ldr q_data_2, [x_ptr], #16
+ ldr q_data_3, [x_ptr], #16
+
+ prfm pldl1strm, [x_ptr]
+ prfm pldl1keep, [x_tbl1]
+ prfm pldl1keep, [x_tbl2]
+ prfm pldl1keep, [x_tbl3]
+ prfm pldl1keep, [x_tbl4]
+
+ /* data_0 */
+ and v_tmp1.16b, v_data_0.16b, v_mask0f.16b
+ ushr v_data_0.16b, v_data_0.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_0.16b
+ eor v_p1_0.16b, v_tmp1_lo.16b, v_p1_0.16b
+ eor v_p1_0.16b, v_p1_0.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_0.16b
+ eor v_p2_0.16b, v_tmp1_lo.16b, v_p2_0.16b
+ eor v_p2_0.16b, v_p2_0.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_0.16b
+ eor v_p3_0.16b, v_tmp1_lo.16b, v_p3_0.16b
+ eor v_p3_0.16b, v_p3_0.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_0.16b
+ eor v_p4_0.16b, v_tmp1_lo.16b, v_p4_0.16b
+ eor v_p4_0.16b, v_p4_0.16b, v_tmp1_hi.16b
+
+ /* data_1 */
+ and v_tmp1.16b, v_data_1.16b, v_mask0f.16b
+ ushr v_data_1.16b, v_data_1.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_1.16b
+ eor v_p1_1.16b, v_tmp1_lo.16b, v_p1_1.16b
+ eor v_p1_1.16b, v_p1_1.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_1.16b
+ eor v_p2_1.16b, v_tmp1_lo.16b, v_p2_1.16b
+ eor v_p2_1.16b, v_p2_1.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_1.16b
+ eor v_p3_1.16b, v_tmp1_lo.16b, v_p3_1.16b
+ eor v_p3_1.16b, v_p3_1.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_1.16b
+ eor v_p4_1.16b, v_tmp1_lo.16b, v_p4_1.16b
+ eor v_p4_1.16b, v_p4_1.16b, v_tmp1_hi.16b
+
+ /* data_2 */
+ and v_tmp1.16b, v_data_2.16b, v_mask0f.16b
+ ushr v_data_2.16b, v_data_2.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2.16b
+ eor v_p1_2.16b, v_tmp1_lo.16b, v_p1_2.16b
+ eor v_p1_2.16b, v_p1_2.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_2.16b
+ eor v_p2_2.16b, v_tmp1_lo.16b, v_p2_2.16b
+ eor v_p2_2.16b, v_p2_2.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_2.16b
+ eor v_p3_2.16b, v_tmp1_lo.16b, v_p3_2.16b
+ eor v_p3_2.16b, v_p3_2.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_2.16b
+ eor v_p4_2.16b, v_tmp1_lo.16b, v_p4_2.16b
+ eor v_p4_2.16b, v_p4_2.16b, v_tmp1_hi.16b
+
+ /* data_3 */
+ and v_tmp1.16b, v_data_3.16b, v_mask0f.16b
+ ushr v_data_3.16b, v_data_3.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_3.16b
+ eor v_p1_3.16b, v_tmp1_lo.16b, v_p1_3.16b
+ eor v_p1_3.16b, v_p1_3.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_3.16b
+ eor v_p2_3.16b, v_tmp1_lo.16b, v_p2_3.16b
+ eor v_p2_3.16b, v_p2_3.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_3.16b
+ eor v_p3_3.16b, v_tmp1_lo.16b, v_p3_3.16b
+ eor v_p3_3.16b, v_p3_3.16b, v_tmp1_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b
+ tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_3.16b
+ eor v_p4_3.16b, v_tmp1_lo.16b, v_p4_3.16b
+ eor v_p4_3.16b, v_p4_3.16b, v_tmp1_hi.16b
+
+ cmp x_vec_i, x_vec
+ blt .Lloop64_vects
+
+.Lloop64_vects_end:
+ add x_ptr, x_dest1, x_pos
+ stp q_p1_0, q_p1_1, [x_ptr], #32
+ stp q_p1_2, q_p1_3, [x_ptr]
+
+ add x_ptr, x_dest2, x_pos
+ stp q_p2_0, q_p2_1, [x_ptr], #32
+ stp q_p2_2, q_p2_3, [x_ptr]
+
+ add x_ptr, x_dest3, x_pos
+ stp q_p3_0, q_p3_1, [x_ptr], #32
+ stp q_p3_2, q_p3_3, [x_ptr]
+
+ add x_ptr, x_dest4, x_pos
+ stp q_p4_0, q_p4_1, [x_ptr], #32
+ stp q_p4_2, q_p4_3, [x_ptr]
+
+ add x_pos, x_pos, #64
+ cmp x_pos, x_len
+ ble .Lloop64
+
+.Lloop64_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ add x_len, x_len, #64
+ cmp x_pos, x_len
+ beq .return_pass
+
+.Lloop16_init:
+ sub x_len, x_len, #16
+ cmp x_pos, x_len
+ bgt .lessthan16_init
+
+.Lloop16:
+ movi v_p1_0.16b, #0
+ movi v_p2_0.16b, #0
+ movi v_p3_0.16b, #0
+ movi v_p4_0.16b, #0
+ mov x_tbl1, x_tbl
+ add x_tbl2, x_tbl1, x_vec, lsl #2
+ add x_tbl3, x_tbl2, x_vec, lsl #2
+ add x_tbl4, x_tbl3, x_vec, lsl #2
+ mov x_vec_i, #0
+
+.Lloop16_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ add x_vec_i, x_vec_i, #8
+ ldr q_data, [x_ptr, x_pos]
+
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+ ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+ ldp q_gft3_lo, q_gft3_hi, [x_tbl3], #32
+ ldp q_gft4_lo, q_gft4_hi, [x_tbl4], #32
+
+ prfm pldl1keep, [x_tbl1]
+ prfm pldl1keep, [x_tbl2]
+ prfm pldl1keep, [x_tbl3]
+ prfm pldl1keep, [x_tbl4]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_gft1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_gft1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ tbl v_gft2_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_gft2_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ tbl v_gft3_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_gft3_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ tbl v_gft4_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_gft4_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+
+ eor v_p1_0.16b, v_gft1_hi.16b, v_p1_0.16b
+ eor v_p1_0.16b, v_p1_0.16b, v_gft1_lo.16b
+ eor v_p2_0.16b, v_gft2_hi.16b, v_p2_0.16b
+ eor v_p2_0.16b, v_p2_0.16b, v_gft2_lo.16b
+ eor v_p3_0.16b, v_gft3_hi.16b, v_p3_0.16b
+ eor v_p3_0.16b, v_p3_0.16b, v_gft3_lo.16b
+ eor v_p4_0.16b, v_gft4_hi.16b, v_p4_0.16b
+ eor v_p4_0.16b, v_p4_0.16b, v_gft4_lo.16b
+
+ cmp x_vec_i, x_vec
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ str q_p1_0, [x_dest1, x_pos]
+ str q_p2_0, [x_dest2, x_pos]
+ str q_p3_0, [x_dest3, x_pos]
+ str q_p4_0, [x_dest4, x_pos]
+ add x_pos, x_pos, #16
+ cmp x_pos, x_len
+ ble .Lloop16
+
+.Lloop16_end:
+ sub x_tmp, x_pos, x_len
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16_init:
+ mov x_pos, x_len
+ b .Lloop16
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S
new file mode 100644
index 000000000..869243702
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S
@@ -0,0 +1,456 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_4vect_mad_neon
+.type gf_4vect_mad_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_vec_i .req x2
+x_tbl .req x3
+x_src .req x4
+x_dest .req x5
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_src_end .req x6
+x_dest1 .req x7
+x_dest2 .req x8
+x_dest3 .req x9
+x_dest4 .req x_dest
+x_tmp .req x10
+x_tbl1 .req x11
+x_tbl2 .req x12
+x_tbl3 .req x13
+x_tbl4 .req x14
+x_const .req x15
+
+/* vectors */
+v_mask0f .req v0
+v_tmp_lo .req v1
+v_tmp_hi .req v2
+v_tmp .req v3
+q_tmp .req q3
+
+v_gft1_lo .req v4
+v_gft1_hi .req v5
+v_gft2_lo .req v6
+v_gft2_hi .req v7
+v_gft3_lo .req v16
+v_gft3_hi .req v17
+v_gft4_lo .req v18
+v_gft4_hi .req v19
+q_gft1_lo .req q4
+q_gft1_hi .req q5
+q_gft2_lo .req q6
+q_gft2_hi .req q7
+q_gft3_lo .req q16
+q_gft3_hi .req q17
+q_gft4_lo .req q18
+q_gft4_hi .req q19
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+
+v_data_0_lo .req v12
+v_data_1_lo .req v13
+v_data_2_lo .req v14
+v_data_3_lo .req v15
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+
+v_d1_0 .req v20
+v_d1_1 .req v21
+v_d1_2 .req v22
+v_d1_3 .req v23
+v_d2_0 .req v24
+v_d2_1 .req v25
+v_d2_2 .req v26
+v_d2_3 .req v27
+v_d3_0 .req v28
+v_d3_1 .req v29
+v_d3_2 .req v30
+v_d3_3 .req v31
+q_d1_0 .req q20
+q_d1_1 .req q21
+q_d1_2 .req q22
+q_d1_3 .req q23
+q_d2_0 .req q24
+q_d2_1 .req q25
+q_d2_2 .req q26
+q_d2_3 .req q27
+q_d3_0 .req q28
+q_d3_1 .req q29
+q_d3_2 .req q30
+q_d3_3 .req q31
+
+v_d4_0 .req v_d1_0
+v_d4_1 .req v_d1_1
+v_d4_2 .req v_d1_2
+v_d4_3 .req v_d1_3
+q_d4_0 .req q_d1_0
+q_d4_1 .req q_d1_1
+q_d4_2 .req q_d1_2
+q_d4_3 .req q_d1_3
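+
+/* v_d4_x aliases v_d1_x: the main loop stores the dest1 and dest2 blocks
+ * before loading dest3 and dest4 into the freed registers. */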
+
+v_data .req v21
+q_data .req q21
+v_data_lo .req v22
+v_data_hi .req v23
+
+gf_4vect_mad_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ lsl x_vec_i, x_vec_i, #5
+ lsl x_vec, x_vec, #5
+ add x_tbl1, x_tbl, x_vec_i
+ add x_tbl2, x_tbl1, x_vec
+ add x_tbl3, x_tbl2, x_vec
+ add x_tbl4, x_tbl3, x_vec
+ add x_src_end, x_src, x_len
+ ldr x_dest1, [x_dest, #8*0]
+ ldr x_dest2, [x_dest, #8*1]
+ ldr x_dest3, [x_dest, #8*2]
+ ldr x_dest4, [x_dest, #8*3]
+ ldr q_gft1_lo, [x_tbl1]
+ ldr q_gft1_hi, [x_tbl1, #16]
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+ ldr q_gft3_lo, [x_tbl3]
+ ldr q_gft3_hi, [x_tbl3, #16]
+ ldr q_gft4_lo, [x_tbl4]
+ ldr q_gft4_hi, [x_tbl4, #16]
+
+.Lloop64_init:
+ /* less than 64 bytes, goto Lloop16_init */
+ cmp x_len, #64
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src_end, x_src_end, #64
+
+.Lloop64:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3, [x_src, #16*3]
+ add x_src, x_src, #64
+
+ ldr q_d1_0, [x_dest1, #16*0]
+ ldr q_d1_1, [x_dest1, #16*1]
+ ldr q_d1_2, [x_dest1, #16*2]
+ ldr q_d1_3, [x_dest1, #16*3]
+
+ ldr q_d2_0, [x_dest2, #16*0]
+ ldr q_d2_1, [x_dest2, #16*1]
+ ldr q_d2_2, [x_dest2, #16*2]
+ ldr q_d2_3, [x_dest2, #16*3]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+
+ /* dest1 */
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b
+ eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b
+ eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+ eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b
+ eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b
+
+ /* dest2 */
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
+ eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b
+ eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
+ eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b
+ eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
+ eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b
+ eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1, #16*0]
+ str q_d1_1, [x_dest1, #16*1]
+ str q_d1_2, [x_dest1, #16*2]
+ str q_d1_3, [x_dest1, #16*3]
+ add x_dest1, x_dest1, #64
+
+ str q_d2_0, [x_dest2, #16*0]
+ str q_d2_1, [x_dest2, #16*1]
+ str q_d2_2, [x_dest2, #16*2]
+ str q_d2_3, [x_dest2, #16*3]
+ add x_dest2, x_dest2, #64
+
+ ldr q_d3_0, [x_dest3, #16*0]
+ ldr q_d3_1, [x_dest3, #16*1]
+ ldr q_d3_2, [x_dest3, #16*2]
+ ldr q_d3_3, [x_dest3, #16*3]
+
+ ldr q_d4_0, [x_dest4, #16*0]
+ ldr q_d4_1, [x_dest4, #16*1]
+ ldr q_d4_2, [x_dest4, #16*2]
+ ldr q_d4_3, [x_dest4, #16*3]
+
+ /* dest3 */
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b
+ eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b
+ eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b
+ eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b
+ eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b
+ eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b
+ eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b
+
+ /* dest4 */
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b
+ eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b
+ eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b
+ eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b
+ eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b
+ eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b
+ eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b
+ eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b
+
+ str q_d3_0, [x_dest3, #16*0]
+ str q_d3_1, [x_dest3, #16*1]
+ str q_d3_2, [x_dest3, #16*2]
+ str q_d3_3, [x_dest3, #16*3]
+ add x_dest3, x_dest3, #64
+
+ str q_d4_0, [x_dest4, #16*0]
+ str q_d4_1, [x_dest4, #16*1]
+ str q_d4_2, [x_dest4, #16*2]
+ str q_d4_3, [x_dest4, #16*3]
+ add x_dest4, x_dest4, #64
+
+ cmp x_src, x_src_end
+ bls .Lloop64
+
+.Lloop64_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+ add x_src_end, x_src_end, #64
+
+.Lloop16_init:
+ sub x_src_end, x_src_end, #16
+ cmp x_src, x_src_end
+ bhi .lessthan16_init
+
+.Lloop16:
+ ldr q_data, [x_src]
+
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+ ldr q_d4_0, [x_dest4]
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+ eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ str q_d3_0, [x_dest3]
+ str q_d4_0, [x_dest4]
+
+ add x_src, x_src, #16
+ add x_dest1, x_dest1, #16
+ add x_dest2, x_dest2, #16
+ add x_dest3, x_dest3, #16
+ add x_dest4, x_dest4, #16
+ cmp x_src, x_src_end
+ bls .Lloop16
+
+.lessthan16_init:
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16:
+ mov x_src, x_src_end
+ sub x_dest1, x_dest1, x_tmp
+ sub x_dest2, x_dest2, x_tmp
+ sub x_dest3, x_dest3, x_tmp
+ sub x_dest4, x_dest4, x_tmp
+
+ adrp x_const, const_tbl
+ add x_const, x_const, :lo12:const_tbl
+ sub x_const, x_const, x_tmp
+ ldr q_tmp, [x_const, #16]
+
+ ldr q_data, [x_src]
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+ ldr q_d4_0, [x_dest4]
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ str q_d3_0, [x_dest3]
+ str q_d4_0, [x_dest4]
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
+
+.section .rodata
+.balign 8
+const_tbl:
+ .dword 0x0000000000000000, 0x0000000000000000
+ .dword 0xffffffffffffffff, 0xffffffffffffffff
diff --git a/src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S
new file mode 100644
index 000000000..2e4dea4ad
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S
@@ -0,0 +1,481 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_5vect_dot_prod_neon
+.type gf_5vect_dot_prod_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_tbl .req x2
+x_src .req x3
+x_dest .req x4
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_vec_i .req x5
+x_ptr .req x6
+x_pos .req x7
+x_tmp .req x8
+x_dest1 .req x9
+x_dest2 .req x10
+x_dest3 .req x11
+x_dest4 .req x12
+x_dest5 .req x13
+
+/* vectors */
+v_tmp1 .req v0
+q_tmp1 .req q0
+v_tmp2 .req v1
+q_tmp2 .req q1
+
+v_mask0f .req v_tmp1
+q_mask0f .req q_tmp1
+v_tmp_lo .req v_tmp1
+v_tmp_hi .req v_tmp2
+
+v_gft_lo .req v2
+v_gft_hi .req v3
+q_gft_lo .req q2
+q_gft_hi .req q3
+
+v_p1_0 .req v4
+v_p2_0 .req v5
+v_p3_0 .req v6
+v_p4_0 .req v7
+
+q_p1_0 .req q4
+q_p2_0 .req q5
+q_p3_0 .req q6
+q_p4_0 .req q7
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+
+v_data_0_lo .req v12
+v_data_1_lo .req v13
+v_data_2_lo .req v14
+v_data_3_lo .req v15
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+
+v_p5_0 .req v16
+v_p1_1 .req v17
+v_p2_1 .req v18
+v_p3_1 .req v19
+v_p4_1 .req v20
+v_p5_1 .req v21
+v_p1_2 .req v22
+v_p2_2 .req v23
+v_p3_2 .req v24
+v_p4_2 .req v25
+v_p5_2 .req v26
+v_p1_3 .req v27
+v_p2_3 .req v28
+v_p3_3 .req v29
+v_p4_3 .req v30
+v_p5_3 .req v31
+
+q_p5_0 .req q16
+q_p1_1 .req q17
+q_p2_1 .req q18
+q_p3_1 .req q19
+q_p4_1 .req q20
+q_p5_1 .req q21
+q_p1_2 .req q22
+q_p2_2 .req q23
+q_p3_2 .req q24
+q_p4_2 .req q25
+q_p5_2 .req q26
+q_p1_3 .req q27
+q_p2_3 .req q28
+q_p3_3 .req q29
+q_p4_3 .req q30
+q_p5_3 .req q31
+
+v_data .req v_p1_1
+q_data .req q_p1_1
+v_data_lo .req v_p2_1
+v_data_hi .req v_p3_1
+
+v_gft1_lo .req v_p4_1
+v_gft1_hi .req v_p5_1
+v_gft2_lo .req v_p1_2
+v_gft2_hi .req v_p2_2
+v_gft3_lo .req v_p3_2
+v_gft3_hi .req v_p4_2
+v_gft4_lo .req v_p5_2
+v_gft4_hi .req v_p1_3
+v_gft5_lo .req v_p2_3
+v_gft5_hi .req v_p3_3
+q_gft1_lo .req q_p4_1
+q_gft1_hi .req q_p5_1
+q_gft2_lo .req q_p1_2
+q_gft2_hi .req q_p2_2
+q_gft3_lo .req q_p3_2
+q_gft3_hi .req q_p4_2
+q_gft4_lo .req q_p5_2
+q_gft4_hi .req q_p1_3
+q_gft5_lo .req q_p2_3
+q_gft5_hi .req q_p3_3
+
+
+gf_5vect_dot_prod_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ mov x_pos, #0
+ lsl x_vec, x_vec, #3
+ ldr x_dest1, [x_dest, #8*0]
+ ldr x_dest2, [x_dest, #8*1]
+ ldr x_dest3, [x_dest, #8*2]
+ ldr x_dest4, [x_dest, #8*3]
+ ldr x_dest5, [x_dest, #8*4]
+
+.Lloop64_init:
+ /* less than 64 bytes, goto Lloop16_init */
+ cmp x_len, #64
+ blt .Lloop16_init
+
+	/* save d8 ~ d15 to stack (AAPCS64: the low 64 bits of v8-v15 are callee-saved) */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_len, x_len, #64
+
+.Lloop64:
+ movi v_p1_0.16b, #0
+ movi v_p1_1.16b, #0
+ movi v_p1_2.16b, #0
+ movi v_p1_3.16b, #0
+ movi v_p2_0.16b, #0
+ movi v_p2_1.16b, #0
+ movi v_p2_2.16b, #0
+ movi v_p2_3.16b, #0
+ movi v_p3_0.16b, #0
+ movi v_p3_1.16b, #0
+ movi v_p3_2.16b, #0
+ movi v_p3_3.16b, #0
+ movi v_p4_0.16b, #0
+ movi v_p4_1.16b, #0
+ movi v_p4_2.16b, #0
+ movi v_p4_3.16b, #0
+ movi v_p5_0.16b, #0
+ movi v_p5_1.16b, #0
+ movi v_p5_2.16b, #0
+ movi v_p5_3.16b, #0
+ mov x_vec_i, #0
+
+.Lloop64_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ add x_ptr, x_ptr, x_pos
+
+ ldr q_data_0, [x_ptr], #16
+ ldr q_data_1, [x_ptr], #16
+ ldr q_data_2, [x_ptr], #16
+ ldr q_data_3, [x_ptr], #16
+ prfm pldl2keep, [x_ptr]
+
+ movi v_mask0f.16b, #0x0f
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
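+
+	/*
+	 * GF(2^8) multiply by table lookup: each byte is split into its
+	 * low and high nibble, each nibble indexes a 16-entry table, and
+	 * the two lookups are XORed.  Roughly, in scalar form:
+	 *   prod = gft_lo[b & 0x0f] ^ gft_hi[b >> 4]
+	 */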
+
+ /* v_p1_x */
+ add x_tmp, x_tbl, x_vec_i, lsl #2
+ add x_vec_i, x_vec_i, #8
+ ldp q_gft_lo, q_gft_hi, [x_tmp]
+ prfm pldl3keep, [x_tmp, #32]
+ add x_tmp, x_tmp, x_vec, lsl #2
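+	/* x_vec holds vlen*8, so "x_vec, lsl #2" advances x_tmp by
+	 * vlen*32 bytes: from this source's table for one destination
+	 * to its table for the next. */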
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b
+ eor v_p1_0.16b, v_tmp_lo.16b, v_p1_0.16b
+ eor v_p1_0.16b, v_p1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b
+ eor v_p1_1.16b, v_tmp_lo.16b, v_p1_1.16b
+ eor v_p1_1.16b, v_p1_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b
+ eor v_p1_2.16b, v_tmp_lo.16b, v_p1_2.16b
+ eor v_p1_2.16b, v_p1_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b
+ eor v_p1_3.16b, v_tmp_lo.16b, v_p1_3.16b
+ eor v_p1_3.16b, v_p1_3.16b, v_tmp_hi.16b
+
+ /* v_p2_x */
+ ldp q_gft_lo, q_gft_hi, [x_tmp]
+ prfm pldl3keep, [x_tmp, #32]
+ add x_tmp, x_tmp, x_vec, lsl #2
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b
+ eor v_p2_0.16b, v_tmp_lo.16b, v_p2_0.16b
+ eor v_p2_0.16b, v_p2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b
+ eor v_p2_1.16b, v_tmp_lo.16b, v_p2_1.16b
+ eor v_p2_1.16b, v_p2_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b
+ eor v_p2_2.16b, v_tmp_lo.16b, v_p2_2.16b
+ eor v_p2_2.16b, v_p2_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b
+ eor v_p2_3.16b, v_tmp_lo.16b, v_p2_3.16b
+ eor v_p2_3.16b, v_p2_3.16b, v_tmp_hi.16b
+
+ /* v_p3_x */
+ ldp q_gft_lo, q_gft_hi, [x_tmp]
+ prfm pldl3keep, [x_tmp, #32]
+ add x_tmp, x_tmp, x_vec, lsl #2
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b
+ eor v_p3_0.16b, v_tmp_lo.16b, v_p3_0.16b
+ eor v_p3_0.16b, v_p3_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b
+ eor v_p3_1.16b, v_tmp_lo.16b, v_p3_1.16b
+ eor v_p3_1.16b, v_p3_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b
+ eor v_p3_2.16b, v_tmp_lo.16b, v_p3_2.16b
+ eor v_p3_2.16b, v_p3_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b
+ eor v_p3_3.16b, v_tmp_lo.16b, v_p3_3.16b
+ eor v_p3_3.16b, v_p3_3.16b, v_tmp_hi.16b
+
+ /* v_p4_x */
+ ldp q_gft_lo, q_gft_hi, [x_tmp]
+ prfm pldl3keep, [x_tmp, #32]
+ add x_tmp, x_tmp, x_vec, lsl #2
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b
+ eor v_p4_0.16b, v_tmp_lo.16b, v_p4_0.16b
+ eor v_p4_0.16b, v_p4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b
+ eor v_p4_1.16b, v_tmp_lo.16b, v_p4_1.16b
+ eor v_p4_1.16b, v_p4_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b
+ eor v_p4_2.16b, v_tmp_lo.16b, v_p4_2.16b
+ eor v_p4_2.16b, v_p4_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b
+ eor v_p4_3.16b, v_tmp_lo.16b, v_p4_3.16b
+ eor v_p4_3.16b, v_p4_3.16b, v_tmp_hi.16b
+
+ /* v_p5_x */
+ ldp q_gft_lo, q_gft_hi, [x_tmp]
+ prfm pldl3keep, [x_tmp, #32]
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b
+ eor v_p5_0.16b, v_tmp_lo.16b, v_p5_0.16b
+ eor v_p5_0.16b, v_p5_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b
+ eor v_p5_1.16b, v_tmp_lo.16b, v_p5_1.16b
+ eor v_p5_1.16b, v_p5_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b
+ eor v_p5_2.16b, v_tmp_lo.16b, v_p5_2.16b
+ eor v_p5_2.16b, v_p5_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b
+ eor v_p5_3.16b, v_tmp_lo.16b, v_p5_3.16b
+ eor v_p5_3.16b, v_p5_3.16b, v_tmp_hi.16b
+
+ cmp x_vec_i, x_vec
+ blt .Lloop64_vects
+
+.Lloop64_vects_end:
+ add x_ptr, x_dest1, x_pos
+ stp q_p1_0, q_p1_1, [x_ptr], #32
+ stp q_p1_2, q_p1_3, [x_ptr]
+
+ add x_ptr, x_dest2, x_pos
+ stp q_p2_0, q_p2_1, [x_ptr], #32
+ stp q_p2_2, q_p2_3, [x_ptr]
+
+ add x_ptr, x_dest3, x_pos
+ stp q_p3_0, q_p3_1, [x_ptr], #32
+ stp q_p3_2, q_p3_3, [x_ptr]
+
+ add x_ptr, x_dest4, x_pos
+ stp q_p4_0, q_p4_1, [x_ptr], #32
+ stp q_p4_2, q_p4_3, [x_ptr]
+
+ add x_ptr, x_dest5, x_pos
+ stp q_p5_0, q_p5_1, [x_ptr], #32
+ stp q_p5_2, q_p5_3, [x_ptr]
+
+ add x_pos, x_pos, #64
+ cmp x_pos, x_len
+ ble .Lloop64
+
+.Lloop64_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ add x_len, x_len, #64
+ cmp x_pos, x_len
+ beq .return_pass
+
+.Lloop16_init:
+ sub x_len, x_len, #16
+ cmp x_pos, x_len
+ bgt .lessthan16_init
+
+.Lloop16:
+ movi v_p1_0.16b, #0
+ movi v_p2_0.16b, #0
+ movi v_p3_0.16b, #0
+ movi v_p4_0.16b, #0
+ movi v_p5_0.16b, #0
+ mov x_vec_i, #0
+
+.Lloop16_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ ldr q_data, [x_ptr, x_pos]
+
+ movi v_mask0f.16b, #0x0f
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ add x_tmp, x_tbl, x_vec_i, lsl #2
+ add x_vec_i, x_vec_i, #8
+ ldp q_gft1_lo, q_gft1_hi, [x_tmp]
+ add x_tmp, x_tmp, x_vec, lsl #2
+ ldp q_gft2_lo, q_gft2_hi, [x_tmp]
+ add x_tmp, x_tmp, x_vec, lsl #2
+ ldp q_gft3_lo, q_gft3_hi, [x_tmp]
+ add x_tmp, x_tmp, x_vec, lsl #2
+ ldp q_gft4_lo, q_gft4_hi, [x_tmp]
+ add x_tmp, x_tmp, x_vec, lsl #2
+ ldp q_gft5_lo, q_gft5_hi, [x_tmp]
+
+ tbl v_gft1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_gft1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ tbl v_gft2_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_gft2_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ tbl v_gft3_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_gft3_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ tbl v_gft4_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_gft4_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+ tbl v_gft5_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
+ tbl v_gft5_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
+
+ eor v_p1_0.16b, v_gft1_hi.16b, v_p1_0.16b
+ eor v_p1_0.16b, v_p1_0.16b, v_gft1_lo.16b
+ eor v_p2_0.16b, v_gft2_hi.16b, v_p2_0.16b
+ eor v_p2_0.16b, v_p2_0.16b, v_gft2_lo.16b
+ eor v_p3_0.16b, v_gft3_hi.16b, v_p3_0.16b
+ eor v_p3_0.16b, v_p3_0.16b, v_gft3_lo.16b
+ eor v_p4_0.16b, v_gft4_hi.16b, v_p4_0.16b
+ eor v_p4_0.16b, v_p4_0.16b, v_gft4_lo.16b
+ eor v_p5_0.16b, v_gft5_hi.16b, v_p5_0.16b
+ eor v_p5_0.16b, v_p5_0.16b, v_gft5_lo.16b
+
+ cmp x_vec_i, x_vec
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ str q_p1_0, [x_dest1, x_pos]
+ str q_p2_0, [x_dest2, x_pos]
+ str q_p3_0, [x_dest3, x_pos]
+ str q_p4_0, [x_dest4, x_pos]
+ str q_p5_0, [x_dest5, x_pos]
+ add x_pos, x_pos, #16
+ cmp x_pos, x_len
+ ble .Lloop16
+
+.Lloop16_end:
+ sub x_tmp, x_pos, x_len
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16_init:
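+	/*
+	 * 0 < remaining < 16: rerun the 16-byte loop once on the final
+	 * window [x_len, x_len + 16).  It overlaps bytes written by the
+	 * previous pass, but every pass recomputes the products from
+	 * zero, so the overlapped bytes are rewritten with the same
+	 * values.
+	 */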
+ mov x_pos, x_len
+ b .Lloop16
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S
new file mode 100644
index 000000000..f0ff163fe
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S
@@ -0,0 +1,535 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+.text
+
+.global gf_5vect_mad_neon
+.type gf_5vect_mad_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_vec_i .req x2
+x_tbl .req x3
+x_src .req x4
+x_dest .req x5
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_src_end .req x6
+x_dest1 .req x7
+x_dest2 .req x8
+x_dest3 .req x9
+x_dest4 .req x10
+x_dest5 .req x_dest
+x_tmp .req x11
+x_tbl1 .req x12
+x_tbl2 .req x13
+x_tbl3 .req x14
+x_tbl4 .req x15
+x_tbl5 .req x16
+x_const .req x17
+
+/* vectors */
+v_mask0f .req v0
+v_tmp_lo .req v1
+v_tmp_hi .req v2
+v_tmp .req v3
+q_tmp .req q3
+
+v_gft1_lo .req v4
+v_gft1_hi .req v5
+v_gft2_lo .req v6
+v_gft2_hi .req v7
+v_gft3_lo .req v16
+v_gft3_hi .req v17
+q_gft1_lo .req q4
+q_gft1_hi .req q5
+q_gft2_lo .req q6
+q_gft2_hi .req q7
+q_gft3_lo .req q16
+q_gft3_hi .req q17
+
+v_gft4_lo .req v18
+v_gft4_hi .req v19
+q_gft4_lo .req q18
+q_gft4_hi .req q19
+v_gft5_lo .req v_gft2_lo
+v_gft5_hi .req v_gft2_hi
+q_gft5_lo .req q_gft2_lo
+q_gft5_hi .req q_gft2_hi
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+
+v_data_0_lo .req v12
+v_data_1_lo .req v13
+v_data_2_lo .req v14
+v_data_3_lo .req v15
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+
+v_d1_0 .req v20
+v_d1_1 .req v21
+v_d1_2 .req v22
+v_d1_3 .req v23
+v_d2_0 .req v24
+v_d2_1 .req v25
+v_d2_2 .req v26
+v_d2_3 .req v27
+v_d3_0 .req v28
+v_d3_1 .req v29
+v_d3_2 .req v30
+v_d3_3 .req v31
+q_d1_0 .req q20
+q_d1_1 .req q21
+q_d1_2 .req q22
+q_d1_3 .req q23
+q_d2_0 .req q24
+q_d2_1 .req q25
+q_d2_2 .req q26
+q_d2_3 .req q27
+q_d3_0 .req q28
+q_d3_1 .req q29
+q_d3_2 .req q30
+q_d3_3 .req q31
+
+v_d4_0 .req v_d1_0
+v_d4_1 .req v_d1_1
+v_d4_2 .req v_d1_2
+v_d4_3 .req v_d1_3
+q_d4_0 .req q_d1_0
+q_d4_1 .req q_d1_1
+q_d4_2 .req q_d1_2
+q_d4_3 .req q_d1_3
+v_d5_0 .req v_d2_0
+v_d5_1 .req v_d2_1
+v_d5_2 .req v_d2_2
+v_d5_3 .req v_d2_3
+q_d5_0 .req q_d2_0
+q_d5_1 .req q_d2_1
+q_d5_2 .req q_d2_2
+q_d5_3 .req q_d2_3
+
+v_data .req v21
+q_data .req q21
+v_data_lo .req v22
+v_data_hi .req v23
+
+gf_5vect_mad_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ lsl x_vec_i, x_vec_i, #5
+ lsl x_vec, x_vec, #5
+ add x_tbl1, x_tbl, x_vec_i
+ add x_tbl2, x_tbl1, x_vec
+ add x_tbl3, x_tbl2, x_vec
+ add x_tbl4, x_tbl3, x_vec
+ add x_tbl5, x_tbl4, x_vec
+ add x_src_end, x_src, x_len
+ ldr x_dest1, [x_dest, #8*0]
+ ldr x_dest2, [x_dest, #8*1]
+ ldr x_dest3, [x_dest, #8*2]
+ ldr x_dest4, [x_dest, #8*3]
+ ldr x_dest5, [x_dest, #8*4]
+ ldr q_gft1_lo, [x_tbl1]
+ ldr q_gft1_hi, [x_tbl1, #16]
+ ldr q_gft3_lo, [x_tbl3]
+ ldr q_gft3_hi, [x_tbl3, #16]
+ ldr q_gft4_lo, [x_tbl4]
+ ldr q_gft4_hi, [x_tbl4, #16]
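+
+	/*
+	 * Tables for dest1/3/4 stay resident in v_gft1/3/4.  v_gft5
+	 * aliases v_gft2's registers, so the dest2 and dest5 tables are
+	 * reloaded from x_tbl2/x_tbl5 inside the loops.
+	 */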
+
+.Lloop64_init:
+ /* less than 64 bytes, goto Lloop16_init */
+ cmp x_len, #64
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src_end, x_src_end, #64
+
+.Lloop64:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3, [x_src, #16*3]
+ add x_src, x_src, #64
+
+ ldr q_d1_0, [x_dest1, #16*0]
+ ldr q_d1_1, [x_dest1, #16*1]
+ ldr q_d1_2, [x_dest1, #16*2]
+ ldr q_d1_3, [x_dest1, #16*3]
+
+ ldr q_d2_0, [x_dest2, #16*0]
+ ldr q_d2_1, [x_dest2, #16*1]
+ ldr q_d2_2, [x_dest2, #16*2]
+ ldr q_d2_3, [x_dest2, #16*3]
+
+ ldr q_d3_0, [x_dest3, #16*0]
+ ldr q_d3_1, [x_dest3, #16*1]
+ ldr q_d3_2, [x_dest3, #16*2]
+ ldr q_d3_3, [x_dest3, #16*3]
+
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+
+ /* dest1 */
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b
+ eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b
+ eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+ eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b
+ eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b
+
+ /* dest2 */
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
+ eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b
+ eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
+ eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b
+ eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
+ eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b
+ eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b
+
+ /* dest3 */
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b
+ eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b
+ eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b
+ eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b
+ eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b
+ eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b
+ eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1, #16*0]
+ str q_d1_1, [x_dest1, #16*1]
+ str q_d1_2, [x_dest1, #16*2]
+ str q_d1_3, [x_dest1, #16*3]
+ add x_dest1, x_dest1, #64
+
+ str q_d2_0, [x_dest2, #16*0]
+ str q_d2_1, [x_dest2, #16*1]
+ str q_d2_2, [x_dest2, #16*2]
+ str q_d2_3, [x_dest2, #16*3]
+ add x_dest2, x_dest2, #64
+
+ str q_d3_0, [x_dest3, #16*0]
+ str q_d3_1, [x_dest3, #16*1]
+ str q_d3_2, [x_dest3, #16*2]
+ str q_d3_3, [x_dest3, #16*3]
+ add x_dest3, x_dest3, #64
+
+ ldr q_d4_0, [x_dest4, #16*0]
+ ldr q_d4_1, [x_dest4, #16*1]
+ ldr q_d4_2, [x_dest4, #16*2]
+ ldr q_d4_3, [x_dest4, #16*3]
+
+ ldr q_d5_0, [x_dest5, #16*0]
+ ldr q_d5_1, [x_dest5, #16*1]
+ ldr q_d5_2, [x_dest5, #16*2]
+ ldr q_d5_3, [x_dest5, #16*3]
+
+ ldr q_gft5_lo, [x_tbl5]
+ ldr q_gft5_hi, [x_tbl5, #16]
+
+ /* dest4 */
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b
+ eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b
+ eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b
+ eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b
+ eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b
+ eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b
+ eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b
+ eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b
+
+ /* dest5 */
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_0_hi.16b
+ eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
+ eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_1_hi.16b
+ eor v_d5_1.16b, v_tmp_lo.16b, v_d5_1.16b
+ eor v_d5_1.16b, v_d5_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_2_hi.16b
+ eor v_d5_2.16b, v_tmp_lo.16b, v_d5_2.16b
+ eor v_d5_2.16b, v_d5_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_3_hi.16b
+ eor v_d5_3.16b, v_tmp_lo.16b, v_d5_3.16b
+ eor v_d5_3.16b, v_d5_3.16b, v_tmp_hi.16b
+
+ str q_d4_0, [x_dest4, #16*0]
+ str q_d4_1, [x_dest4, #16*1]
+ str q_d4_2, [x_dest4, #16*2]
+ str q_d4_3, [x_dest4, #16*3]
+ add x_dest4, x_dest4, #64
+
+ str q_d5_0, [x_dest5, #16*0]
+ str q_d5_1, [x_dest5, #16*1]
+ str q_d5_2, [x_dest5, #16*2]
+ str q_d5_3, [x_dest5, #16*3]
+ add x_dest5, x_dest5, #64
+
+ cmp x_src, x_src_end
+ bls .Lloop64
+
+.Lloop64_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+ add x_src_end, x_src_end, #64
+
+.Lloop16_init:
+ sub x_src_end, x_src_end, #16
+ cmp x_src, x_src_end
+ bhi .lessthan16_init
+
+.Lloop16:
+ ldr q_data, [x_src]
+
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ str q_d3_0, [x_dest3]
+
+ ldr q_d4_0, [x_dest4]
+ ldr q_d5_0, [x_dest5]
+ ldr q_gft5_lo, [x_tbl5]
+ ldr q_gft5_hi, [x_tbl5, #16]
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+ eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
+ eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
+ eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b
+
+ str q_d4_0, [x_dest4]
+ str q_d5_0, [x_dest5]
+
+ add x_src, x_src, #16
+ add x_dest1, x_dest1, #16
+ add x_dest2, x_dest2, #16
+ add x_dest3, x_dest3, #16
+ add x_dest4, x_dest4, #16
+ add x_dest5, x_dest5, #16
+ cmp x_src, x_src_end
+ bls .Lloop16
+
+.lessthan16_init:
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16:
+ mov x_src, x_src_end
+ sub x_dest1, x_dest1, x_tmp
+ sub x_dest2, x_dest2, x_tmp
+ sub x_dest3, x_dest3, x_tmp
+ sub x_dest4, x_dest4, x_tmp
+ sub x_dest5, x_dest5, x_tmp
+
+ adrp x_const, const_tbl
+ add x_const, x_const, :lo12:const_tbl
+ sub x_const, x_const, x_tmp
+ ldr q_tmp, [x_const, #16]
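+	/* q_tmp now holds a byte mask whose first x_tmp lanes are zero
+	 * (loaded from const_tbl + 16 - x_tmp); with the destinations
+	 * rewound by x_tmp above, already-written lanes XOR with zero. */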
+
+ ldr q_data, [x_src]
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ str q_d3_0, [x_dest3]
+
+ ldr q_d4_0, [x_dest4]
+ ldr q_d5_0, [x_dest5]
+ ldr q_gft5_lo, [x_tbl5]
+ ldr q_gft5_hi, [x_tbl5, #16]
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b
+
+ str q_d4_0, [x_dest4]
+ str q_d5_0, [x_dest5]
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
+
+.section .rodata
+.balign 8
+const_tbl:
+ .dword 0x0000000000000000, 0x0000000000000000
+ .dword 0xffffffffffffffff, 0xffffffffffffffff
diff --git a/src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S
new file mode 100644
index 000000000..7ec2d80b6
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S
@@ -0,0 +1,610 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+.global gf_6vect_mad_neon
+.type gf_6vect_mad_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_vec_i .req x2
+x_tbl .req x3
+x_src .req x4
+x_dest .req x5
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_src_end .req x6
+x_dest1 .req x7
+x_dest2 .req x8
+x_dest3 .req x9
+x_dest4 .req x10
+x_dest5 .req x11
+x_dest6 .req x_dest
+x_tmp .req x12
+x_tbl1 .req x13
+x_tbl2 .req x14
+x_tbl3 .req x15
+x_tbl4 .req x16
+x_tbl5 .req x17
+x_tbl6 .req x_tbl
+x_const .req x18
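+/* Note: x18 is the AAPCS64 platform register; it may be reserved on
+ * some targets (e.g. when a shadow call stack is in use). */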
+
+/* vectors */
+v_mask0f .req v0
+v_tmp_lo .req v1
+v_tmp_hi .req v2
+v_tmp .req v3
+q_tmp .req q3
+
+v_gft1_lo .req v4
+v_gft1_hi .req v5
+v_gft2_lo .req v6
+v_gft2_hi .req v7
+v_gft3_lo .req v16
+v_gft3_hi .req v17
+q_gft1_lo .req q4
+q_gft1_hi .req q5
+q_gft2_lo .req q6
+q_gft2_hi .req q7
+q_gft3_lo .req q16
+q_gft3_hi .req q17
+
+v_gft4_lo .req v18
+v_gft4_hi .req v19
+q_gft4_lo .req q18
+q_gft4_hi .req q19
+v_gft5_lo .req v_gft2_lo
+v_gft5_hi .req v_gft2_hi
+q_gft5_lo .req q_gft2_lo
+q_gft5_hi .req q_gft2_hi
+v_gft6_lo .req v_gft3_lo
+v_gft6_hi .req v_gft3_hi
+q_gft6_lo .req q_gft3_lo
+q_gft6_hi .req q_gft3_hi
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+
+v_data_0_lo .req v12
+v_data_1_lo .req v13
+v_data_2_lo .req v14
+v_data_3_lo .req v15
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+
+v_d1_0 .req v20
+v_d1_1 .req v21
+v_d1_2 .req v22
+v_d1_3 .req v23
+v_d2_0 .req v24
+v_d2_1 .req v25
+v_d2_2 .req v26
+v_d2_3 .req v27
+v_d3_0 .req v28
+v_d3_1 .req v29
+v_d3_2 .req v30
+v_d3_3 .req v31
+q_d1_0 .req q20
+q_d1_1 .req q21
+q_d1_2 .req q22
+q_d1_3 .req q23
+q_d2_0 .req q24
+q_d2_1 .req q25
+q_d2_2 .req q26
+q_d2_3 .req q27
+q_d3_0 .req q28
+q_d3_1 .req q29
+q_d3_2 .req q30
+q_d3_3 .req q31
+
+v_d4_0 .req v_d1_0
+v_d4_1 .req v_d1_1
+v_d4_2 .req v_d1_2
+v_d4_3 .req v_d1_3
+q_d4_0 .req q_d1_0
+q_d4_1 .req q_d1_1
+q_d4_2 .req q_d1_2
+q_d4_3 .req q_d1_3
+v_d5_0 .req v_d2_0
+v_d5_1 .req v_d2_1
+v_d5_2 .req v_d2_2
+v_d5_3 .req v_d2_3
+q_d5_0 .req q_d2_0
+q_d5_1 .req q_d2_1
+q_d5_2 .req q_d2_2
+q_d5_3 .req q_d2_3
+v_d6_0 .req v_d3_0
+v_d6_1 .req v_d3_1
+v_d6_2 .req v_d3_2
+v_d6_3 .req v_d3_3
+q_d6_0 .req q_d3_0
+q_d6_1 .req q_d3_1
+q_d6_2 .req q_d3_2
+q_d6_3 .req q_d3_3
+
+v_data .req v21
+q_data .req q21
+v_data_lo .req v22
+v_data_hi .req v23
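+
+/*
+ * Only 32 SIMD registers are available, so the accumulators for
+ * dest4-6 alias those of dest1-3: the main loop stores the dest1-3
+ * results before loading dest4-6.  v_gft5/6 likewise reuse v_gft2/3
+ * and are reloaded from their tables each iteration.
+ */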
+
+gf_6vect_mad_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ lsl x_vec_i, x_vec_i, #5
+ lsl x_vec, x_vec, #5
+ add x_tbl1, x_tbl, x_vec_i
+ add x_tbl2, x_tbl1, x_vec
+ add x_tbl3, x_tbl2, x_vec
+ add x_tbl4, x_tbl3, x_vec
+ add x_tbl5, x_tbl4, x_vec
+ add x_tbl6, x_tbl5, x_vec
+ add x_src_end, x_src, x_len
+ ldr x_dest1, [x_dest, #8*0]
+ ldr x_dest2, [x_dest, #8*1]
+ ldr x_dest3, [x_dest, #8*2]
+ ldr x_dest4, [x_dest, #8*3]
+ ldr x_dest5, [x_dest, #8*4]
+ ldr x_dest6, [x_dest, #8*5]
+ ldr q_gft1_lo, [x_tbl1]
+ ldr q_gft1_hi, [x_tbl1, #16]
+ ldr q_gft4_lo, [x_tbl4]
+ ldr q_gft4_hi, [x_tbl4, #16]
+
+.Lloop64_init:
+ /* less than 64 bytes, goto Lloop16_init */
+ cmp x_len, #64
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src_end, x_src_end, #64
+
+.Lloop64:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3, [x_src, #16*3]
+ add x_src, x_src, #64
+
+ ldr q_d1_0, [x_dest1, #16*0]
+ ldr q_d1_1, [x_dest1, #16*1]
+ ldr q_d1_2, [x_dest1, #16*2]
+ ldr q_d1_3, [x_dest1, #16*3]
+
+ ldr q_d2_0, [x_dest2, #16*0]
+ ldr q_d2_1, [x_dest2, #16*1]
+ ldr q_d2_2, [x_dest2, #16*2]
+ ldr q_d2_3, [x_dest2, #16*3]
+
+ ldr q_d3_0, [x_dest3, #16*0]
+ ldr q_d3_1, [x_dest3, #16*1]
+ ldr q_d3_2, [x_dest3, #16*2]
+ ldr q_d3_3, [x_dest3, #16*3]
+
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+ ldr q_gft3_lo, [x_tbl3]
+ ldr q_gft3_hi, [x_tbl3, #16]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+
+ /* dest1 */
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b
+ eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b
+ eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+ eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b
+ eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b
+
+ /* dest2 */
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
+ eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b
+ eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
+ eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b
+ eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
+ eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b
+ eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b
+
+ /* dest3 */
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b
+ eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b
+ eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b
+ eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b
+ eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b
+ eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b
+ eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1, #16*0]
+ str q_d1_1, [x_dest1, #16*1]
+ str q_d1_2, [x_dest1, #16*2]
+ str q_d1_3, [x_dest1, #16*3]
+ add x_dest1, x_dest1, #64
+
+ str q_d2_0, [x_dest2, #16*0]
+ str q_d2_1, [x_dest2, #16*1]
+ str q_d2_2, [x_dest2, #16*2]
+ str q_d2_3, [x_dest2, #16*3]
+ add x_dest2, x_dest2, #64
+
+ str q_d3_0, [x_dest3, #16*0]
+ str q_d3_1, [x_dest3, #16*1]
+ str q_d3_2, [x_dest3, #16*2]
+ str q_d3_3, [x_dest3, #16*3]
+ add x_dest3, x_dest3, #64
+
+ ldr q_d4_0, [x_dest4, #16*0]
+ ldr q_d4_1, [x_dest4, #16*1]
+ ldr q_d4_2, [x_dest4, #16*2]
+ ldr q_d4_3, [x_dest4, #16*3]
+
+ ldr q_d5_0, [x_dest5, #16*0]
+ ldr q_d5_1, [x_dest5, #16*1]
+ ldr q_d5_2, [x_dest5, #16*2]
+ ldr q_d5_3, [x_dest5, #16*3]
+
+ ldr q_d6_0, [x_dest6, #16*0]
+ ldr q_d6_1, [x_dest6, #16*1]
+ ldr q_d6_2, [x_dest6, #16*2]
+ ldr q_d6_3, [x_dest6, #16*3]
+
+ ldr q_gft5_lo, [x_tbl5]
+ ldr q_gft5_hi, [x_tbl5, #16]
+ ldr q_gft6_lo, [x_tbl6]
+ ldr q_gft6_hi, [x_tbl6, #16]
+
+ /* dest4 */
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b
+ eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b
+ eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b
+ eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b
+ eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b
+ eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b
+ eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b
+ eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b
+
+ /* dest5 */
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_0_hi.16b
+ eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
+ eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_1_hi.16b
+ eor v_d5_1.16b, v_tmp_lo.16b, v_d5_1.16b
+ eor v_d5_1.16b, v_d5_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_2_hi.16b
+ eor v_d5_2.16b, v_tmp_lo.16b, v_d5_2.16b
+ eor v_d5_2.16b, v_d5_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_3_hi.16b
+ eor v_d5_3.16b, v_tmp_lo.16b, v_d5_3.16b
+ eor v_d5_3.16b, v_d5_3.16b, v_tmp_hi.16b
+
+ /* dest6 */
+ tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_0_hi.16b
+ eor v_d6_0.16b, v_tmp_lo.16b, v_d6_0.16b
+ eor v_d6_0.16b, v_d6_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_1_hi.16b
+ eor v_d6_1.16b, v_tmp_lo.16b, v_d6_1.16b
+ eor v_d6_1.16b, v_d6_1.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_2_hi.16b
+ eor v_d6_2.16b, v_tmp_lo.16b, v_d6_2.16b
+ eor v_d6_2.16b, v_d6_2.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_3_hi.16b
+ eor v_d6_3.16b, v_tmp_lo.16b, v_d6_3.16b
+ eor v_d6_3.16b, v_d6_3.16b, v_tmp_hi.16b
+
+ str q_d4_0, [x_dest4, #16*0]
+ str q_d4_1, [x_dest4, #16*1]
+ str q_d4_2, [x_dest4, #16*2]
+ str q_d4_3, [x_dest4, #16*3]
+ add x_dest4, x_dest4, #64
+
+ str q_d5_0, [x_dest5, #16*0]
+ str q_d5_1, [x_dest5, #16*1]
+ str q_d5_2, [x_dest5, #16*2]
+ str q_d5_3, [x_dest5, #16*3]
+ add x_dest5, x_dest5, #64
+
+ str q_d6_0, [x_dest6, #16*0]
+ str q_d6_1, [x_dest6, #16*1]
+ str q_d6_2, [x_dest6, #16*2]
+ str q_d6_3, [x_dest6, #16*3]
+ add x_dest6, x_dest6, #64
+
+ cmp x_src, x_src_end
+ bls .Lloop64
+
+.Lloop64_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+ add x_src_end, x_src_end, #64
+
+.Lloop16_init:
+ sub x_src_end, x_src_end, #16
+ cmp x_src, x_src_end
+ bhi .lessthan16_init
+
+.Lloop16:
+ ldr q_data, [x_src]
+
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+ ldr q_gft3_lo, [x_tbl3]
+ ldr q_gft3_hi, [x_tbl3, #16]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ str q_d3_0, [x_dest3]
+
+ ldr q_d4_0, [x_dest4]
+ ldr q_d5_0, [x_dest5]
+ ldr q_d6_0, [x_dest6]
+ ldr q_gft5_lo, [x_tbl5]
+ ldr q_gft5_hi, [x_tbl5, #16]
+ ldr q_gft6_lo, [x_tbl6]
+ ldr q_gft6_hi, [x_tbl6, #16]
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+ eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
+ eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
+ eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_hi.16b
+ eor v_d6_0.16b, v_tmp_lo.16b, v_d6_0.16b
+ eor v_d6_0.16b, v_d6_0.16b, v_tmp_hi.16b
+
+ str q_d4_0, [x_dest4]
+ str q_d5_0, [x_dest5]
+ str q_d6_0, [x_dest6]
+
+ add x_src, x_src, #16
+ add x_dest1, x_dest1, #16
+ add x_dest2, x_dest2, #16
+ add x_dest3, x_dest3, #16
+ add x_dest4, x_dest4, #16
+ add x_dest5, x_dest5, #16
+ add x_dest6, x_dest6, #16
+ cmp x_src, x_src_end
+ bls .Lloop16
+
+.lessthan16_init:
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16:
+ mov x_src, x_src_end
+ sub x_dest1, x_dest1, x_tmp
+ sub x_dest2, x_dest2, x_tmp
+ sub x_dest3, x_dest3, x_tmp
+ sub x_dest4, x_dest4, x_tmp
+ sub x_dest5, x_dest5, x_tmp
+ sub x_dest6, x_dest6, x_tmp
+
+ adrp x_const, const_tbl
+ add x_const, x_const, :lo12:const_tbl
+ sub x_const, x_const, x_tmp
+ ldr q_tmp, [x_const, #16]
+
+ ldr q_data, [x_src]
+ ldr q_d1_0, [x_dest1]
+ ldr q_d2_0, [x_dest2]
+ ldr q_d3_0, [x_dest3]
+ ldr q_gft2_lo, [x_tbl2]
+ ldr q_gft2_hi, [x_tbl2, #16]
+ ldr q_gft3_lo, [x_tbl3]
+ ldr q_gft3_hi, [x_tbl3, #16]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b
+
+ str q_d1_0, [x_dest1]
+ str q_d2_0, [x_dest2]
+ str q_d3_0, [x_dest3]
+
+ ldr q_d4_0, [x_dest4]
+ ldr q_d5_0, [x_dest5]
+ ldr q_d6_0, [x_dest6]
+ ldr q_gft5_lo, [x_tbl5]
+ ldr q_gft5_hi, [x_tbl5, #16]
+ ldr q_gft6_lo, [x_tbl6]
+ ldr q_gft6_hi, [x_tbl6, #16]
+
+ tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b
+
+ tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_lo.16b
+ tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_hi.16b
+ eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+ and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+ eor v_d6_0.16b, v_d6_0.16b, v_tmp_hi.16b
+
+ str q_d4_0, [x_dest4]
+ str q_d5_0, [x_dest5]
+ str q_d6_0, [x_dest6]
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
+
+.section .rodata
+.balign 8
+const_tbl:
+ .dword 0x0000000000000000, 0x0000000000000000
+ .dword 0xffffffffffffffff, 0xffffffffffffffff
diff --git a/src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S
new file mode 100644
index 000000000..117110c8a
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S
@@ -0,0 +1,298 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+.text
+
+.global gf_vect_dot_prod_neon
+.type gf_vect_dot_prod_neon, %function
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_tbl .req x2
+x_src .req x3
+x_dest1 .req x4
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_vec_i .req x5
+x_ptr .req x6
+x_pos .req x7
+x_tmp .req x8
+x_tbl1 .req x9
+
+/* vectors */
+v_gft1_lo .req v0
+v_gft1_hi .req v1
+q_gft1_lo .req q0
+q_gft1_hi .req q1
+v_mask0f .req v2
+q_mask0f .req q2
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+v_data_4 .req v12
+v_data_5 .req v13
+v_data_6 .req v14
+v_data_7 .req v15
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+q_data_4 .req q12
+q_data_5 .req q13
+q_data_6 .req q14
+q_data_7 .req q15
+
+v_data_0_lo .req v16
+v_data_1_lo .req v17
+v_data_2_lo .req v18
+v_data_3_lo .req v19
+v_data_4_lo .req v20
+v_data_5_lo .req v21
+v_data_6_lo .req v22
+v_data_7_lo .req v23
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+v_data_4_hi .req v_data_4
+v_data_5_hi .req v_data_5
+v_data_6_hi .req v_data_6
+v_data_7_hi .req v_data_7
+
+v_p0 .req v24
+v_p1 .req v25
+v_p2 .req v26
+v_p3 .req v27
+v_p4 .req v28
+v_p5 .req v29
+v_p6 .req v30
+v_p7 .req v31
+q_p0 .req q24
+q_p1 .req q25
+q_p2 .req q26
+q_p3 .req q27
+q_p4 .req q28
+q_p5 .req q29
+q_p6 .req q30
+q_p7 .req q31
+
+v_p .req v_p0
+q_p .req q_p0
+v_data .req v_p1
+q_data .req q_p1
+v_data_lo .req v_p2
+v_data_hi .req v_p3
+
+
+gf_vect_dot_prod_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ mov x_pos, #0
+
+ lsl x_vec, x_vec, #3
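+	/* scale vlen to the byte size of the src pointer array
+	 * (8 bytes per pointer); x_vec_i walks it in 8-byte steps */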
+
+.Lloop128_init:
+ /* less than 128 bytes, goto Lloop16_init */
+ cmp x_len, #128
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_len, x_len, #128
+
+.Lloop128:
+ movi v_p0.16b, #0
+ movi v_p1.16b, #0
+ movi v_p2.16b, #0
+ movi v_p3.16b, #0
+ movi v_p4.16b, #0
+ movi v_p5.16b, #0
+ movi v_p6.16b, #0
+ movi v_p7.16b, #0
+
+ mov x_tbl1, x_tbl
+ mov x_vec_i, #0
+
+.Lloop128_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ add x_vec_i, x_vec_i, #8
+ add x_ptr, x_ptr, x_pos
+
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+
+ ldp q_data_0, q_data_1, [x_ptr], #32
+ ldp q_data_2, q_data_3, [x_ptr], #32
+ ldp q_data_4, q_data_5, [x_ptr], #32
+ ldp q_data_6, q_data_7, [x_ptr]
+
+ prfm pldl1keep, [x_tbl1]
+ prfm pldl1strm, [x_ptr]
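+	/* prefetch: keep the next table pair resident in L1; the source
+	 * data is only streamed through once */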
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+ and v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b
+ and v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b
+ and v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b
+ and v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+ ushr v_data_4_hi.16b, v_data_4.16b, #4
+ ushr v_data_5_hi.16b, v_data_5.16b, #4
+ ushr v_data_6_hi.16b, v_data_6.16b, #4
+ ushr v_data_7_hi.16b, v_data_7.16b, #4
+
+ tbl v_data_0_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_data_1_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_data_2_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_data_3_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_data_4_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b
+ tbl v_data_5_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b
+ tbl v_data_6_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b
+ tbl v_data_7_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b
+
+ tbl v_data_0_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ tbl v_data_1_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ tbl v_data_2_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ tbl v_data_3_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+ tbl v_data_4_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b
+ tbl v_data_5_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b
+ tbl v_data_6_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b
+ tbl v_data_7_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b
+
+ eor v_p0.16b, v_data_0_lo.16b, v_p0.16b
+ eor v_p0.16b, v_p0.16b, v_data_0_hi.16b
+ eor v_p1.16b, v_data_1_lo.16b, v_p1.16b
+ eor v_p1.16b, v_p1.16b, v_data_1_hi.16b
+ eor v_p2.16b, v_data_2_lo.16b, v_p2.16b
+ eor v_p2.16b, v_p2.16b, v_data_2_hi.16b
+ eor v_p3.16b, v_data_3_lo.16b, v_p3.16b
+ eor v_p3.16b, v_p3.16b, v_data_3_hi.16b
+ eor v_p4.16b, v_data_4_lo.16b, v_p4.16b
+ eor v_p4.16b, v_p4.16b, v_data_4_hi.16b
+ eor v_p5.16b, v_data_5_lo.16b, v_p5.16b
+ eor v_p5.16b, v_p5.16b, v_data_5_hi.16b
+ eor v_p6.16b, v_data_6_lo.16b, v_p6.16b
+ eor v_p6.16b, v_p6.16b, v_data_6_hi.16b
+ eor v_p7.16b, v_data_7_lo.16b, v_p7.16b
+ eor v_p7.16b, v_p7.16b, v_data_7_hi.16b
+
+ cmp x_vec_i, x_vec
+ blt .Lloop128_vects
+
+.Lloop128_vects_end:
+ add x_ptr, x_dest1, x_pos
+ stp q_p0, q_p1, [x_ptr], #32
+ stp q_p2, q_p3, [x_ptr], #32
+ stp q_p4, q_p5, [x_ptr], #32
+ stp q_p6, q_p7, [x_ptr]
+
+ add x_pos, x_pos, #128
+ cmp x_pos, x_len
+ ble .Lloop128
+
+.Lloop128_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ add x_len, x_len, #128
+ cmp x_pos, x_len
+ beq .return_pass
+
+.Lloop16_init:
+ sub x_len, x_len, #16
+ cmp x_pos, x_len
+ bgt .lessthan16_init
+
+.Lloop16:
+ movi v_p.16b, #0
+ mov x_tbl1, x_tbl
+ mov x_vec_i, #0
+
+.Lloop16_vects:
+ ldr x_ptr, [x_src, x_vec_i]
+ ldr q_data, [x_ptr, x_pos]
+ add x_vec_i, x_vec_i, #8
+
+ ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_data_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_data_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_p.16b, v_data_lo.16b, v_p.16b
+ eor v_p.16b, v_p.16b, v_data_hi.16b
+
+ cmp x_vec_i, x_vec
+ blt .Lloop16_vects
+
+.Lloop16_vects_end:
+ str q_p, [x_dest1, x_pos]
+ add x_pos, x_pos, #16
+ cmp x_pos, x_len
+ ble .Lloop16
+
+.Lloop16_end:
+ sub x_tmp, x_pos, x_len
+ cmp x_tmp, #16
+ beq .return_pass
+
+.lessthan16_init:
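+	/* final partial block: redo the last full 16-byte window; the
+	 * dot product is recomputed from zero, so the overlap with the
+	 * previous pass is harmless */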
+ mov x_pos, x_len
+ b .Lloop16
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S
new file mode 100644
index 000000000..9ebd86b4a
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S
@@ -0,0 +1,315 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+.text
+
+.global gf_vect_mad_neon
+.type gf_vect_mad_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_vec .req x1
+x_vec_i .req x2
+x_tbl .req x3
+x_src .req x4
+x_dest .req x5
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_src_end .req x6
+x_dest1 .req x_dest
+x_tmp .req x7
+x_const .req x8
+
+/* vectors */
+v_mask0f .req v0
+v_tmp .req v1
+q_tmp .req q1
+
+v_tmp1_lo .req v2
+v_tmp1_hi .req v3
+v_tmp2_lo .req v4
+v_tmp2_hi .req v5
+
+v_gft1_lo .req v6
+v_gft1_hi .req v7
+q_gft1_lo .req q6
+q_gft1_hi .req q7
+
+v_data_0 .req v8
+v_data_1 .req v9
+v_data_2 .req v10
+v_data_3 .req v11
+v_data_4 .req v12
+v_data_5 .req v13
+v_data_6 .req v14
+v_data_7 .req v15
+q_data_0 .req q8
+q_data_1 .req q9
+q_data_2 .req q10
+q_data_3 .req q11
+q_data_4 .req q12
+q_data_5 .req q13
+q_data_6 .req q14
+q_data_7 .req q15
+
+v_data_0_lo .req v16
+v_data_1_lo .req v17
+v_data_2_lo .req v18
+v_data_3_lo .req v19
+v_data_4_lo .req v20
+v_data_5_lo .req v21
+v_data_6_lo .req v22
+v_data_7_lo .req v23
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+v_data_4_hi .req v_data_4
+v_data_5_hi .req v_data_5
+v_data_6_hi .req v_data_6
+v_data_7_hi .req v_data_7
+
+v_d1_0 .req v24
+v_d1_1 .req v25
+v_d1_2 .req v26
+v_d1_3 .req v27
+v_d1_4 .req v28
+v_d1_5 .req v29
+v_d1_6 .req v30
+v_d1_7 .req v31
+q_d1_0 .req q24
+q_d1_1 .req q25
+q_d1_2 .req q26
+q_d1_3 .req q27
+q_d1_4 .req q28
+q_d1_5 .req q29
+q_d1_6 .req q30
+q_d1_7 .req q31
+
+v_data .req v_d1_1
+q_data .req q_d1_1
+v_data_lo .req v_d1_2
+v_data_hi .req v_d1_3
+
+
+gf_vect_mad_neon:
+ /* less than 16 bytes, return_fail */
+ cmp x_len, #16
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ lsl x_vec_i, x_vec_i, #5
+ add x_tbl, x_tbl, x_vec_i
+ add x_src_end, x_src, x_len
+
+ ldr q_gft1_lo, [x_tbl]
+ ldr q_gft1_hi, [x_tbl, #16]
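+	/* single source/dest pair: one 32-byte nibble-table pair,
+	 * loaded once and kept in v_gft1_lo/hi for the whole call */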
+
+.Lloop128_init:
+ /* less than 128 bytes, goto Lloop16_init */
+ cmp x_len, #128
+ blt .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src_end, x_src_end, #128
+
+.Lloop128:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3, [x_src, #16*3]
+ ldr q_data_4, [x_src, #16*4]
+ ldr q_data_5, [x_src, #16*5]
+ ldr q_data_6, [x_src, #16*6]
+ ldr q_data_7, [x_src, #16*7]
+
+ ldr q_d1_0, [x_dest1, #16*0]
+ ldr q_d1_1, [x_dest1, #16*1]
+ ldr q_d1_2, [x_dest1, #16*2]
+ ldr q_d1_3, [x_dest1, #16*3]
+ ldr q_d1_4, [x_dest1, #16*4]
+ ldr q_d1_5, [x_dest1, #16*5]
+ ldr q_d1_6, [x_dest1, #16*6]
+ ldr q_d1_7, [x_dest1, #16*7]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+ and v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b
+ and v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b
+ and v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b
+ and v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+ ushr v_data_4_hi.16b, v_data_4.16b, #4
+ ushr v_data_5_hi.16b, v_data_5.16b, #4
+ ushr v_data_6_hi.16b, v_data_6.16b, #4
+ ushr v_data_7_hi.16b, v_data_7.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+
+ eor v_d1_0.16b, v_tmp1_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp1_hi.16b
+ eor v_d1_1.16b, v_tmp2_lo.16b, v_d1_1.16b
+ eor v_d1_1.16b, v_d1_1.16b, v_tmp2_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+
+ eor v_d1_2.16b, v_tmp1_lo.16b, v_d1_2.16b
+ eor v_d1_2.16b, v_d1_2.16b, v_tmp1_hi.16b
+ eor v_d1_3.16b, v_tmp2_lo.16b, v_d1_3.16b
+ eor v_d1_3.16b, v_d1_3.16b, v_tmp2_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b
+ tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b
+ tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b
+
+ eor v_d1_4.16b, v_tmp1_lo.16b, v_d1_4.16b
+ eor v_d1_4.16b, v_d1_4.16b, v_tmp1_hi.16b
+ eor v_d1_5.16b, v_tmp2_lo.16b, v_d1_5.16b
+ eor v_d1_5.16b, v_d1_5.16b, v_tmp2_hi.16b
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b
+ tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b
+ tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b
+
+ eor v_d1_6.16b, v_tmp1_lo.16b, v_d1_6.16b
+ eor v_d1_6.16b, v_d1_6.16b, v_tmp1_hi.16b
+ eor v_d1_7.16b, v_tmp2_lo.16b, v_d1_7.16b
+ eor v_d1_7.16b, v_d1_7.16b, v_tmp2_hi.16b
+
+ str q_d1_0, [x_dest1, #16*0]
+ str q_d1_1, [x_dest1, #16*1]
+ str q_d1_2, [x_dest1, #16*2]
+ str q_d1_3, [x_dest1, #16*3]
+ str q_d1_4, [x_dest1, #16*4]
+ str q_d1_5, [x_dest1, #16*5]
+ str q_d1_6, [x_dest1, #16*6]
+ str q_d1_7, [x_dest1, #16*7]
+
+ add x_src, x_src, #128
+ add x_dest1, x_dest1, #128
+ cmp x_src, x_src_end
+ bls .Lloop128
+
+.Lloop128_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+ add x_src_end, x_src_end, #128
+
+.Lloop16_init:
+ sub x_src_end, x_src_end, #16
+ cmp x_src, x_src_end
+ bhi .lessthan16_init
+
+.Lloop16:
+ ldr q_data, [x_src]
+ ldr q_d1_0, [x_dest1]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_d1_0.16b, v_tmp1_lo.16b, v_d1_0.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp1_hi.16b
+
+ str q_d1_0, [x_dest1]
+
+ add x_dest1, x_dest1, #16
+ add x_src, x_src, #16
+ cmp x_src, x_src_end
+ bls .Lloop16
+
+.lessthan16_init:
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #16
+ beq .return_pass
+
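+	/* 16 - x_tmp tail bytes remain: re-read the last 16-byte vector so it
+	 * ends exactly at the buffer end, masking off the already-processed prefix */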
+.lessthan16:
+ mov x_src, x_src_end
+ sub x_dest1, x_dest1, x_tmp
+
+ adrp x_const, const_tbl
+ add x_const, x_const, :lo12:const_tbl
+ sub x_const, x_const, x_tmp
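+	/* q_tmp: 0x00 for the x_tmp already-processed bytes, 0xff for the new tail */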
+ ldr q_tmp, [x_const, #16]
+
+ ldr q_data, [x_src]
+ ldr q_d1_0, [x_dest1]
+
+ and v_data_lo.16b, v_data.16b, v_mask0f.16b
+ ushr v_data_hi.16b, v_data.16b, #4
+
+ tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+ tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+ eor v_tmp1_hi.16b, v_tmp1_lo.16b, v_tmp1_hi.16b
+ and v_tmp1_hi.16b, v_tmp1_hi.16b, v_tmp.16b
+ eor v_d1_0.16b, v_d1_0.16b, v_tmp1_hi.16b
+
+ str q_d1_0, [x_dest1]
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
+
+.section .rodata
+.balign 8
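+/* 16 bytes of 0x00 followed by 16 bytes of 0xff: a 16-byte load from
+ * const_tbl + (16 - x_tmp) gives the tail mask used in .lessthan16 */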
+const_tbl:
+ .dword 0x0000000000000000, 0x0000000000000000
+ .dword 0xffffffffffffffff, 0xffffffffffffffff
diff --git a/src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S b/src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S
new file mode 100644
index 000000000..c88c53b8e
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S
@@ -0,0 +1,235 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_vect_mul_neon
+.type gf_vect_mul_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_tbl .req x1
+x_src .req x2
+x_dest .req x3
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_dest1 .req x_dest
+x_src_end .req x4
+x_tmp .req x5
+
+/* vectors */
+v_mask0f .req v0
+
+v_gft1_lo .req v2
+v_gft1_hi .req v3
+q_gft1_lo .req q2
+q_gft1_hi .req q3
+
+v_data_0 .req v16
+v_data_1 .req v17
+v_data_2 .req v18
+v_data_3 .req v19
+v_data_4 .req v20
+v_data_5 .req v21
+v_data_6 .req v22
+v_data_7 .req v23
+q_data_0 .req q16
+q_data_1 .req q17
+q_data_2 .req q18
+q_data_3 .req q19
+q_data_4 .req q20
+q_data_5 .req q21
+q_data_6 .req q22
+q_data_7 .req q23
+
+v_data_0_lo .req v24
+v_data_1_lo .req v25
+v_data_2_lo .req v26
+v_data_3_lo .req v27
+v_data_4_lo .req v28
+v_data_5_lo .req v29
+v_data_6_lo .req v30
+v_data_7_lo .req v31
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+v_data_4_hi .req v_data_4
+v_data_5_hi .req v_data_5
+v_data_6_hi .req v_data_6
+v_data_7_hi .req v_data_7
+
+
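+/* Descriptive note (not in upstream): dest[i] = C * src[i] over GF(2^8);
+ * tbl holds the 32-byte table built for the constant C. */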
+gf_vect_mul_neon:
+ /* less than 32 bytes, return_fail */
+ cmp x_len, #32
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ add x_src_end, x_src, x_len
+ ldr q_gft1_lo, [x_tbl]
+ ldr q_gft1_hi, [x_tbl, #16]
+
+
+.Lloop128_init:
+	/* less than 128 bytes, goto Lloop32_init */
+ cmp x_len, #128
+ blt .Lloop32_init
+
+	/* save callee-saved d8 ~ d15 (AAPCS64) to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src_end, x_src_end, #128
+
+.Lloop128:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3, [x_src, #16*3]
+ ldr q_data_4, [x_src, #16*4]
+ ldr q_data_5, [x_src, #16*5]
+ ldr q_data_6, [x_src, #16*6]
+ ldr q_data_7, [x_src, #16*7]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+ and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+ and v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b
+ and v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b
+ and v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b
+ and v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b
+
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ ushr v_data_2_hi.16b, v_data_2.16b, #4
+ ushr v_data_3_hi.16b, v_data_3.16b, #4
+ ushr v_data_4_hi.16b, v_data_4.16b, #4
+ ushr v_data_5_hi.16b, v_data_5.16b, #4
+ ushr v_data_6_hi.16b, v_data_6.16b, #4
+ ushr v_data_7_hi.16b, v_data_7.16b, #4
+
+ tbl v_data_0_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_data_1_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_data_2_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+ tbl v_data_3_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+ tbl v_data_4_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b
+ tbl v_data_5_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b
+ tbl v_data_6_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b
+ tbl v_data_7_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b
+
+ tbl v_data_0_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ tbl v_data_1_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ tbl v_data_2_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+ tbl v_data_3_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+ tbl v_data_4_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b
+ tbl v_data_5_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b
+ tbl v_data_6_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b
+ tbl v_data_7_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b
+
+ eor v_data_0.16b, v_data_0_hi.16b, v_data_0_lo.16b
+ eor v_data_1.16b, v_data_1_hi.16b, v_data_1_lo.16b
+ eor v_data_2.16b, v_data_2_hi.16b, v_data_2_lo.16b
+ eor v_data_3.16b, v_data_3_hi.16b, v_data_3_lo.16b
+ eor v_data_4.16b, v_data_4_hi.16b, v_data_4_lo.16b
+ eor v_data_5.16b, v_data_5_hi.16b, v_data_5_lo.16b
+ eor v_data_6.16b, v_data_6_hi.16b, v_data_6_lo.16b
+ eor v_data_7.16b, v_data_7_hi.16b, v_data_7_lo.16b
+
+ str q_data_0, [x_dest1, #16*0]
+ str q_data_1, [x_dest1, #16*1]
+ str q_data_2, [x_dest1, #16*2]
+ str q_data_3, [x_dest1, #16*3]
+ str q_data_4, [x_dest1, #16*4]
+ str q_data_5, [x_dest1, #16*5]
+ str q_data_6, [x_dest1, #16*6]
+ str q_data_7, [x_dest1, #16*7]
+
+ add x_src, x_src, #128
+ add x_dest1, x_dest1, #128
+ cmp x_src, x_src_end
+ bls .Lloop128
+
+.Lloop128_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+ add x_src_end, x_src_end, #128
+
+.Lloop32_init:
+ sub x_src_end, x_src_end, #32
+ cmp x_src, x_src_end
+ bhi .return_fail
+
+.Lloop32:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+
+ and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+ and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+ ushr v_data_0_hi.16b, v_data_0.16b, #4
+ ushr v_data_1_hi.16b, v_data_1.16b, #4
+ tbl v_data_0_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+ tbl v_data_1_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+ tbl v_data_0_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+ tbl v_data_1_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+ eor v_data_0.16b, v_data_0_hi.16b, v_data_0_lo.16b
+ eor v_data_1.16b, v_data_1_hi.16b, v_data_1_lo.16b
+ str q_data_0, [x_dest1, #16*0]
+ str q_data_1, [x_dest1, #16*1]
+
+ add x_dest1, x_dest1, #32
+ add x_src, x_src, #32
+ cmp x_src, x_src_end
+ bls .Lloop32
+
+.Lloop32_end:
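+	/* a leftover of exactly 32 means the loop consumed the whole buffer:
+	 * len must be a multiple of 32, otherwise fail */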
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #32
+ beq .return_pass
+ b .return_fail
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/erasure_code/ec_base.c b/src/isa-l/erasure_code/ec_base.c
new file mode 100644
index 000000000..9d76c8df4
--- /dev/null
+++ b/src/isa-l/erasure_code/ec_base.c
@@ -0,0 +1,371 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <limits.h>
+#include <string.h> // for memset
+#include "erasure_code.h"
+#include "ec_base.h" // for GF tables
+
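+// Expand each coefficient of the rows x k matrix a[] into a 32-byte
+// multiplication table in g_tbls (see gf_vect_mul_init below).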
+void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
+{
+ int i, j;
+
+ for (i = 0; i < rows; i++) {
+ for (j = 0; j < k; j++) {
+ gf_vect_mul_init(*a++, g_tbls);
+ g_tbls += 32;
+ }
+ }
+}
+
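+// Log/antilog multiplication: a*b = gff_base[(gflog_base[a] + gflog_base[b]) mod 255]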
+unsigned char gf_mul(unsigned char a, unsigned char b)
+{
+#ifndef GF_LARGE_TABLES
+ int i;
+
+ if ((a == 0) || (b == 0))
+ return 0;
+
+ return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
+#else
+ return gf_mul_table_base[b * 256 + a];
+#endif
+}
+
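+// Inverse via logs: 1/a = gff_base[255 - gflog_base[a]]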
+unsigned char gf_inv(unsigned char a)
+{
+#ifndef GF_LARGE_TABLES
+ if (a == 0)
+ return 0;
+
+ return gff_base[255 - gflog_base[a]];
+#else
+ return gf_inv_table_base[a];
+#endif
+}
+
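+// Identity in the top k rows; each of the remaining rows holds successive
+// powers of a generator (Vandermonde-style parity rows).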
+void gf_gen_rs_matrix(unsigned char *a, int m, int k)
+{
+ int i, j;
+ unsigned char p, gen = 1;
+
+ memset(a, 0, k * m);
+ for (i = 0; i < k; i++)
+ a[k * i + i] = 1;
+
+ for (i = k; i < m; i++) {
+ p = 1;
+ for (j = 0; j < k; j++) {
+ a[k * i + j] = p;
+ p = gf_mul(p, gen);
+ }
+ gen = gf_mul(gen, 2);
+ }
+}
+
+void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
+{
+ int i, j;
+ unsigned char *p;
+
+ // Identity matrix in high position
+ memset(a, 0, k * m);
+ for (i = 0; i < k; i++)
+ a[k * i + i] = 1;
+
+ // For the rest choose 1/(i + j) | i != j
+ p = &a[k * k];
+ for (i = k; i < m; i++)
+ for (j = 0; j < k; j++)
+ *p++ = gf_inv(i ^ j);
+
+}
+
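+// Gauss-Jordan elimination over GF(2^8) with row swapping on zero pivots.
+// Note: in_mat is modified in place; returns -1 if the matrix is singular.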
+int gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
+{
+ int i, j, k;
+ unsigned char temp;
+
+ // Set out_mat[] to the identity matrix
+ for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n)
+ out_mat[i] = 0;
+
+ for (i = 0; i < n; i++)
+ out_mat[i * n + i] = 1;
+
+ // Inverse
+ for (i = 0; i < n; i++) {
+ // Check for 0 in pivot element
+ if (in_mat[i * n + i] == 0) {
+ // Find a row with non-zero in current column and swap
+ for (j = i + 1; j < n; j++)
+ if (in_mat[j * n + i])
+ break;
+
+ if (j == n) // Couldn't find means it's singular
+ return -1;
+
+ for (k = 0; k < n; k++) { // Swap rows i,j
+ temp = in_mat[i * n + k];
+ in_mat[i * n + k] = in_mat[j * n + k];
+ in_mat[j * n + k] = temp;
+
+ temp = out_mat[i * n + k];
+ out_mat[i * n + k] = out_mat[j * n + k];
+ out_mat[j * n + k] = temp;
+ }
+ }
+
+ temp = gf_inv(in_mat[i * n + i]); // 1/pivot
+ for (j = 0; j < n; j++) { // Scale row i by 1/pivot
+ in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
+ out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
+ }
+
+ for (j = 0; j < n; j++) {
+ if (j == i)
+ continue;
+
+ temp = in_mat[j * n + i];
+ for (k = 0; k < n; k++) {
+ out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
+ in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
+ }
+ }
+ }
+ return 0;
+}
+
+// Calculates const table gftbl in GF(2^8) from single input A
+// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
+// i.e. byte i of the first 16 bytes is A*i, byte i of the second 16 bytes is A*(i<<4)
+
+void gf_vect_mul_init(unsigned char c, unsigned char *tbl)
+{
+ unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+
+#if __WORDSIZE == 64 || _WIN64 || __x86_64__
+ unsigned long long v1, v2, v4, v8, *t;
+ unsigned long long v10, v20, v40, v80;
+ unsigned char c17, c18, c20, c24;
+
+ t = (unsigned long long *)tbl;
+
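+	// Byte lane i of {t[0],t[1]} receives c*i: each multiplier below
+	// replicates c*2^b into exactly the lanes whose index has bit b set,
+	// and XOR (GF addition) of the lanes assembles the product table.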
+ v1 = c * 0x0100010001000100ull;
+ v2 = c2 * 0x0101000001010000ull;
+ v4 = c4 * 0x0101010100000000ull;
+ v8 = c8 * 0x0101010101010101ull;
+
+ v4 = v1 ^ v2 ^ v4;
+ t[0] = v4;
+ t[1] = v8 ^ v4;
+
+ c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+
+ v10 = c17 * 0x0100010001000100ull;
+ v20 = c18 * 0x0101000001010000ull;
+ v40 = c20 * 0x0101010100000000ull;
+ v80 = c24 * 0x0101010101010101ull;
+
+ v40 = v10 ^ v20 ^ v40;
+ t[2] = v40;
+ t[3] = v80 ^ v40;
+
+#else // 32-bit or other
+ unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
+ unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30,
+ c31;
+
+ c3 = c2 ^ c;
+ c5 = c4 ^ c;
+ c6 = c4 ^ c2;
+ c7 = c4 ^ c3;
+
+ c9 = c8 ^ c;
+ c10 = c8 ^ c2;
+ c11 = c8 ^ c3;
+ c12 = c8 ^ c4;
+ c13 = c8 ^ c5;
+ c14 = c8 ^ c6;
+ c15 = c8 ^ c7;
+
+ tbl[0] = 0;
+ tbl[1] = c;
+ tbl[2] = c2;
+ tbl[3] = c3;
+ tbl[4] = c4;
+ tbl[5] = c5;
+ tbl[6] = c6;
+ tbl[7] = c7;
+ tbl[8] = c8;
+ tbl[9] = c9;
+ tbl[10] = c10;
+ tbl[11] = c11;
+ tbl[12] = c12;
+ tbl[13] = c13;
+ tbl[14] = c14;
+ tbl[15] = c15;
+
+ c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ c19 = c18 ^ c17;
+ c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ c21 = c20 ^ c17;
+ c22 = c20 ^ c18;
+ c23 = c20 ^ c19;
+ c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
+ c25 = c24 ^ c17;
+ c26 = c24 ^ c18;
+ c27 = c24 ^ c19;
+ c28 = c24 ^ c20;
+ c29 = c24 ^ c21;
+ c30 = c24 ^ c22;
+ c31 = c24 ^ c23;
+
+ tbl[16] = 0;
+ tbl[17] = c17;
+ tbl[18] = c18;
+ tbl[19] = c19;
+ tbl[20] = c20;
+ tbl[21] = c21;
+ tbl[22] = c22;
+ tbl[23] = c23;
+ tbl[24] = c24;
+ tbl[25] = c25;
+ tbl[26] = c26;
+ tbl[27] = c27;
+ tbl[28] = c28;
+ tbl[29] = c29;
+ tbl[30] = c30;
+ tbl[31] = c31;
+
+#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
+}
+
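+// Scalar dot-product reference. v[] is the expanded table array from
+// ec_init_tables(); element 1 of each 32-byte table is the raw coefficient.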
+void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v,
+ unsigned char **src, unsigned char *dest)
+{
+ int i, j;
+ unsigned char s;
+ for (i = 0; i < len; i++) {
+ s = 0;
+ for (j = 0; j < vlen; j++)
+ s ^= gf_mul(src[j][i], v[j * 32 + 1]);
+
+ dest[i] = s;
+ }
+}
+
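+// Multiply-accumulate reference: dest[i] ^= src[i] * coefficient vec_i.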
+void gf_vect_mad_base(int len, int vec, int vec_i,
+ unsigned char *v, unsigned char *src, unsigned char *dest)
+{
+ int i;
+ unsigned char s;
+ for (i = 0; i < len; i++) {
+ s = dest[i];
+ s ^= gf_mul(src[i], v[vec_i * 32 + 1]);
+ dest[i] = s;
+ }
+}
+
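+// Reference encode: dest[l][i] = XOR over j of src[j][i] * a[l][j] in GF(2^8).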
+void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
+ unsigned char **src, unsigned char **dest)
+{
+ int i, j, l;
+ unsigned char s;
+
+ for (l = 0; l < dests; l++) {
+ for (i = 0; i < len; i++) {
+ s = 0;
+ for (j = 0; j < srcs; j++)
+ s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);
+
+ dest[l][i] = s;
+ }
+ }
+}
+
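+// Incremental encode: fold one more source (vec_i) into every parity row.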
+void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
+ unsigned char *data, unsigned char **dest)
+{
+ int i, l;
+ unsigned char s;
+
+ for (l = 0; l < rows; l++) {
+ for (i = 0; i < len; i++) {
+ s = dest[l][i];
+ s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]);
+
+ dest[l][i] = s;
+ }
+ }
+}
+
+void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
+{
+	// element 1 of the 32-byte table holds the original constant used to build it
+ unsigned char c = a[1];
+ while (len-- > 0)
+ *dest++ = gf_mul(c, *src++);
+}
+
+struct slver {
+ unsigned short snum;
+ unsigned char ver;
+ unsigned char core;
+};
+
+// Version info
+struct slver gf_vect_mul_init_slver_00020035;
+struct slver gf_vect_mul_init_slver = { 0x0035, 0x02, 0x00 };
+
+struct slver ec_encode_data_base_slver_00010135;
+struct slver ec_encode_data_base_slver = { 0x0135, 0x01, 0x00 };
+
+struct slver gf_vect_mul_base_slver_00010136;
+struct slver gf_vect_mul_base_slver = { 0x0136, 0x01, 0x00 };
+
+struct slver gf_vect_dot_prod_base_slver_00010137;
+struct slver gf_vect_dot_prod_base_slver = { 0x0137, 0x01, 0x00 };
+
+struct slver gf_mul_slver_00000214;
+struct slver gf_mul_slver = { 0x0214, 0x00, 0x00 };
+
+struct slver gf_invert_matrix_slver_00000215;
+struct slver gf_invert_matrix_slver = { 0x0215, 0x00, 0x00 };
+
+struct slver gf_gen_rs_matrix_slver_00000216;
+struct slver gf_gen_rs_matrix_slver = { 0x0216, 0x00, 0x00 };
+
+struct slver gf_gen_cauchy1_matrix_slver_00000217;
+struct slver gf_gen_cauchy1_matrix_slver = { 0x0217, 0x00, 0x00 };
diff --git a/src/isa-l/erasure_code/ec_base.h b/src/isa-l/erasure_code/ec_base.h
new file mode 100644
index 000000000..070b27665
--- /dev/null
+++ b/src/isa-l/erasure_code/ec_base.h
@@ -0,0 +1,6680 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _EC_BASE_H_
+#define _EC_BASE_H_
+
+// Global GF(256) tables
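+// gff_base[i] = 2^i and gflog_base[2^i] = i over polynomial 0x11d (x^8+x^4+x^3+x^2+1)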
+#ifndef GF_LARGE_TABLES
+static const unsigned char gff_base[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a,
+ 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a,
+ 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30,
+ 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c,
+ 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2,
+ 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f,
+ 0x5e, 0xbc, 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1,
+ 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86,
+ 0x11, 0x22, 0x44, 0x88, 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd,
+ 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17,
+ 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42,
+ 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4,
+ 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5,
+ 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b,
+ 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57,
+ 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2,
+ 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56,
+ 0xac, 0x45, 0x8a, 0x09, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a,
+ 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36,
+ 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
+};
+
+static const unsigned char gflog_base[] = {
+ 0x00, 0xff, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf,
+ 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, 0x04, 0x64, 0xe0, 0x0e,
+ 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08,
+ 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
+ 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, 0x1d, 0xb5,
+ 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78,
+ 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd,
+ 0x30, 0xfd, 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+ 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2,
+ 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3,
+ 0xc3, 0x48, 0x7e, 0x6e, 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85,
+ 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
+ 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70,
+ 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, 0x67, 0x4a, 0xde, 0xed,
+ 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8,
+ 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+ 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, 0x90, 0x87,
+ 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab,
+ 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d,
+ 0x41, 0xa2, 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
+ 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1,
+ 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1,
+ 0xbb, 0xcc, 0x3e, 0x5a, 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9,
+ 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+ 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6,
+ 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf
+};
+#else
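+// Full 256x256 product table: gf_mul_table_base[b * 256 + a] = a*b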
+static const unsigned char gf_mul_table_base[] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+ 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21,
+ 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
+ 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35,
+ 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+ 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
+ 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d,
+ 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71,
+ 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b,
+ 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
+ 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
+ 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3,
+ 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad,
+ 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1,
+ 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb,
+ 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5,
+ 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+ 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd,
+ 0xfe, 0xff, 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
+ 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22,
+ 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36,
+ 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a,
+ 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+ 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72,
+ 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86,
+ 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a,
+ 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae,
+ 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2,
+ 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6,
+ 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea,
+ 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+ 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0x0d, 0x0f,
+ 0x09, 0x0b, 0x05, 0x07, 0x01, 0x03, 0x3d, 0x3f, 0x39, 0x3b,
+ 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27,
+ 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53,
+ 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f,
+ 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b,
+ 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97,
+ 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83,
+ 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf,
+ 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb,
+ 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7,
+ 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3,
+ 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3, 0x00, 0x03,
+ 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d,
+ 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f,
+ 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+ 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b,
+ 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55,
+ 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47,
+ 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9,
+ 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3,
+ 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed,
+ 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf,
+ 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+ 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b,
+ 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98,
+ 0x91, 0x92, 0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a,
+ 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4,
+ 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc, 0xfd, 0xfe,
+ 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0,
+ 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2,
+ 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc,
+ 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46,
+ 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68,
+ 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a,
+ 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34,
+ 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0x0d, 0x0e,
+ 0x0b, 0x08, 0x01, 0x02, 0x07, 0x04, 0x15, 0x16, 0x13, 0x10,
+ 0x19, 0x1a, 0x1f, 0x1c, 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14,
+ 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c,
+ 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c, 0x60, 0x64,
+ 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c, 0x80, 0x84, 0x88, 0x8c,
+ 0x90, 0x94, 0x98, 0x9c, 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4,
+ 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc,
+ 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc, 0x1d, 0x19,
+ 0x15, 0x11, 0x0d, 0x09, 0x05, 0x01, 0x3d, 0x39, 0x35, 0x31,
+ 0x2d, 0x29, 0x25, 0x21, 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49,
+ 0x45, 0x41, 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61,
+ 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81, 0xbd, 0xb9,
+ 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1, 0xdd, 0xd9, 0xd5, 0xd1,
+ 0xcd, 0xc9, 0xc5, 0xc1, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9,
+ 0xe5, 0xe1, 0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26,
+ 0x1a, 0x1e, 0x12, 0x16, 0x0a, 0x0e, 0x02, 0x06, 0x7a, 0x7e,
+ 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66, 0x5a, 0x5e, 0x52, 0x56,
+ 0x4a, 0x4e, 0x42, 0x46, 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae,
+ 0xa2, 0xa6, 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86,
+ 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6, 0xda, 0xde,
+ 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6, 0x27, 0x23, 0x2f, 0x2b,
+ 0x37, 0x33, 0x3f, 0x3b, 0x07, 0x03, 0x0f, 0x0b, 0x17, 0x13,
+ 0x1f, 0x1b, 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b,
+ 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b, 0xa7, 0xa3,
+ 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb, 0x87, 0x83, 0x8f, 0x8b,
+ 0x97, 0x93, 0x9f, 0x9b, 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3,
+ 0xff, 0xfb, 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb,
+ 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d,
+ 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33, 0x50, 0x55, 0x5a, 0x5f,
+ 0x44, 0x41, 0x4e, 0x4b, 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69,
+ 0x66, 0x63, 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb,
+ 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93, 0xf0, 0xf5,
+ 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb, 0xd8, 0xdd, 0xd2, 0xd7,
+ 0xcc, 0xc9, 0xc6, 0xc3, 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c,
+ 0x43, 0x46, 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e,
+ 0x0d, 0x08, 0x07, 0x02, 0x19, 0x1c, 0x13, 0x16, 0x25, 0x20,
+ 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e, 0xfd, 0xf8, 0xf7, 0xf2,
+ 0xe9, 0xec, 0xe3, 0xe6, 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4,
+ 0xcb, 0xce, 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6,
+ 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e, 0xba, 0xbf,
+ 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1, 0x92, 0x97, 0x98, 0x9d,
+ 0x86, 0x83, 0x8c, 0x89, 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb,
+ 0xf4, 0xf1, 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9,
+ 0x1a, 0x1f, 0x10, 0x15, 0x0e, 0x0b, 0x04, 0x01, 0x32, 0x37,
+ 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29, 0x4a, 0x4f, 0x40, 0x45,
+ 0x5e, 0x5b, 0x54, 0x51, 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73,
+ 0x7c, 0x79, 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc,
+ 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4, 0xb7, 0xb2,
+ 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac, 0x9f, 0x9a, 0x95, 0x90,
+ 0x8b, 0x8e, 0x81, 0x84, 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56,
+ 0x59, 0x5c, 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74,
+ 0x17, 0x12, 0x1d, 0x18, 0x03, 0x06, 0x09, 0x0c, 0x3f, 0x3a,
+ 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24, 0x00, 0x06, 0x0c, 0x0a,
+ 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e,
+ 0x24, 0x22, 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72,
+ 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42, 0xc0, 0xc6,
+ 0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2, 0xf0, 0xf6, 0xfc, 0xfa,
+ 0xe8, 0xee, 0xe4, 0xe2, 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe,
+ 0xb4, 0xb2, 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82,
+ 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f, 0xad, 0xab,
+ 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf, 0xfd, 0xfb, 0xf1, 0xf7,
+ 0xe5, 0xe3, 0xe9, 0xef, 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3,
+ 0xd9, 0xdf, 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f,
+ 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f, 0x3d, 0x3b,
+ 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f, 0x0d, 0x0b, 0x01, 0x07,
+ 0x15, 0x13, 0x19, 0x1f, 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39,
+ 0x33, 0x35, 0x17, 0x11, 0x1b, 0x1d, 0x0f, 0x09, 0x03, 0x05,
+ 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55, 0x77, 0x71,
+ 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65, 0xe7, 0xe1, 0xeb, 0xed,
+ 0xff, 0xf9, 0xf3, 0xf5, 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9,
+ 0xc3, 0xc5, 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95,
+ 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5, 0xba, 0xbc,
+ 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8, 0x8a, 0x8c, 0x86, 0x80,
+ 0x92, 0x94, 0x9e, 0x98, 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4,
+ 0xce, 0xc8, 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8,
+ 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68, 0x4a, 0x4c,
+ 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58, 0x1a, 0x1c, 0x16, 0x10,
+ 0x02, 0x04, 0x0e, 0x08, 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34,
+ 0x3e, 0x38, 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15,
+ 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77,
+ 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, 0x48, 0x4f, 0x46, 0x41,
+ 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb,
+ 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd,
+ 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf,
+ 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, 0xdd, 0xda, 0xd3, 0xd4,
+ 0xc1, 0xc6, 0xcf, 0xc8, 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe,
+ 0xf7, 0xf0, 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8,
+ 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80, 0x3d, 0x3a,
+ 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28, 0x05, 0x02, 0x0b, 0x0c,
+ 0x19, 0x1e, 0x17, 0x10, 0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56,
+ 0x5f, 0x58, 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60,
+ 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2, 0x9f, 0x98,
+ 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a, 0xd7, 0xd0, 0xd9, 0xde,
+ 0xcb, 0xcc, 0xc5, 0xc2, 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4,
+ 0xfd, 0xfa, 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52,
+ 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a, 0x37, 0x30,
+ 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22, 0x0f, 0x08, 0x01, 0x06,
+ 0x13, 0x14, 0x1d, 0x1a, 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61,
+ 0x68, 0x6f, 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57,
+ 0x0a, 0x0d, 0x04, 0x03, 0x16, 0x11, 0x18, 0x1f, 0x32, 0x35,
+ 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27, 0x9a, 0x9d, 0x94, 0x93,
+ 0x86, 0x81, 0x88, 0x8f, 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9,
+ 0xb0, 0xb7, 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff,
+ 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7, 0x00, 0x08,
+ 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58,
+ 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8,
+ 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8,
+ 0x1d, 0x15, 0x0d, 0x05, 0x3d, 0x35, 0x2d, 0x25, 0x5d, 0x55,
+ 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65, 0x9d, 0x95, 0x8d, 0x85,
+ 0xbd, 0xb5, 0xad, 0xa5, 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5,
+ 0xed, 0xe5, 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0x0a, 0x02,
+ 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42, 0xba, 0xb2,
+ 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82, 0xfa, 0xf2, 0xea, 0xe2,
+ 0xda, 0xd2, 0xca, 0xc2, 0x27, 0x2f, 0x37, 0x3f, 0x07, 0x0f,
+ 0x17, 0x1f, 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f,
+ 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f, 0xe7, 0xef,
+ 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf, 0x74, 0x7c, 0x64, 0x6c,
+ 0x54, 0x5c, 0x44, 0x4c, 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c,
+ 0x04, 0x0c, 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc,
+ 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c, 0x69, 0x61,
+ 0x79, 0x71, 0x49, 0x41, 0x59, 0x51, 0x29, 0x21, 0x39, 0x31,
+ 0x09, 0x01, 0x19, 0x11, 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1,
+ 0xd9, 0xd1, 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91,
+ 0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76, 0x0e, 0x06,
+ 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36, 0xce, 0xc6, 0xde, 0xd6,
+ 0xee, 0xe6, 0xfe, 0xf6, 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6,
+ 0xbe, 0xb6, 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b,
+ 0x13, 0x1b, 0x03, 0x0b, 0x33, 0x3b, 0x23, 0x2b, 0xd3, 0xdb,
+ 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb, 0x93, 0x9b, 0x83, 0x8b,
+ 0xb3, 0xbb, 0xa3, 0xab, 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d,
+ 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+ 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1,
+ 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3d, 0x34, 0x2f, 0x26,
+ 0x19, 0x10, 0x0b, 0x02, 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58,
+ 0x43, 0x4a, 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92,
+ 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda, 0x7a, 0x73,
+ 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, 0x32, 0x3b, 0x20, 0x29,
+ 0x16, 0x1f, 0x04, 0x0d, 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7,
+ 0xdc, 0xd5, 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d,
+ 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06,
+ 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0xd7, 0xde, 0xc5, 0xcc,
+ 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2,
+ 0xa9, 0xa0, 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb,
+ 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83, 0x64, 0x6d,
+ 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37,
+ 0x08, 0x01, 0x1a, 0x13, 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4,
+ 0xff, 0xf6, 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe,
+ 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66, 0x11, 0x18,
+ 0x03, 0x0a, 0x35, 0x3c, 0x27, 0x2e, 0x8e, 0x87, 0x9c, 0x95,
+ 0xaa, 0xa3, 0xb8, 0xb1, 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb,
+ 0xf0, 0xf9, 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
+ 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0xb3, 0xba,
+ 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c, 0xfb, 0xf2, 0xe9, 0xe0,
+ 0xdf, 0xd6, 0xcd, 0xc4, 0x23, 0x2a, 0x31, 0x38, 0x07, 0x0e,
+ 0x15, 0x1c, 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54,
+ 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a,
+ 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66, 0xa0, 0xaa, 0xb4, 0xbe,
+ 0x88, 0x82, 0x9c, 0x96, 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2,
+ 0xcc, 0xc6, 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b,
+ 0x0d, 0x07, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b, 0xfd, 0xf7,
+ 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb, 0xad, 0xa7, 0xb9, 0xb3,
+ 0x85, 0x8f, 0x91, 0x9b, 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98,
+ 0x86, 0x8c, 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc,
+ 0x1a, 0x10, 0x0e, 0x04, 0x32, 0x38, 0x26, 0x2c, 0x4a, 0x40,
+ 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c, 0xe7, 0xed, 0xf3, 0xf9,
+ 0xcf, 0xc5, 0xdb, 0xd1, 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95,
+ 0x8b, 0x81, 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71,
+ 0x17, 0x1d, 0x03, 0x09, 0x3f, 0x35, 0x2b, 0x21, 0x69, 0x63,
+ 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f, 0x39, 0x33, 0x2d, 0x27,
+ 0x11, 0x1b, 0x05, 0x0f, 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb,
+ 0xf5, 0xff, 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf,
+ 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x08, 0x02, 0x64, 0x6e,
+ 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52, 0x94, 0x9e, 0x80, 0x8a,
+ 0xbc, 0xb6, 0xa8, 0xa2, 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6,
+ 0xf8, 0xf2, 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5,
+ 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5, 0x73, 0x79,
+ 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45, 0x23, 0x29, 0x37, 0x3d,
+ 0x0b, 0x01, 0x1f, 0x15, 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac,
+ 0xb2, 0xb8, 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8,
+ 0x2e, 0x24, 0x3a, 0x30, 0x06, 0x0c, 0x12, 0x18, 0x7e, 0x74,
+ 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48, 0x00, 0x0b, 0x16, 0x1d,
+ 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f,
+ 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81,
+ 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7d, 0x76,
+ 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c, 0x25, 0x2e, 0x33, 0x38,
+ 0x09, 0x02, 0x1f, 0x14, 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea,
+ 0xf7, 0xfc, 0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4,
+ 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb, 0xa2, 0xa9,
+ 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93, 0x4a, 0x41, 0x5c, 0x57,
+ 0x66, 0x6d, 0x70, 0x7b, 0x12, 0x19, 0x04, 0x0f, 0x3e, 0x35,
+ 0x28, 0x23, 0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6,
+ 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee, 0x37, 0x3c,
+ 0x21, 0x2a, 0x1b, 0x10, 0x0d, 0x06, 0x6f, 0x64, 0x79, 0x72,
+ 0x43, 0x48, 0x55, 0x5e, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce,
+ 0xd3, 0xd8, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80,
+ 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0x01, 0x0a,
+ 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x94, 0x9f, 0x82, 0x89,
+ 0xb8, 0xb3, 0xae, 0xa5, 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb,
+ 0xf6, 0xfd, 0x24, 0x2f, 0x32, 0x39, 0x08, 0x03, 0x1e, 0x15,
+ 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d, 0x13, 0x18,
+ 0x05, 0x0e, 0x3f, 0x34, 0x29, 0x22, 0x4b, 0x40, 0x5d, 0x56,
+ 0x67, 0x6c, 0x71, 0x7a, 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84,
+ 0x99, 0x92, 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca,
+ 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f, 0x36, 0x3d,
+ 0x20, 0x2b, 0x1a, 0x11, 0x0c, 0x07, 0xde, 0xd5, 0xc8, 0xc3,
+ 0xf2, 0xf9, 0xe4, 0xef, 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1,
+ 0xbc, 0xb7, 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24,
+ 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44, 0xc0, 0xcc,
+ 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4, 0xa0, 0xac, 0xb8, 0xb4,
+ 0x90, 0x9c, 0x88, 0x84, 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1,
+ 0xb5, 0xb9, 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9,
+ 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79, 0x3d, 0x31,
+ 0x25, 0x29, 0x0d, 0x01, 0x15, 0x19, 0x27, 0x2b, 0x3f, 0x33,
+ 0x17, 0x1b, 0x0f, 0x03, 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b,
+ 0x6f, 0x63, 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3,
+ 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3, 0xba, 0xb6,
+ 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e, 0xda, 0xd6, 0xc2, 0xce,
+ 0xea, 0xe6, 0xf2, 0xfe, 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46,
+ 0x52, 0x5e, 0x1a, 0x16, 0x02, 0x0e, 0x2a, 0x26, 0x32, 0x3e,
+ 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a, 0x2e, 0x22,
+ 0x36, 0x3a, 0x1e, 0x12, 0x06, 0x0a, 0x8e, 0x82, 0x96, 0x9a,
+ 0xbe, 0xb2, 0xa6, 0xaa, 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2,
+ 0xc6, 0xca, 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7,
+ 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97, 0x13, 0x1f,
+ 0x0b, 0x07, 0x23, 0x2f, 0x3b, 0x37, 0x73, 0x7f, 0x6b, 0x67,
+ 0x43, 0x4f, 0x5b, 0x57, 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55,
+ 0x41, 0x4d, 0x09, 0x05, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d,
+ 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d, 0xc9, 0xc5,
+ 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed, 0xf4, 0xf8, 0xec, 0xe0,
+ 0xc4, 0xc8, 0xdc, 0xd0, 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8,
+ 0xbc, 0xb0, 0x34, 0x38, 0x2c, 0x20, 0x04, 0x08, 0x1c, 0x10,
+ 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70, 0x00, 0x0d,
+ 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f,
+ 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9,
+ 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+ 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8,
+ 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0x6d, 0x60, 0x77, 0x7a,
+ 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c,
+ 0x2b, 0x26, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44,
+ 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0xb7, 0xba,
+ 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8,
+ 0xeb, 0xe6, 0xf1, 0xfc, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3,
+ 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+ 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f,
+ 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0xce, 0xc3, 0xd4, 0xd9,
+ 0xfa, 0xf7, 0xe0, 0xed, 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f,
+ 0x88, 0x85, 0x1e, 0x13, 0x04, 0x09, 0x2a, 0x27, 0x30, 0x3d,
+ 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55, 0x73, 0x7e,
+ 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50, 0x1b, 0x16, 0x01, 0x0c,
+ 0x2f, 0x22, 0x35, 0x38, 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a,
+ 0x8d, 0x80, 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8,
+ 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a, 0xc1, 0xcc,
+ 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2, 0x79, 0x74, 0x63, 0x6e,
+ 0x4d, 0x40, 0x57, 0x5a, 0x11, 0x1c, 0x0b, 0x06, 0x25, 0x28,
+ 0x3f, 0x32, 0x14, 0x19, 0x0e, 0x03, 0x20, 0x2d, 0x3a, 0x37,
+ 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f, 0xc4, 0xc9,
+ 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7, 0xac, 0xa1, 0xb6, 0xbb,
+ 0x98, 0x95, 0x82, 0x8f, 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36,
+ 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+ 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e,
+ 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdd, 0xd3, 0xc1, 0xcf,
+ 0xe5, 0xeb, 0xf9, 0xf7, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b,
+ 0x89, 0x87, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+ 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0xa7, 0xa9,
+ 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, 0xd7, 0xd9, 0xcb, 0xc5,
+ 0xef, 0xe1, 0xf3, 0xfd, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71,
+ 0x63, 0x6d, 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d,
+ 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04,
+ 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0x9a, 0x94, 0x86, 0x88,
+ 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc,
+ 0xce, 0xc0, 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79,
+ 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x07, 0x09, 0xb3, 0xbd,
+ 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99, 0xc3, 0xcd, 0xdf, 0xd1,
+ 0xfb, 0xf5, 0xe7, 0xe9, 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8,
+ 0xaa, 0xa4, 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4,
+ 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44, 0x1e, 0x10,
+ 0x02, 0x0c, 0x26, 0x28, 0x3a, 0x34, 0xf4, 0xfa, 0xe8, 0xe6,
+ 0xcc, 0xc2, 0xd0, 0xde, 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2,
+ 0xa0, 0xae, 0x14, 0x1a, 0x08, 0x06, 0x2c, 0x22, 0x30, 0x3e,
+ 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e, 0x29, 0x27,
+ 0x35, 0x3b, 0x11, 0x1f, 0x0d, 0x03, 0x59, 0x57, 0x45, 0x4b,
+ 0x61, 0x6f, 0x7d, 0x73, 0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff,
+ 0xed, 0xe3, 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93,
+ 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77,
+ 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55, 0xf0, 0xff, 0xee, 0xe1,
+ 0xcc, 0xc3, 0xd2, 0xdd, 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb,
+ 0xaa, 0xa5, 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0,
+ 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8, 0x0d, 0x02,
+ 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20, 0x75, 0x7a, 0x6b, 0x64,
+ 0x49, 0x46, 0x57, 0x58, 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4,
+ 0xc5, 0xca, 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2,
+ 0x17, 0x18, 0x09, 0x06, 0x2b, 0x24, 0x35, 0x3a, 0x6f, 0x60,
+ 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42, 0x1a, 0x15, 0x04, 0x0b,
+ 0x26, 0x29, 0x38, 0x37, 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51,
+ 0x40, 0x4f, 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7,
+ 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf, 0xd3, 0xdc,
+ 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe, 0xab, 0xa4, 0xb5, 0xba,
+ 0x97, 0x98, 0x89, 0x86, 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10,
+ 0x01, 0x0e, 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76,
+ 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0x0c, 0x03, 0x56, 0x59,
+ 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b, 0xde, 0xd1, 0xc0, 0xcf,
+ 0xe2, 0xed, 0xfc, 0xf3, 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95,
+ 0x84, 0x8b, 0x34, 0x3b, 0x2a, 0x25, 0x08, 0x07, 0x16, 0x19,
+ 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61, 0xc4, 0xcb,
+ 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9, 0xbc, 0xb3, 0xa2, 0xad,
+ 0x80, 0x8f, 0x9e, 0x91, 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa,
+ 0xeb, 0xe4, 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c,
+ 0x39, 0x36, 0x27, 0x28, 0x05, 0x0a, 0x1b, 0x14, 0x41, 0x4e,
+ 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c, 0x00, 0x10, 0x20, 0x30,
+ 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0,
+ 0xe0, 0xf0, 0x1d, 0x0d, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d,
+ 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed, 0x3a, 0x2a,
+ 0x1a, 0x0a, 0x7a, 0x6a, 0x5a, 0x4a, 0xba, 0xaa, 0x9a, 0x8a,
+ 0xfa, 0xea, 0xda, 0xca, 0x27, 0x37, 0x07, 0x17, 0x67, 0x77,
+ 0x47, 0x57, 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7,
+ 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04, 0xf4, 0xe4,
+ 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84, 0x69, 0x79, 0x49, 0x59,
+ 0x29, 0x39, 0x09, 0x19, 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9,
+ 0x89, 0x99, 0x4e, 0x5e, 0x6e, 0x7e, 0x0e, 0x1e, 0x2e, 0x3e,
+ 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe, 0x53, 0x43,
+ 0x73, 0x63, 0x13, 0x03, 0x33, 0x23, 0xd3, 0xc3, 0xf3, 0xe3,
+ 0x93, 0x83, 0xb3, 0xa3, 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8,
+ 0x88, 0x98, 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x08, 0x18,
+ 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85, 0x75, 0x65,
+ 0x55, 0x45, 0x35, 0x25, 0x15, 0x05, 0xd2, 0xc2, 0xf2, 0xe2,
+ 0x92, 0x82, 0xb2, 0xa2, 0x52, 0x42, 0x72, 0x62, 0x12, 0x02,
+ 0x32, 0x22, 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf,
+ 0x4f, 0x5f, 0x6f, 0x7f, 0x0f, 0x1f, 0x2f, 0x3f, 0x9c, 0x8c,
+ 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec, 0x1c, 0x0c, 0x3c, 0x2c,
+ 0x5c, 0x4c, 0x7c, 0x6c, 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1,
+ 0xe1, 0xf1, 0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
+ 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6, 0x26, 0x36,
+ 0x06, 0x16, 0x66, 0x76, 0x46, 0x56, 0xbb, 0xab, 0x9b, 0x8b,
+ 0xfb, 0xeb, 0xdb, 0xcb, 0x3b, 0x2b, 0x1b, 0x0b, 0x7b, 0x6b,
+ 0x5b, 0x4b, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x0d, 0x1c,
+ 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a, 0x85, 0x94, 0xa7, 0xb6,
+ 0xc1, 0xd0, 0xe3, 0xf2, 0x1a, 0x0b, 0x38, 0x29, 0x5e, 0x4f,
+ 0x7c, 0x6d, 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5,
+ 0x17, 0x06, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60, 0x9f, 0x8e,
+ 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8, 0x34, 0x25, 0x16, 0x07,
+ 0x70, 0x61, 0x52, 0x43, 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9,
+ 0xda, 0xcb, 0x39, 0x28, 0x1b, 0x0a, 0x7d, 0x6c, 0x5f, 0x4e,
+ 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6, 0x2e, 0x3f,
+ 0x0c, 0x1d, 0x6a, 0x7b, 0x48, 0x59, 0xa6, 0xb7, 0x84, 0x95,
+ 0xe2, 0xf3, 0xc0, 0xd1, 0x23, 0x32, 0x01, 0x10, 0x67, 0x76,
+ 0x45, 0x54, 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc,
+ 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0x0e, 0x1f, 0xe0, 0xf1,
+ 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97, 0x65, 0x74, 0x47, 0x56,
+ 0x21, 0x30, 0x03, 0x12, 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8,
+ 0x8b, 0x9a, 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x05,
+ 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d, 0x7f, 0x6e,
+ 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x08, 0xf7, 0xe6, 0xd5, 0xc4,
+ 0xb3, 0xa2, 0x91, 0x80, 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x09,
+ 0x3a, 0x2b, 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3,
+ 0x51, 0x40, 0x73, 0x62, 0x15, 0x04, 0x37, 0x26, 0xd9, 0xc8,
+ 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae, 0x46, 0x57, 0x64, 0x75,
+ 0x02, 0x13, 0x20, 0x31, 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b,
+ 0xa8, 0xb9, 0x4b, 0x5a, 0x69, 0x78, 0x0f, 0x1e, 0x2d, 0x3c,
+ 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4, 0x00, 0x12,
+ 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6,
+ 0xd8, 0xca, 0xfc, 0xee, 0x3d, 0x2f, 0x19, 0x0b, 0x75, 0x67,
+ 0x51, 0x43, 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3,
+ 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x04, 0xea, 0xf8,
+ 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94, 0x47, 0x55, 0x63, 0x71,
+ 0x0f, 0x1d, 0x2b, 0x39, 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d,
+ 0xbb, 0xa9, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+ 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, 0xc9, 0xdb,
+ 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7, 0x59, 0x4b, 0x7d, 0x6f,
+ 0x11, 0x03, 0x35, 0x27, 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4,
+ 0xe2, 0xf0, 0x1e, 0x0c, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60,
+ 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd, 0x23, 0x31,
+ 0x07, 0x15, 0x6b, 0x79, 0x4f, 0x5d, 0xf5, 0xe7, 0xd1, 0xc3,
+ 0xbd, 0xaf, 0x99, 0x8b, 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f,
+ 0x09, 0x1b, 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+ 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26, 0x8f, 0x9d,
+ 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1, 0x1f, 0x0d, 0x3b, 0x29,
+ 0x57, 0x45, 0x73, 0x61, 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8,
+ 0xde, 0xcc, 0x22, 0x30, 0x06, 0x14, 0x6a, 0x78, 0x4e, 0x5c,
+ 0x01, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f, 0x91, 0x83,
+ 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef, 0x3c, 0x2e, 0x18, 0x0a,
+ 0x74, 0x66, 0x50, 0x42, 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6,
+ 0xc0, 0xd2, 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x05,
+ 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95, 0x46, 0x54,
+ 0x62, 0x70, 0x0e, 0x1c, 0x2a, 0x38, 0xd6, 0xc4, 0xf2, 0xe0,
+ 0x9e, 0x8c, 0xba, 0xa8, 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f,
+ 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1,
+ 0x2d, 0x3e, 0x0b, 0x18, 0x61, 0x72, 0x47, 0x54, 0xb5, 0xa6,
+ 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc, 0x5a, 0x49, 0x7c, 0x6f,
+ 0x16, 0x05, 0x30, 0x23, 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d,
+ 0xa8, 0xbb, 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0x0e,
+ 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96, 0xb4, 0xa7,
+ 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd, 0x2c, 0x3f, 0x0a, 0x19,
+ 0x60, 0x73, 0x46, 0x55, 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6,
+ 0xf3, 0xe0, 0x01, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78,
+ 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97, 0x76, 0x65,
+ 0x50, 0x43, 0x3a, 0x29, 0x1c, 0x0f, 0xc3, 0xd0, 0xe5, 0xf6,
+ 0x8f, 0x9c, 0xa9, 0xba, 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x04,
+ 0x31, 0x22, 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0x0c,
+ 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94, 0x58, 0x4b,
+ 0x7e, 0x6d, 0x14, 0x07, 0x32, 0x21, 0xc0, 0xd3, 0xe6, 0xf5,
+ 0x8c, 0x9f, 0xaa, 0xb9, 0x2f, 0x3c, 0x09, 0x1a, 0x63, 0x70,
+ 0x45, 0x56, 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce,
+ 0x02, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b, 0x9a, 0x89,
+ 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3, 0xc1, 0xd2, 0xe7, 0xf4,
+ 0x8d, 0x9e, 0xab, 0xb8, 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x06,
+ 0x33, 0x20, 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95,
+ 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0x0d, 0x9b, 0x88,
+ 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2, 0x03, 0x10, 0x25, 0x36,
+ 0x4f, 0x5c, 0x69, 0x7a, 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9,
+ 0xdc, 0xcf, 0x2e, 0x3d, 0x08, 0x1b, 0x62, 0x71, 0x44, 0x57,
+ 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4,
+ 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc, 0x5d, 0x49, 0x75, 0x61,
+ 0x0d, 0x19, 0x25, 0x31, 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9,
+ 0x85, 0x91, 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6,
+ 0x1a, 0x0e, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76, 0xe7, 0xf3,
+ 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b, 0x47, 0x53, 0x6f, 0x7b,
+ 0x17, 0x03, 0x3f, 0x2b, 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d,
+ 0x11, 0x05, 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5,
+ 0x34, 0x20, 0x1c, 0x08, 0x64, 0x70, 0x4c, 0x58, 0x94, 0x80,
+ 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8, 0xd3, 0xc7, 0xfb, 0xef,
+ 0x83, 0x97, 0xab, 0xbf, 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37,
+ 0x0b, 0x1f, 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2,
+ 0x2e, 0x3a, 0x06, 0x12, 0x7e, 0x6a, 0x56, 0x42, 0xd2, 0xc6,
+ 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe, 0x72, 0x66, 0x5a, 0x4e,
+ 0x22, 0x36, 0x0a, 0x1e, 0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb,
+ 0xf7, 0xe3, 0x2f, 0x3b, 0x07, 0x13, 0x7f, 0x6b, 0x57, 0x43,
+ 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x04, 0xc8, 0xdc,
+ 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4, 0x35, 0x21, 0x1d, 0x09,
+ 0x65, 0x71, 0x4d, 0x59, 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1,
+ 0xed, 0xf9, 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7,
+ 0x1b, 0x0f, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77, 0xe6, 0xf2,
+ 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a, 0x46, 0x52, 0x6e, 0x7a,
+ 0x16, 0x02, 0x3e, 0x2a, 0x01, 0x15, 0x29, 0x3d, 0x51, 0x45,
+ 0x79, 0x6d, 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd,
+ 0x5c, 0x48, 0x74, 0x60, 0x0c, 0x18, 0x24, 0x30, 0xfc, 0xe8,
+ 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90, 0x00, 0x15, 0x2a, 0x3f,
+ 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9,
+ 0xd6, 0xc3, 0x4d, 0x58, 0x67, 0x72, 0x19, 0x0c, 0x33, 0x26,
+ 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e, 0x9a, 0x8f,
+ 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1, 0x32, 0x27, 0x18, 0x0d,
+ 0x66, 0x73, 0x4c, 0x59, 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96,
+ 0xa9, 0xbc, 0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x01, 0x14,
+ 0x29, 0x3c, 0x03, 0x16, 0x7d, 0x68, 0x57, 0x42, 0x81, 0x94,
+ 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea, 0x64, 0x71, 0x4e, 0x5b,
+ 0x30, 0x25, 0x1a, 0x0f, 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d,
+ 0xb2, 0xa7, 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8,
+ 0x1b, 0x0e, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70, 0xfe, 0xeb,
+ 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95, 0x56, 0x43, 0x7c, 0x69,
+ 0x02, 0x17, 0x28, 0x3d, 0x52, 0x47, 0x78, 0x6d, 0x06, 0x13,
+ 0x2c, 0x39, 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91,
+ 0x1f, 0x0a, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74, 0xb7, 0xa2,
+ 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc, 0xc8, 0xdd, 0xe2, 0xf7,
+ 0x9c, 0x89, 0xb6, 0xa3, 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21,
+ 0x1e, 0x0b, 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee,
+ 0x2d, 0x38, 0x07, 0x12, 0x79, 0x6c, 0x53, 0x46, 0x7b, 0x6e,
+ 0x51, 0x44, 0x2f, 0x3a, 0x05, 0x10, 0xd3, 0xc6, 0xf9, 0xec,
+ 0x87, 0x92, 0xad, 0xb8, 0x36, 0x23, 0x1c, 0x09, 0x62, 0x77,
+ 0x48, 0x5d, 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5,
+ 0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a, 0x49, 0x5c,
+ 0x63, 0x76, 0x1d, 0x08, 0x37, 0x22, 0xac, 0xb9, 0x86, 0x93,
+ 0xf8, 0xed, 0xd2, 0xc7, 0x04, 0x11, 0x2e, 0x3b, 0x50, 0x45,
+ 0x7a, 0x6f, 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62,
+ 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2, 0x7d, 0x6b,
+ 0x51, 0x47, 0x25, 0x33, 0x09, 0x1f, 0xcd, 0xdb, 0xe1, 0xf7,
+ 0x95, 0x83, 0xb9, 0xaf, 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4,
+ 0x8e, 0x98, 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x04, 0x3e, 0x28,
+ 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5, 0x37, 0x21,
+ 0x1b, 0x0d, 0x6f, 0x79, 0x43, 0x55, 0xe9, 0xff, 0xc5, 0xd3,
+ 0xb1, 0xa7, 0x9d, 0x8b, 0x59, 0x4f, 0x75, 0x63, 0x01, 0x17,
+ 0x2d, 0x3b, 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6,
+ 0x24, 0x32, 0x08, 0x1e, 0x7c, 0x6a, 0x50, 0x46, 0x13, 0x05,
+ 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71, 0xa3, 0xb5, 0x8f, 0x99,
+ 0xfb, 0xed, 0xd7, 0xc1, 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20,
+ 0x1a, 0x0c, 0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc,
+ 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad, 0x7f, 0x69,
+ 0x53, 0x45, 0x27, 0x31, 0x0b, 0x1d, 0xb2, 0xa4, 0x9e, 0x88,
+ 0xea, 0xfc, 0xc6, 0xd0, 0x02, 0x14, 0x2e, 0x38, 0x5a, 0x4c,
+ 0x76, 0x60, 0x35, 0x23, 0x19, 0x0f, 0x6d, 0x7b, 0x41, 0x57,
+ 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7, 0x48, 0x5e,
+ 0x64, 0x72, 0x10, 0x06, 0x3c, 0x2a, 0xf8, 0xee, 0xd4, 0xc2,
+ 0xa0, 0xb6, 0x8c, 0x9a, 0x26, 0x30, 0x0a, 0x1c, 0x7e, 0x68,
+ 0x52, 0x44, 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4,
+ 0x5b, 0x4d, 0x77, 0x61, 0x03, 0x15, 0x2f, 0x39, 0xeb, 0xfd,
+ 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89, 0xdc, 0xca, 0xf0, 0xe6,
+ 0x84, 0x92, 0xa8, 0xbe, 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22,
+ 0x18, 0x0e, 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3,
+ 0x11, 0x07, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73, 0x00, 0x17,
+ 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81,
+ 0xe4, 0xf3, 0xca, 0xdd, 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26,
+ 0x1f, 0x08, 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0,
+ 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf, 0x62, 0x75,
+ 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x07, 0xb7, 0xa0, 0x99, 0x8e,
+ 0xeb, 0xfc, 0xc5, 0xd2, 0x0f, 0x18, 0x21, 0x36, 0x53, 0x44,
+ 0x7d, 0x6a, 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc,
+ 0x11, 0x06, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74, 0xc4, 0xd3,
+ 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1, 0x7c, 0x6b, 0x52, 0x45,
+ 0x20, 0x37, 0x0e, 0x19, 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38,
+ 0x01, 0x16, 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae,
+ 0x1e, 0x09, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b, 0xa6, 0xb1,
+ 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3, 0x4f, 0x58, 0x61, 0x76,
+ 0x13, 0x04, 0x3d, 0x2a, 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc,
+ 0x85, 0x92, 0x22, 0x35, 0x0c, 0x1b, 0x7e, 0x69, 0x50, 0x47,
+ 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff, 0x95, 0x82,
+ 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0, 0x2d, 0x3a, 0x03, 0x14,
+ 0x71, 0x66, 0x5f, 0x48, 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3,
+ 0x8a, 0x9d, 0x40, 0x57, 0x6e, 0x79, 0x1c, 0x0b, 0x32, 0x25,
+ 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83, 0x5e, 0x49,
+ 0x70, 0x67, 0x02, 0x15, 0x2c, 0x3b, 0x8b, 0x9c, 0xa5, 0xb2,
+ 0xd7, 0xc0, 0xf9, 0xee, 0x33, 0x24, 0x1d, 0x0a, 0x6f, 0x78,
+ 0x41, 0x56, 0x3c, 0x2b, 0x12, 0x05, 0x60, 0x77, 0x4e, 0x59,
+ 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1, 0x51, 0x46,
+ 0x7f, 0x68, 0x0d, 0x1a, 0x23, 0x34, 0xe9, 0xfe, 0xc7, 0xd0,
+ 0xb5, 0xa2, 0x9b, 0x8c, 0x00, 0x18, 0x30, 0x28, 0x60, 0x78,
+ 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88,
+ 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5, 0x5d, 0x45,
+ 0x6d, 0x75, 0x3d, 0x25, 0x0d, 0x15, 0x27, 0x3f, 0x17, 0x0f,
+ 0x47, 0x5f, 0x77, 0x6f, 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f,
+ 0xb7, 0xaf, 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2,
+ 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x02, 0x2a, 0x32, 0x4e, 0x56,
+ 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x06, 0x8e, 0x96, 0xbe, 0xa6,
+ 0xee, 0xf6, 0xde, 0xc6, 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab,
+ 0x83, 0x9b, 0x13, 0x0b, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b,
+ 0x69, 0x71, 0x59, 0x41, 0x09, 0x11, 0x39, 0x21, 0xa9, 0xb1,
+ 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1, 0xf4, 0xec, 0xc4, 0xdc,
+ 0x94, 0x8c, 0xa4, 0xbc, 0x34, 0x2c, 0x04, 0x1c, 0x54, 0x4c,
+ 0x64, 0x7c, 0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4,
+ 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0x0c, 0x14, 0x01, 0x19,
+ 0x31, 0x29, 0x61, 0x79, 0x51, 0x49, 0xc1, 0xd9, 0xf1, 0xe9,
+ 0xa1, 0xb9, 0x91, 0x89, 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3,
+ 0xeb, 0xf3, 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x03, 0x2b, 0x33,
+ 0x26, 0x3e, 0x16, 0x0e, 0x46, 0x5e, 0x76, 0x6e, 0xe6, 0xfe,
+ 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae, 0xd2, 0xca, 0xe2, 0xfa,
+ 0xb2, 0xaa, 0x82, 0x9a, 0x12, 0x0a, 0x22, 0x3a, 0x72, 0x6a,
+ 0x42, 0x5a, 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x07,
+ 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7, 0xf5, 0xed,
+ 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd, 0x35, 0x2d, 0x05, 0x1d,
+ 0x55, 0x4d, 0x65, 0x7d, 0x68, 0x70, 0x58, 0x40, 0x08, 0x10,
+ 0x38, 0x20, 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0,
+ 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1,
+ 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87, 0x8d, 0x94, 0xbf, 0xa6,
+ 0xe9, 0xf0, 0xdb, 0xc2, 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38,
+ 0x13, 0x0a, 0x07, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48,
+ 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80, 0x8a, 0x93,
+ 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5, 0x42, 0x5b, 0x70, 0x69,
+ 0x26, 0x3f, 0x14, 0x0d, 0x0e, 0x17, 0x3c, 0x25, 0x6a, 0x73,
+ 0x58, 0x41, 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89,
+ 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc, 0x4b, 0x52,
+ 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x04, 0x09, 0x10, 0x3b, 0x22,
+ 0x6d, 0x74, 0x5f, 0x46, 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc,
+ 0x97, 0x8e, 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb,
+ 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x03, 0x1c, 0x05,
+ 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53, 0xd4, 0xcd, 0xe6, 0xff,
+ 0xb0, 0xa9, 0x82, 0x9b, 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec,
+ 0xc7, 0xde, 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0x0f, 0x16,
+ 0x1b, 0x02, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54, 0xd3, 0xca,
+ 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c, 0x96, 0x8f, 0xa4, 0xbd,
+ 0xf2, 0xeb, 0xc0, 0xd9, 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23,
+ 0x08, 0x11, 0x12, 0x0b, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d,
+ 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95, 0x9f, 0x86,
+ 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0, 0x57, 0x4e, 0x65, 0x7c,
+ 0x33, 0x2a, 0x01, 0x18, 0x15, 0x0c, 0x27, 0x3e, 0x71, 0x68,
+ 0x43, 0x5a, 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92,
+ 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7, 0x50, 0x49,
+ 0x62, 0x7b, 0x34, 0x2d, 0x06, 0x1f, 0x00, 0x1a, 0x34, 0x2e,
+ 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2,
+ 0x8c, 0x96, 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb,
+ 0x6d, 0x77, 0x59, 0x43, 0x05, 0x1f, 0x31, 0x2b, 0x67, 0x7d,
+ 0x53, 0x49, 0x0f, 0x15, 0x3b, 0x21, 0xb7, 0xad, 0x83, 0x99,
+ 0xdf, 0xc5, 0xeb, 0xf1, 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8,
+ 0x86, 0x9c, 0x0a, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c,
+ 0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88, 0x1e, 0x04,
+ 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58, 0x73, 0x69, 0x47, 0x5d,
+ 0x1b, 0x01, 0x2f, 0x35, 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1,
+ 0xff, 0xe5, 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef,
+ 0x79, 0x63, 0x4d, 0x57, 0x11, 0x0b, 0x25, 0x3f, 0x14, 0x0e,
+ 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52, 0xc4, 0xde, 0xf0, 0xea,
+ 0xac, 0xb6, 0x98, 0x82, 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3,
+ 0xdd, 0xc7, 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0x0d, 0x17,
+ 0x3c, 0x26, 0x08, 0x12, 0x54, 0x4e, 0x60, 0x7a, 0xec, 0xf6,
+ 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa, 0xe6, 0xfc, 0xd2, 0xc8,
+ 0x8e, 0x94, 0xba, 0xa0, 0x36, 0x2c, 0x02, 0x18, 0x5e, 0x44,
+ 0x6a, 0x70, 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x07, 0x1d,
+ 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd, 0x4f, 0x55,
+ 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x09, 0x9f, 0x85, 0xab, 0xb1,
+ 0xf7, 0xed, 0xc3, 0xd9, 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80,
+ 0xae, 0xb4, 0x22, 0x38, 0x16, 0x0c, 0x4a, 0x50, 0x7e, 0x64,
+ 0x28, 0x32, 0x1c, 0x06, 0x40, 0x5a, 0x74, 0x6e, 0xf8, 0xe2,
+ 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe, 0x95, 0x8f, 0xa1, 0xbb,
+ 0xfd, 0xe7, 0xc9, 0xd3, 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37,
+ 0x19, 0x03, 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41,
+ 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99, 0xad, 0xb6,
+ 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec, 0x75, 0x6e, 0x43, 0x58,
+ 0x19, 0x02, 0x2f, 0x34, 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30,
+ 0x1d, 0x06, 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde,
+ 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab, 0x32, 0x29,
+ 0x04, 0x1f, 0x5e, 0x45, 0x68, 0x73, 0x8e, 0x95, 0xb8, 0xa3,
+ 0xe2, 0xf9, 0xd4, 0xcf, 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21,
+ 0x0c, 0x17, 0x23, 0x38, 0x15, 0x0e, 0x4f, 0x54, 0x79, 0x62,
+ 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba, 0xc9, 0xd2,
+ 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88, 0x11, 0x0a, 0x27, 0x3c,
+ 0x7d, 0x66, 0x4b, 0x50, 0x64, 0x7f, 0x52, 0x49, 0x08, 0x13,
+ 0x3e, 0x25, 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd,
+ 0x01, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40, 0xd9, 0xc2,
+ 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98, 0xac, 0xb7, 0x9a, 0x81,
+ 0xc0, 0xdb, 0xf6, 0xed, 0x74, 0x6f, 0x42, 0x59, 0x18, 0x03,
+ 0x2e, 0x35, 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x07,
+ 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf, 0xeb, 0xf0,
+ 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa, 0x33, 0x28, 0x05, 0x1e,
+ 0x5f, 0x44, 0x69, 0x72, 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8,
+ 0xd5, 0xce, 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0x0d, 0x16,
+ 0x22, 0x39, 0x14, 0x0f, 0x4e, 0x55, 0x78, 0x63, 0xfa, 0xe1,
+ 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb, 0xc8, 0xd3, 0xfe, 0xe5,
+ 0xa4, 0xbf, 0x92, 0x89, 0x10, 0x0b, 0x26, 0x3d, 0x7c, 0x67,
+ 0x4a, 0x51, 0x65, 0x7e, 0x53, 0x48, 0x09, 0x12, 0x3f, 0x24,
+ 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc, 0x00, 0x1c,
+ 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4,
+ 0x90, 0x8c, 0xa8, 0xb4, 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1,
+ 0x95, 0x89, 0x3d, 0x21, 0x05, 0x19, 0x4d, 0x51, 0x75, 0x69,
+ 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3, 0x47, 0x5b,
+ 0x7f, 0x63, 0x37, 0x2b, 0x0f, 0x13, 0x7a, 0x66, 0x42, 0x5e,
+ 0x0a, 0x16, 0x32, 0x2e, 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6,
+ 0xd2, 0xce, 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x07,
+ 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7, 0x8e, 0x92,
+ 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda, 0x6e, 0x72, 0x56, 0x4a,
+ 0x1e, 0x02, 0x26, 0x3a, 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98,
+ 0xbc, 0xa0, 0x14, 0x08, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40,
+ 0x29, 0x35, 0x11, 0x0d, 0x59, 0x45, 0x61, 0x7d, 0xc9, 0xd5,
+ 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d, 0xa6, 0xba, 0x9e, 0x82,
+ 0xd6, 0xca, 0xee, 0xf2, 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a,
+ 0x0e, 0x12, 0x7b, 0x67, 0x43, 0x5f, 0x0b, 0x17, 0x33, 0x2f,
+ 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf, 0x01, 0x1d,
+ 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55, 0xe1, 0xfd, 0xd9, 0xc5,
+ 0x91, 0x8d, 0xa9, 0xb5, 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0,
+ 0x94, 0x88, 0x3c, 0x20, 0x04, 0x18, 0x4c, 0x50, 0x74, 0x68,
+ 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1, 0x15, 0x09,
+ 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41, 0x28, 0x34, 0x10, 0x0c,
+ 0x58, 0x44, 0x60, 0x7c, 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4,
+ 0x80, 0x9c, 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x06,
+ 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6, 0x8f, 0x93,
+ 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb, 0x6f, 0x73, 0x57, 0x4b,
+ 0x1f, 0x03, 0x27, 0x3b, 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69,
+ 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb,
+ 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e, 0x25, 0x38,
+ 0x1f, 0x02, 0x51, 0x4c, 0x6b, 0x76, 0x87, 0x9a, 0xbd, 0xa0,
+ 0xf3, 0xee, 0xc9, 0xd4, 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x06,
+ 0x21, 0x3c, 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x04, 0x19,
+ 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1, 0x13, 0x0e,
+ 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40, 0xfb, 0xe6, 0xc1, 0xdc,
+ 0x8f, 0x92, 0xb5, 0xa8, 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7,
+ 0x90, 0x8d, 0x36, 0x2b, 0x0c, 0x11, 0x42, 0x5f, 0x78, 0x65,
+ 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7, 0x7c, 0x61,
+ 0x46, 0x5b, 0x08, 0x15, 0x32, 0x2f, 0x59, 0x44, 0x63, 0x7e,
+ 0x2d, 0x30, 0x17, 0x0a, 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8,
+ 0xff, 0xe2, 0x26, 0x3b, 0x1c, 0x01, 0x52, 0x4f, 0x68, 0x75,
+ 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d, 0xeb, 0xf6,
+ 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8, 0x03, 0x1e, 0x39, 0x24,
+ 0x77, 0x6a, 0x4d, 0x50, 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8,
+ 0xef, 0xf2, 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x07, 0x1a,
+ 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x05, 0x22, 0x3f, 0x84, 0x99,
+ 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7, 0x35, 0x28, 0x0f, 0x12,
+ 0x41, 0x5c, 0x7b, 0x66, 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4,
+ 0x93, 0x8e, 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab,
+ 0x10, 0x0d, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43, 0xb2, 0xaf,
+ 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1, 0x5a, 0x47, 0x60, 0x7d,
+ 0x2e, 0x33, 0x14, 0x09, 0x7f, 0x62, 0x45, 0x58, 0x0b, 0x16,
+ 0x31, 0x2c, 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4,
+ 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee,
+ 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa, 0xfd, 0xe3, 0xc1, 0xdf,
+ 0x85, 0x9b, 0xb9, 0xa7, 0x0d, 0x13, 0x31, 0x2f, 0x75, 0x6b,
+ 0x49, 0x57, 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd,
+ 0x17, 0x09, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d, 0x1a, 0x04,
+ 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40, 0xea, 0xf4, 0xd6, 0xc8,
+ 0x92, 0x8c, 0xae, 0xb0, 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5,
+ 0x97, 0x89, 0x23, 0x3d, 0x1f, 0x01, 0x5b, 0x45, 0x67, 0x79,
+ 0x2e, 0x30, 0x12, 0x0c, 0x56, 0x48, 0x6a, 0x74, 0xde, 0xc0,
+ 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84, 0x34, 0x2a, 0x08, 0x16,
+ 0x4c, 0x52, 0x70, 0x6e, 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2,
+ 0x80, 0x9e, 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93,
+ 0x39, 0x27, 0x05, 0x1b, 0x41, 0x5f, 0x7d, 0x63, 0xbb, 0xa5,
+ 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1, 0x4b, 0x55, 0x77, 0x69,
+ 0x33, 0x2d, 0x0f, 0x11, 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20,
+ 0x02, 0x1c, 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec,
+ 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x06, 0xac, 0xb2,
+ 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6, 0xa1, 0xbf, 0x9d, 0x83,
+ 0xd9, 0xc7, 0xe5, 0xfb, 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37,
+ 0x15, 0x0b, 0x68, 0x76, 0x54, 0x4a, 0x10, 0x0e, 0x2c, 0x32,
+ 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2, 0x95, 0x8b,
+ 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf, 0x65, 0x7b, 0x59, 0x47,
+ 0x1d, 0x03, 0x21, 0x3f, 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9,
+ 0xcb, 0xd5, 0x7f, 0x61, 0x43, 0x5d, 0x07, 0x19, 0x3b, 0x25,
+ 0x72, 0x6c, 0x4e, 0x50, 0x0a, 0x14, 0x36, 0x28, 0x82, 0x9c,
+ 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8, 0x00, 0x1f, 0x3e, 0x21,
+ 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b,
+ 0xba, 0xa5, 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0,
+ 0x15, 0x0a, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48, 0xc7, 0xd8,
+ 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a, 0x3f, 0x20, 0x01, 0x1e,
+ 0x43, 0x5c, 0x7d, 0x62, 0x2a, 0x35, 0x14, 0x0b, 0x56, 0x49,
+ 0x68, 0x77, 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f,
+ 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce, 0x6b, 0x74,
+ 0x55, 0x4a, 0x17, 0x08, 0x29, 0x36, 0x7e, 0x61, 0x40, 0x5f,
+ 0x02, 0x1d, 0x3c, 0x23, 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5,
+ 0xc4, 0xdb, 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x09,
+ 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1, 0xb9, 0xa6,
+ 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4, 0x41, 0x5e, 0x7f, 0x60,
+ 0x3d, 0x22, 0x03, 0x1c, 0x3b, 0x24, 0x05, 0x1a, 0x47, 0x58,
+ 0x79, 0x66, 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e,
+ 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b, 0x2e, 0x31,
+ 0x10, 0x0f, 0x52, 0x4d, 0x6c, 0x73, 0xfc, 0xe3, 0xc2, 0xdd,
+ 0x80, 0x9f, 0xbe, 0xa1, 0x04, 0x1b, 0x3a, 0x25, 0x78, 0x67,
+ 0x46, 0x59, 0x11, 0x0e, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c,
+ 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4, 0xa8, 0xb7,
+ 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5, 0x50, 0x4f, 0x6e, 0x71,
+ 0x2c, 0x33, 0x12, 0x0d, 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26,
+ 0x07, 0x18, 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0,
+ 0x6f, 0x70, 0x51, 0x4e, 0x13, 0x0c, 0x2d, 0x32, 0x97, 0x88,
+ 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca, 0x82, 0x9d, 0xbc, 0xa3,
+ 0xfe, 0xe1, 0xc0, 0xdf, 0x7a, 0x65, 0x44, 0x5b, 0x06, 0x19,
+ 0x38, 0x27, 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd, 0x3a, 0x1a,
+ 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda, 0x27, 0x07, 0x67, 0x47,
+ 0xa7, 0x87, 0xe7, 0xc7, 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4,
+ 0xb4, 0x94, 0x69, 0x49, 0x29, 0x09, 0xe9, 0xc9, 0xa9, 0x89,
+ 0x4e, 0x6e, 0x0e, 0x2e, 0xce, 0xee, 0x8e, 0xae, 0x53, 0x73,
+ 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3, 0xe8, 0xc8, 0xa8, 0x88,
+ 0x68, 0x48, 0x28, 0x08, 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55,
+ 0x35, 0x15, 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32,
+ 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0x0f, 0x2f, 0x9c, 0xbc,
+ 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c, 0x81, 0xa1, 0xc1, 0xe1,
+ 0x01, 0x21, 0x41, 0x61, 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x06,
+ 0x66, 0x46, 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b,
+ 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0x0d, 0x2d, 0xd0, 0xf0,
+ 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30, 0xf7, 0xd7, 0xb7, 0x97,
+ 0x77, 0x57, 0x37, 0x17, 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a,
+ 0x2a, 0x0a, 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59,
+ 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x04, 0x64, 0x44, 0x83, 0xa3,
+ 0xc3, 0xe3, 0x03, 0x23, 0x43, 0x63, 0x9e, 0xbe, 0xde, 0xfe,
+ 0x1e, 0x3e, 0x5e, 0x7e, 0x25, 0x05, 0x65, 0x45, 0xa5, 0x85,
+ 0xe5, 0xc5, 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8,
+ 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff, 0x02, 0x22,
+ 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2, 0x51, 0x71, 0x11, 0x31,
+ 0xd1, 0xf1, 0x91, 0xb1, 0x4c, 0x6c, 0x0c, 0x2c, 0xcc, 0xec,
+ 0x8c, 0xac, 0x6b, 0x4b, 0x2b, 0x0b, 0xeb, 0xcb, 0xab, 0x8b,
+ 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96, 0x00, 0x21,
+ 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76,
+ 0x91, 0xb0, 0xd3, 0xf2, 0x2a, 0x0b, 0x68, 0x49, 0xae, 0x8f,
+ 0xec, 0xcd, 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8,
+ 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3, 0x41, 0x60,
+ 0x03, 0x22, 0xc5, 0xe4, 0x87, 0xa6, 0x7e, 0x5f, 0x3c, 0x1d,
+ 0xfa, 0xdb, 0xb8, 0x99, 0x6b, 0x4a, 0x29, 0x08, 0xef, 0xce,
+ 0xad, 0x8c, 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0x0d, 0x6e, 0x4f,
+ 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a, 0x82, 0xa3,
+ 0xc0, 0xe1, 0x06, 0x27, 0x44, 0x65, 0x97, 0xb6, 0xd5, 0xf4,
+ 0x13, 0x32, 0x51, 0x70, 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59,
+ 0x3a, 0x1b, 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0x0e,
+ 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31, 0xc3, 0xe2,
+ 0x81, 0xa0, 0x47, 0x66, 0x05, 0x24, 0x4d, 0x6c, 0x0f, 0x2e,
+ 0xc9, 0xe8, 0x8b, 0xaa, 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd,
+ 0x9e, 0xbf, 0x67, 0x46, 0x25, 0x04, 0xe3, 0xc2, 0xa1, 0x80,
+ 0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95, 0x19, 0x38,
+ 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe, 0x0c, 0x2d, 0x4e, 0x6f,
+ 0x88, 0xa9, 0xca, 0xeb, 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96,
+ 0xf5, 0xd4, 0x26, 0x07, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1,
+ 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x02, 0xf0, 0xd1,
+ 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17, 0xcf, 0xee, 0x8d, 0xac,
+ 0x4b, 0x6a, 0x09, 0x28, 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f,
+ 0x1c, 0x3d, 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56,
+ 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x01, 0x62, 0x43, 0x9b, 0xba,
+ 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c, 0x8e, 0xaf, 0xcc, 0xed,
+ 0x0a, 0x2b, 0x48, 0x69, 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa,
+ 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3,
+ 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4, 0x17, 0x35,
+ 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9, 0x34, 0x16, 0x70, 0x52,
+ 0xbc, 0x9e, 0xf8, 0xda, 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93,
+ 0xf5, 0xd7, 0x2e, 0x0c, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0,
+ 0x23, 0x01, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd, 0x68, 0x4a,
+ 0x2c, 0x0e, 0xe0, 0xc2, 0xa4, 0x86, 0x65, 0x47, 0x21, 0x03,
+ 0xed, 0xcf, 0xa9, 0x8b, 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8,
+ 0xbe, 0x9c, 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91,
+ 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2, 0x51, 0x73,
+ 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf, 0x46, 0x64, 0x02, 0x20,
+ 0xce, 0xec, 0x8a, 0xa8, 0x4b, 0x69, 0x0f, 0x2d, 0xc3, 0xe1,
+ 0x87, 0xa5, 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e,
+ 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33, 0xca, 0xe8,
+ 0x8e, 0xac, 0x42, 0x60, 0x06, 0x24, 0xc7, 0xe5, 0x83, 0xa1,
+ 0x4f, 0x6d, 0x0b, 0x29, 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e,
+ 0x28, 0x0a, 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x07,
+ 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xf3, 0xd1,
+ 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d, 0xb8, 0x9a, 0xfc, 0xde,
+ 0x30, 0x12, 0x74, 0x56, 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f,
+ 0x79, 0x5b, 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x08, 0x6e, 0x4c,
+ 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x05, 0x63, 0x41, 0x8c, 0xae,
+ 0xc8, 0xea, 0x04, 0x26, 0x40, 0x62, 0x81, 0xa3, 0xc5, 0xe7,
+ 0x09, 0x2b, 0x4d, 0x6f, 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c,
+ 0x5a, 0x78, 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75,
+ 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26,
+ 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec, 0x0a, 0x29, 0x4c, 0x6f,
+ 0x86, 0xa5, 0xc0, 0xe3, 0x0f, 0x2c, 0x49, 0x6a, 0x83, 0xa0,
+ 0xc5, 0xe6, 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd,
+ 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8, 0x1e, 0x3d,
+ 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7, 0x1b, 0x38, 0x5d, 0x7e,
+ 0x97, 0xb4, 0xd1, 0xf2, 0x28, 0x0b, 0x6e, 0x4d, 0xa4, 0x87,
+ 0xe2, 0xc1, 0x2d, 0x0e, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4,
+ 0x22, 0x01, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb, 0x27, 0x04,
+ 0x61, 0x42, 0xab, 0x88, 0xed, 0xce, 0x3c, 0x1f, 0x7a, 0x59,
+ 0xb0, 0x93, 0xf6, 0xd5, 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96,
+ 0xf3, 0xd0, 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf,
+ 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda, 0x50, 0x73,
+ 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9, 0x55, 0x76, 0x13, 0x30,
+ 0xd9, 0xfa, 0x9f, 0xbc, 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5,
+ 0x90, 0xb3, 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6,
+ 0x44, 0x67, 0x02, 0x21, 0xc8, 0xeb, 0x8e, 0xad, 0x41, 0x62,
+ 0x07, 0x24, 0xcd, 0xee, 0x8b, 0xa8, 0x4e, 0x6d, 0x08, 0x2b,
+ 0xc2, 0xe1, 0x84, 0xa7, 0x4b, 0x68, 0x0d, 0x2e, 0xc7, 0xe4,
+ 0x81, 0xa2, 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91,
+ 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94, 0x72, 0x51,
+ 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b, 0x77, 0x54, 0x31, 0x12,
+ 0xfb, 0xd8, 0xbd, 0x9e, 0x6c, 0x4f, 0x2a, 0x09, 0xe0, 0xc3,
+ 0xa6, 0x85, 0x69, 0x4a, 0x2f, 0x0c, 0xe5, 0xc6, 0xa3, 0x80,
+ 0x66, 0x45, 0x20, 0x03, 0xea, 0xc9, 0xac, 0x8f, 0x63, 0x40,
+ 0x25, 0x06, 0xef, 0xcc, 0xa9, 0x8a, 0x00, 0x24, 0x48, 0x6c,
+ 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89,
+ 0xe5, 0xc1, 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86,
+ 0x47, 0x63, 0x0f, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb, 0xf4, 0xd0,
+ 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x08, 0xc9, 0xed, 0x81, 0xa5,
+ 0x59, 0x7d, 0x11, 0x35, 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a,
+ 0x56, 0x72, 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x07, 0x6b, 0x4f,
+ 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x09, 0xc8, 0xec,
+ 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34, 0x8f, 0xab, 0xc7, 0xe3,
+ 0x1f, 0x3b, 0x57, 0x73, 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x06,
+ 0x6a, 0x4e, 0x01, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd,
+ 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0, 0x7b, 0x5f,
+ 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87, 0x46, 0x62, 0x0e, 0x2a,
+ 0xd6, 0xf2, 0x9e, 0xba, 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43,
+ 0x2f, 0x0b, 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36,
+ 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71, 0xb0, 0x94,
+ 0xf8, 0xdc, 0x20, 0x04, 0x68, 0x4c, 0x03, 0x27, 0x4b, 0x6f,
+ 0x93, 0xb7, 0xdb, 0xff, 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a,
+ 0xe6, 0xc2, 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85,
+ 0x44, 0x60, 0x0c, 0x28, 0xd4, 0xf0, 0x9c, 0xb8, 0x02, 0x26,
+ 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe, 0x3f, 0x1b, 0x77, 0x53,
+ 0xaf, 0x8b, 0xe7, 0xc3, 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc,
+ 0xa0, 0x84, 0x45, 0x61, 0x0d, 0x29, 0xd5, 0xf1, 0x9d, 0xb9,
+ 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0x0a, 0xcb, 0xef,
+ 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37, 0x8c, 0xa8, 0xc4, 0xe0,
+ 0x1c, 0x38, 0x54, 0x70, 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x05,
+ 0x69, 0x4d, 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb,
+ 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce, 0x6a, 0x4f,
+ 0x20, 0x05, 0xfe, 0xdb, 0xb4, 0x91, 0x5f, 0x7a, 0x15, 0x30,
+ 0xcb, 0xee, 0x81, 0xa4, 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65,
+ 0x0a, 0x2f, 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a,
+ 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0x0f, 0x60, 0x45, 0x8b, 0xae,
+ 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70, 0xb5, 0x90, 0xff, 0xda,
+ 0x21, 0x04, 0x6b, 0x4e, 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31,
+ 0x5e, 0x7b, 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x01, 0x24,
+ 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11, 0x61, 0x44,
+ 0x2b, 0x0e, 0xf5, 0xd0, 0xbf, 0x9a, 0x54, 0x71, 0x1e, 0x3b,
+ 0xc0, 0xe5, 0x8a, 0xaf, 0x0b, 0x2e, 0x41, 0x64, 0x9f, 0xba,
+ 0xd5, 0xf0, 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5,
+ 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c, 0x42, 0x67,
+ 0x08, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9, 0x1d, 0x38, 0x57, 0x72,
+ 0x89, 0xac, 0xc3, 0xe6, 0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99,
+ 0xf6, 0xd3, 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58,
+ 0x96, 0xb3, 0xdc, 0xf9, 0x02, 0x27, 0x48, 0x6d, 0xc9, 0xec,
+ 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32, 0xfc, 0xd9, 0xb6, 0x93,
+ 0x68, 0x4d, 0x22, 0x07, 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73,
+ 0x1c, 0x39, 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0x0c,
+ 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53, 0x9d, 0xb8,
+ 0xd7, 0xf2, 0x09, 0x2c, 0x43, 0x66, 0x16, 0x33, 0x5c, 0x79,
+ 0x82, 0xa7, 0xc8, 0xed, 0x23, 0x06, 0x69, 0x4c, 0xb7, 0x92,
+ 0xfd, 0xd8, 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87,
+ 0x49, 0x6c, 0x03, 0x26, 0xdd, 0xf8, 0x97, 0xb2, 0x00, 0x26,
+ 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47,
+ 0xb5, 0x93, 0xf9, 0xdf, 0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4,
+ 0x8e, 0xa8, 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85,
+ 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0x0a, 0x60, 0x46, 0x99, 0xbf,
+ 0xd5, 0xf3, 0x01, 0x27, 0x4d, 0x6b, 0xee, 0xc8, 0xa2, 0x84,
+ 0x76, 0x50, 0x3a, 0x1c, 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d,
+ 0x17, 0x31, 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87,
+ 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa, 0x2f, 0x09,
+ 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd, 0x02, 0x24, 0x4e, 0x68,
+ 0x9a, 0xbc, 0xd6, 0xf0, 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f,
+ 0x15, 0x33, 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e,
+ 0x9b, 0xbd, 0xd7, 0xf1, 0x03, 0x25, 0x4f, 0x69, 0xb6, 0x90,
+ 0xfa, 0xdc, 0x2e, 0x08, 0x62, 0x44, 0xea, 0xcc, 0xa6, 0x80,
+ 0x72, 0x54, 0x3e, 0x18, 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79,
+ 0x13, 0x35, 0xb0, 0x96, 0xfc, 0xda, 0x28, 0x0e, 0x64, 0x42,
+ 0x9d, 0xbb, 0xd1, 0xf7, 0x05, 0x23, 0x49, 0x6f, 0x5e, 0x78,
+ 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac, 0x73, 0x55, 0x3f, 0x19,
+ 0xeb, 0xcd, 0xa7, 0x81, 0x04, 0x22, 0x48, 0x6e, 0x9c, 0xba,
+ 0xd0, 0xf6, 0x29, 0x0f, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb,
+ 0x9f, 0xb9, 0xd3, 0xf5, 0x07, 0x21, 0x4b, 0x6d, 0xb2, 0x94,
+ 0xfe, 0xd8, 0x2a, 0x0c, 0x66, 0x40, 0xc5, 0xe3, 0x89, 0xaf,
+ 0x5d, 0x7b, 0x11, 0x37, 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56,
+ 0x3c, 0x1a, 0x2b, 0x0d, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9,
+ 0x06, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4, 0x71, 0x57,
+ 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83, 0x5c, 0x7a, 0x10, 0x36,
+ 0xc4, 0xe2, 0x88, 0xae, 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb,
+ 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0,
+ 0x4a, 0x6d, 0x04, 0x23, 0xd6, 0xf1, 0x98, 0xbf, 0x6f, 0x48,
+ 0x21, 0x06, 0xf3, 0xd4, 0xbd, 0x9a, 0x94, 0xb3, 0xda, 0xfd,
+ 0x08, 0x2f, 0x46, 0x61, 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0x0a,
+ 0x63, 0x44, 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0x0c, 0x2b,
+ 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0x0e, 0x35, 0x12,
+ 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0, 0x10, 0x37, 0x5e, 0x79,
+ 0x8c, 0xab, 0xc2, 0xe5, 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4,
+ 0xad, 0x8a, 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf,
+ 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54, 0x84, 0xa3,
+ 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71, 0xeb, 0xcc, 0xa5, 0x82,
+ 0x77, 0x50, 0x39, 0x1e, 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75,
+ 0x1c, 0x3b, 0x6a, 0x4d, 0x24, 0x03, 0xf6, 0xd1, 0xb8, 0x9f,
+ 0x4f, 0x68, 0x01, 0x26, 0xd3, 0xf4, 0x9d, 0xba, 0x20, 0x07,
+ 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5, 0x05, 0x22, 0x4b, 0x6c,
+ 0x99, 0xbe, 0xd7, 0xf0, 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45,
+ 0x2c, 0x0b, 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x09, 0x2e,
+ 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0x0f, 0x66, 0x41, 0x91, 0xb6,
+ 0xdf, 0xf8, 0x0d, 0x2a, 0x43, 0x64, 0x5f, 0x78, 0x11, 0x36,
+ 0xc3, 0xe4, 0x8d, 0xaa, 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1,
+ 0xa8, 0x8f, 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0,
+ 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5, 0xcb, 0xec,
+ 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e, 0xee, 0xc9, 0xa0, 0x87,
+ 0x72, 0x55, 0x3c, 0x1b, 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a,
+ 0x53, 0x74, 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51,
+ 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75,
+ 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85, 0xba, 0x92, 0xea, 0xc2,
+ 0x1a, 0x32, 0x4a, 0x62, 0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f,
+ 0x17, 0x3f, 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1,
+ 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec, 0xd3, 0xfb,
+ 0x83, 0xab, 0x73, 0x5b, 0x23, 0x0b, 0x8e, 0xa6, 0xde, 0xf6,
+ 0x2e, 0x06, 0x7e, 0x56, 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a,
+ 0x22, 0x0a, 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x07, 0x7f, 0x57,
+ 0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0, 0x35, 0x1d,
+ 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed, 0xbb, 0x93, 0xeb, 0xc3,
+ 0x1b, 0x33, 0x4b, 0x63, 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e,
+ 0x16, 0x3e, 0x01, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9,
+ 0x5c, 0x74, 0x0c, 0x24, 0xfc, 0xd4, 0xac, 0x84, 0xb9, 0x91,
+ 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61, 0xe4, 0xcc, 0xb4, 0x9c,
+ 0x44, 0x6c, 0x14, 0x3c, 0x03, 0x2b, 0x53, 0x7b, 0xa3, 0x8b,
+ 0xf3, 0xdb, 0x5e, 0x76, 0x0e, 0x26, 0xfe, 0xd6, 0xae, 0x86,
+ 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x08, 0x8d, 0xa5,
+ 0xdd, 0xf5, 0x2d, 0x05, 0x7d, 0x55, 0x6a, 0x42, 0x3a, 0x12,
+ 0xca, 0xe2, 0x9a, 0xb2, 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf,
+ 0xc7, 0xef, 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3,
+ 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee, 0xd1, 0xf9,
+ 0x81, 0xa9, 0x71, 0x59, 0x21, 0x09, 0x8c, 0xa4, 0xdc, 0xf4,
+ 0x2c, 0x04, 0x7c, 0x54, 0x02, 0x2a, 0x52, 0x7a, 0xa2, 0x8a,
+ 0xf2, 0xda, 0x5f, 0x77, 0x0f, 0x27, 0xff, 0xd7, 0xaf, 0x87,
+ 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60, 0xe5, 0xcd,
+ 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d, 0x00, 0x29, 0x52, 0x7b,
+ 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8,
+ 0xa3, 0x8a, 0xaa, 0x83, 0xf8, 0xd1, 0x0e, 0x27, 0x5c, 0x75,
+ 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x09, 0x20, 0x49, 0x60,
+ 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96, 0x1c, 0x35, 0x4e, 0x67,
+ 0xb8, 0x91, 0xea, 0xc3, 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e,
+ 0x15, 0x3c, 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69,
+ 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d, 0xc7, 0xee,
+ 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18, 0x38, 0x11, 0x6a, 0x43,
+ 0x9c, 0xb5, 0xce, 0xe7, 0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0,
+ 0x9b, 0xb2, 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x04,
+ 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x03, 0x78, 0x51, 0x71, 0x58,
+ 0x23, 0x0a, 0xd5, 0xfc, 0x87, 0xae, 0x24, 0x0d, 0x76, 0x5f,
+ 0x80, 0xa9, 0xd2, 0xfb, 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4,
+ 0xcf, 0xe6, 0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3,
+ 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c, 0xc6, 0xef,
+ 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19, 0x70, 0x59, 0x22, 0x0b,
+ 0xd4, 0xfd, 0x86, 0xaf, 0x25, 0x0c, 0x77, 0x5e, 0x81, 0xa8,
+ 0xd3, 0xfa, 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x05,
+ 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x02, 0x79, 0x50, 0xab, 0x82,
+ 0xf9, 0xd0, 0x0f, 0x26, 0x5d, 0x74, 0xfe, 0xd7, 0xac, 0x85,
+ 0x5a, 0x73, 0x08, 0x21, 0x01, 0x28, 0x53, 0x7a, 0xa5, 0x8c,
+ 0xf7, 0xde, 0x54, 0x7d, 0x06, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b,
+ 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d, 0xb7, 0x9e,
+ 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68, 0x48, 0x61, 0x1a, 0x33,
+ 0xec, 0xc5, 0xbe, 0x97, 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90,
+ 0xeb, 0xc2, 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6,
+ 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b, 0x9a, 0xb0,
+ 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c, 0xd7, 0xfd, 0x83, 0xa9,
+ 0x7f, 0x55, 0x2b, 0x01, 0x29, 0x03, 0x7d, 0x57, 0x81, 0xab,
+ 0xd5, 0xff, 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2,
+ 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65, 0xfe, 0xd4,
+ 0xaa, 0x80, 0x56, 0x7c, 0x02, 0x28, 0x52, 0x78, 0x06, 0x2c,
+ 0xfa, 0xd0, 0xae, 0x84, 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d,
+ 0xe3, 0xc9, 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e,
+ 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x07, 0x79, 0x53, 0x7b, 0x51,
+ 0x2f, 0x05, 0xd3, 0xf9, 0x87, 0xad, 0x36, 0x1c, 0x62, 0x48,
+ 0x9e, 0xb4, 0xca, 0xe0, 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63,
+ 0x1d, 0x37, 0xac, 0x86, 0xf8, 0xd2, 0x04, 0x2e, 0x50, 0x7a,
+ 0xa4, 0x8e, 0xf0, 0xda, 0x0c, 0x26, 0x58, 0x72, 0xe9, 0xc3,
+ 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f, 0x3e, 0x14, 0x6a, 0x40,
+ 0x96, 0xbc, 0xc2, 0xe8, 0x73, 0x59, 0x27, 0x0d, 0xdb, 0xf1,
+ 0x8f, 0xa5, 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0x0f, 0x71, 0x5b,
+ 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16, 0x17, 0x3d,
+ 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1, 0x5a, 0x70, 0x0e, 0x24,
+ 0xf2, 0xd8, 0xa6, 0x8c, 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74,
+ 0x0a, 0x20, 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d,
+ 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba, 0x21, 0x0b,
+ 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7, 0xdf, 0xf5, 0x8b, 0xa1,
+ 0x77, 0x5d, 0x23, 0x09, 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10,
+ 0x6e, 0x44, 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93,
+ 0x08, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde, 0x00, 0x2b,
+ 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38,
+ 0xe9, 0xc2, 0xbf, 0x94, 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0x0d,
+ 0x70, 0x5b, 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e,
+ 0x09, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8, 0x4c, 0x67,
+ 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d, 0x83, 0xa8, 0xd5, 0xfe,
+ 0x2f, 0x04, 0x79, 0x52, 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41,
+ 0x3c, 0x17, 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3,
+ 0x57, 0x7c, 0x01, 0x2a, 0xfb, 0xd0, 0xad, 0x86, 0x98, 0xb3,
+ 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49, 0xdd, 0xf6, 0x8b, 0xa0,
+ 0x71, 0x5a, 0x27, 0x0c, 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c,
+ 0xe1, 0xca, 0x5e, 0x75, 0x08, 0x23, 0xf2, 0xd9, 0xa4, 0x8f,
+ 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40, 0xd4, 0xff,
+ 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x05, 0x24, 0x0f, 0x72, 0x59,
+ 0x88, 0xa3, 0xde, 0xf5, 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6,
+ 0x9b, 0xb0, 0xae, 0x85, 0xf8, 0xd3, 0x02, 0x29, 0x54, 0x7f,
+ 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a, 0x2d, 0x06,
+ 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc, 0x68, 0x43, 0x3e, 0x15,
+ 0xc4, 0xef, 0x92, 0xb9, 0xa7, 0x8c, 0xf1, 0xda, 0x0b, 0x20,
+ 0x5d, 0x76, 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33,
+ 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7, 0x73, 0x58,
+ 0x25, 0x0e, 0xdf, 0xf4, 0x89, 0xa2, 0xbc, 0x97, 0xea, 0xc1,
+ 0x10, 0x3b, 0x46, 0x6d, 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e,
+ 0x03, 0x28, 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee,
+ 0x7a, 0x51, 0x2c, 0x07, 0xd6, 0xfd, 0x80, 0xab, 0xb5, 0x9e,
+ 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64, 0xf0, 0xdb, 0xa6, 0x8d,
+ 0x5c, 0x77, 0x0a, 0x21, 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c,
+ 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9,
+ 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e, 0x87, 0xab,
+ 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43, 0xe9, 0xc5, 0xb1, 0x9d,
+ 0x59, 0x75, 0x01, 0x2d, 0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x08,
+ 0x7c, 0x50, 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7,
+ 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa, 0xcf, 0xe3,
+ 0x97, 0xbb, 0x7f, 0x53, 0x27, 0x0b, 0xb2, 0x9e, 0xea, 0xc6,
+ 0x02, 0x2e, 0x5a, 0x76, 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9,
+ 0xdd, 0xf1, 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c,
+ 0x26, 0x0a, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2, 0x5b, 0x77,
+ 0x03, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f, 0xdc, 0xf0, 0x84, 0xa8,
+ 0x6c, 0x40, 0x34, 0x18, 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d,
+ 0x49, 0x65, 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47,
+ 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a, 0x79, 0x55,
+ 0x21, 0x0d, 0xc9, 0xe5, 0x91, 0xbd, 0x04, 0x28, 0x5c, 0x70,
+ 0xb4, 0x98, 0xec, 0xc0, 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6,
+ 0x82, 0xae, 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3,
+ 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0x0c, 0x78, 0x54, 0xed, 0xc1,
+ 0xb5, 0x99, 0x5d, 0x71, 0x05, 0x29, 0x4c, 0x60, 0x14, 0x38,
+ 0xfc, 0xd0, 0xa4, 0x88, 0x31, 0x1d, 0x69, 0x45, 0x81, 0xad,
+ 0xd9, 0xf5, 0xb6, 0x9a, 0xee, 0xc2, 0x06, 0x2a, 0x5e, 0x72,
+ 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0x0f, 0xa5, 0x89,
+ 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61, 0xd8, 0xf4, 0x80, 0xac,
+ 0x68, 0x44, 0x30, 0x1c, 0x5f, 0x73, 0x07, 0x2b, 0xef, 0xc3,
+ 0xb7, 0x9b, 0x22, 0x0e, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6,
+ 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58,
+ 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6, 0xea, 0xc7, 0xb0, 0x9d,
+ 0x5e, 0x73, 0x04, 0x29, 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x06,
+ 0x71, 0x5c, 0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0x0a,
+ 0xbc, 0x91, 0xe6, 0xcb, 0x08, 0x25, 0x52, 0x7f, 0x23, 0x0e,
+ 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0, 0x56, 0x7b, 0x0c, 0x21,
+ 0xe2, 0xcf, 0xb8, 0x95, 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16,
+ 0x61, 0x4c, 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39,
+ 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6, 0x10, 0x3d,
+ 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3, 0x46, 0x6b, 0x1c, 0x31,
+ 0xf2, 0xdf, 0xa8, 0x85, 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa,
+ 0xdd, 0xf0, 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f,
+ 0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a, 0x03, 0x2e,
+ 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0, 0x76, 0x5b, 0x2c, 0x01,
+ 0xc2, 0xef, 0x98, 0xb5, 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70,
+ 0x07, 0x2a, 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x05, 0x72, 0x5f,
+ 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x09, 0xbf, 0x92,
+ 0xe5, 0xc8, 0x0b, 0x26, 0x51, 0x7c, 0x20, 0x0d, 0x7a, 0x57,
+ 0x94, 0xb9, 0xce, 0xe3, 0x55, 0x78, 0x0f, 0x22, 0xe1, 0xcc,
+ 0xbb, 0x96, 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f,
+ 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a, 0x66, 0x4b,
+ 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5, 0x13, 0x3e, 0x49, 0x64,
+ 0xa7, 0x8a, 0xfd, 0xd0, 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc,
+ 0xab, 0x86, 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3,
+ 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c, 0xda, 0xf7,
+ 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19, 0x00, 0x2e, 0x5c, 0x72,
+ 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb,
+ 0x89, 0xa7, 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10,
+ 0xb7, 0x99, 0xeb, 0xc5, 0x0f, 0x21, 0x53, 0x7d, 0xa9, 0x87,
+ 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63, 0xc4, 0xea, 0x98, 0xb6,
+ 0x7c, 0x52, 0x20, 0x0e, 0x73, 0x5d, 0x2f, 0x01, 0xcb, 0xe5,
+ 0x97, 0xb9, 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4,
+ 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85, 0x22, 0x0c,
+ 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8, 0x95, 0xbb, 0xc9, 0xe7,
+ 0x2d, 0x03, 0x71, 0x5f, 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e,
+ 0x1c, 0x32, 0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x02, 0x2c,
+ 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41, 0x3c, 0x12,
+ 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6, 0x51, 0x7f, 0x0d, 0x23,
+ 0xe9, 0xc7, 0xb5, 0x9b, 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x08,
+ 0x7a, 0x54, 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39,
+ 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e, 0x29, 0x07,
+ 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3, 0x37, 0x19, 0x6b, 0x45,
+ 0x8f, 0xa1, 0xd3, 0xfd, 0x5a, 0x74, 0x06, 0x28, 0xe2, 0xcc,
+ 0xbe, 0x90, 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x09, 0x27,
+ 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a, 0xd1, 0xff,
+ 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b, 0xbc, 0x92, 0xe0, 0xce,
+ 0x04, 0x2a, 0x58, 0x76, 0x0b, 0x25, 0x57, 0x79, 0xb3, 0x9d,
+ 0xef, 0xc1, 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac,
+ 0x78, 0x56, 0x24, 0x0a, 0xc0, 0xee, 0x9c, 0xb2, 0x15, 0x3b,
+ 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf, 0xa2, 0x8c, 0xfe, 0xd0,
+ 0x1a, 0x34, 0x46, 0x68, 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59,
+ 0x2b, 0x05, 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd,
+ 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8, 0xca, 0xe5,
+ 0x94, 0xbb, 0x76, 0x59, 0x28, 0x07, 0xaf, 0x80, 0xf1, 0xde,
+ 0x13, 0x3c, 0x4d, 0x62, 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a,
+ 0x6b, 0x44, 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0x0e, 0x21,
+ 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e, 0x26, 0x09,
+ 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb, 0x0f, 0x20, 0x51, 0x7e,
+ 0xb3, 0x9c, 0xed, 0xc2, 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9,
+ 0x88, 0xa7, 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x08,
+ 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d, 0x86, 0xa9,
+ 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b, 0xe3, 0xcc, 0xbd, 0x92,
+ 0x5f, 0x70, 0x01, 0x2e, 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf,
+ 0xae, 0x81, 0x29, 0x06, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4,
+ 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3, 0x7b, 0x54,
+ 0x25, 0x0a, 0xc7, 0xe8, 0x99, 0xb6, 0xd4, 0xfb, 0x8a, 0xa5,
+ 0x68, 0x47, 0x36, 0x19, 0xb1, 0x9e, 0xef, 0xc0, 0x0d, 0x22,
+ 0x53, 0x7c, 0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x04, 0x75, 0x5a,
+ 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f, 0x5d, 0x72,
+ 0x03, 0x2c, 0xe1, 0xce, 0xbf, 0x90, 0x38, 0x17, 0x66, 0x49,
+ 0x84, 0xab, 0xda, 0xf5, 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82,
+ 0xf3, 0xdc, 0x74, 0x5b, 0x2a, 0x05, 0xc8, 0xe7, 0x96, 0xb9,
+ 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16, 0xbe, 0x91,
+ 0xe0, 0xcf, 0x02, 0x2d, 0x5c, 0x73, 0x98, 0xb7, 0xc6, 0xe9,
+ 0x24, 0x0b, 0x7a, 0x55, 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e,
+ 0x1f, 0x30, 0x52, 0x7d, 0x0c, 0x23, 0xee, 0xc1, 0xb0, 0x9f,
+ 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa, 0x00, 0x30,
+ 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd,
+ 0x5d, 0x6d, 0x3d, 0x0d, 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7,
+ 0x87, 0xb7, 0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a,
+ 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde, 0xd3, 0xe3,
+ 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43, 0x69, 0x59, 0x09, 0x39,
+ 0xa9, 0x99, 0xc9, 0xf9, 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x04,
+ 0x54, 0x64, 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0x0c,
+ 0x01, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91, 0xbb, 0x8b,
+ 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b, 0x26, 0x16, 0x46, 0x76,
+ 0xe6, 0xd6, 0x86, 0xb6, 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22,
+ 0x72, 0x42, 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf,
+ 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x05, 0x55, 0x65, 0x68, 0x58,
+ 0x08, 0x38, 0xa8, 0x98, 0xc8, 0xf8, 0x25, 0x15, 0x45, 0x75,
+ 0xe5, 0xd5, 0x85, 0xb5, 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48,
+ 0x18, 0x28, 0x02, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92,
+ 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0x0f, 0x6b, 0x5b,
+ 0x0b, 0x3b, 0xab, 0x9b, 0xcb, 0xfb, 0xf6, 0xc6, 0x96, 0xa6,
+ 0x36, 0x06, 0x56, 0x66, 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc,
+ 0xec, 0xdc, 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41,
+ 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29, 0x24, 0x14,
+ 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4, 0x9e, 0xae, 0xfe, 0xce,
+ 0x5e, 0x6e, 0x3e, 0x0e, 0x03, 0x33, 0x63, 0x53, 0xc3, 0xf3,
+ 0xa3, 0x93, 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x07, 0x57, 0x67,
+ 0x6a, 0x5a, 0x0a, 0x3a, 0xaa, 0x9a, 0xca, 0xfa, 0xd0, 0xe0,
+ 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40, 0x4d, 0x7d, 0x2d, 0x1d,
+ 0x8d, 0xbd, 0xed, 0xdd, 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5,
+ 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02,
+ 0x37, 0x06, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0, 0xa2, 0x93,
+ 0xc0, 0xf1, 0x66, 0x57, 0x04, 0x35, 0x6e, 0x5f, 0x0c, 0x3d,
+ 0xaa, 0x9b, 0xc8, 0xf9, 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0x0e,
+ 0x5d, 0x6c, 0x59, 0x68, 0x3b, 0x0a, 0x9d, 0xac, 0xff, 0xce,
+ 0xcc, 0xfd, 0xae, 0x9f, 0x08, 0x39, 0x6a, 0x5b, 0xdc, 0xed,
+ 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b, 0x49, 0x78, 0x2b, 0x1a,
+ 0x8d, 0xbc, 0xef, 0xde, 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e,
+ 0x4d, 0x7c, 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9,
+ 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25, 0x27, 0x16,
+ 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0, 0x85, 0xb4, 0xe7, 0xd6,
+ 0x41, 0x70, 0x23, 0x12, 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5,
+ 0xb6, 0x87, 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x03, 0x32,
+ 0x30, 0x01, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7, 0x92, 0xa3,
+ 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x05, 0x07, 0x36, 0x65, 0x54,
+ 0xc3, 0xf2, 0xa1, 0x90, 0xcb, 0xfa, 0xa9, 0x98, 0x0f, 0x3e,
+ 0x6d, 0x5c, 0x5e, 0x6f, 0x3c, 0x0d, 0x9a, 0xab, 0xf8, 0xc9,
+ 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x09, 0x5a, 0x6b, 0x69, 0x58,
+ 0x0b, 0x3a, 0xad, 0x9c, 0xcf, 0xfe, 0x79, 0x48, 0x1b, 0x2a,
+ 0xbd, 0x8c, 0xdf, 0xee, 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19,
+ 0x4a, 0x7b, 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9,
+ 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c, 0x17, 0x26,
+ 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80, 0x82, 0xb3, 0xe0, 0xd1,
+ 0x46, 0x77, 0x24, 0x15, 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5,
+ 0x86, 0xb7, 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22,
+ 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf,
+ 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13, 0x07, 0x35, 0x63, 0x51,
+ 0xcf, 0xfd, 0xab, 0x99, 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70,
+ 0x26, 0x14, 0x0e, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90,
+ 0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d, 0x09, 0x3b,
+ 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97, 0x84, 0xb6, 0xe0, 0xd2,
+ 0x4c, 0x7e, 0x28, 0x1a, 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6,
+ 0xb0, 0x82, 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0x0f,
+ 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85, 0x96, 0xa4,
+ 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x08, 0x12, 0x20, 0x76, 0x44,
+ 0xda, 0xe8, 0xbe, 0x8c, 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65,
+ 0x33, 0x01, 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b,
+ 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x06, 0x38, 0x0a,
+ 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6, 0xb5, 0x87, 0xd1, 0xe3,
+ 0x7d, 0x4f, 0x19, 0x2b, 0x3f, 0x0d, 0x5b, 0x69, 0xf7, 0xc5,
+ 0x93, 0xa1, 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c,
+ 0x36, 0x04, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8, 0xbb, 0x89,
+ 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25, 0x31, 0x03, 0x55, 0x67,
+ 0xf9, 0xcb, 0x9d, 0xaf, 0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46,
+ 0x10, 0x22, 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba,
+ 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x05, 0x37, 0x23, 0x11,
+ 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd, 0xae, 0x9c, 0xca, 0xf8,
+ 0x66, 0x54, 0x02, 0x30, 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0,
+ 0x86, 0xb4, 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0x0b, 0x39,
+ 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3, 0xa0, 0x92,
+ 0xc4, 0xf6, 0x68, 0x5a, 0x0c, 0x3e, 0x00, 0x33, 0x66, 0x55,
+ 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a,
+ 0x2f, 0x1c, 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e,
+ 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0x0b, 0x2e, 0x1d,
+ 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7, 0xab, 0x98, 0xcd, 0xfe,
+ 0x67, 0x54, 0x01, 0x32, 0x39, 0x0a, 0x5f, 0x6c, 0xf5, 0xc6,
+ 0x93, 0xa0, 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25,
+ 0x5c, 0x6f, 0x3a, 0x09, 0x90, 0xa3, 0xf6, 0xc5, 0xd9, 0xea,
+ 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40, 0x4b, 0x78, 0x2d, 0x1e,
+ 0x87, 0xb4, 0xe1, 0xd2, 0xce, 0xfd, 0xa8, 0x9b, 0x02, 0x31,
+ 0x64, 0x57, 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb,
+ 0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x08, 0x5d, 0x6e, 0x65, 0x56,
+ 0x03, 0x30, 0xa9, 0x9a, 0xcf, 0xfc, 0xe0, 0xd3, 0x86, 0xb5,
+ 0x2c, 0x1f, 0x4a, 0x79, 0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47,
+ 0x12, 0x21, 0x3d, 0x0e, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4,
+ 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x05, 0x36, 0x2a, 0x19,
+ 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3, 0x96, 0xa5, 0xf0, 0xc3,
+ 0x5a, 0x69, 0x3c, 0x0f, 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec,
+ 0xb9, 0x8a, 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18,
+ 0x04, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d, 0xe4, 0xd7,
+ 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d, 0x61, 0x52, 0x07, 0x34,
+ 0xad, 0x9e, 0xcb, 0xf8, 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0x0c,
+ 0x59, 0x6a, 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef,
+ 0xca, 0xf9, 0xac, 0x9f, 0x06, 0x35, 0x60, 0x53, 0x4f, 0x7c,
+ 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6, 0xdd, 0xee, 0xbb, 0x88,
+ 0x11, 0x22, 0x77, 0x44, 0x58, 0x6b, 0x3e, 0x0d, 0x94, 0xa7,
+ 0xf2, 0xc1, 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c,
+ 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31, 0x67, 0x53,
+ 0x0f, 0x3b, 0xb7, 0x83, 0xdf, 0xeb, 0xda, 0xee, 0xb2, 0x86,
+ 0x0a, 0x3e, 0x62, 0x56, 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a,
+ 0x76, 0x42, 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff,
+ 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25, 0x14, 0x20,
+ 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98, 0x81, 0xb5, 0xe9, 0xdd,
+ 0x51, 0x65, 0x39, 0x0d, 0x3c, 0x08, 0x54, 0x60, 0xec, 0xd8,
+ 0x84, 0xb0, 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x02, 0x5e, 0x6a,
+ 0x5b, 0x6f, 0x33, 0x07, 0x8b, 0xbf, 0xe3, 0xd7, 0x4f, 0x7b,
+ 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3, 0xf2, 0xc6, 0x9a, 0xae,
+ 0x22, 0x16, 0x4a, 0x7e, 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc,
+ 0x90, 0xa4, 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19,
+ 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93, 0xa2, 0x96,
+ 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e, 0x78, 0x4c, 0x10, 0x24,
+ 0xa8, 0x9c, 0xc0, 0xf4, 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21,
+ 0x7d, 0x49, 0xd1, 0xe5, 0xb9, 0x8d, 0x01, 0x35, 0x69, 0x5d,
+ 0x6c, 0x58, 0x04, 0x30, 0xbc, 0x88, 0xd4, 0xe0, 0xb6, 0x82,
+ 0xde, 0xea, 0x66, 0x52, 0x0e, 0x3a, 0x0b, 0x3f, 0x63, 0x57,
+ 0xdb, 0xef, 0xb3, 0x87, 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a,
+ 0x26, 0x12, 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf,
+ 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75, 0x44, 0x70,
+ 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8, 0x50, 0x64, 0x38, 0x0c,
+ 0x80, 0xb4, 0xe8, 0xdc, 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x09,
+ 0x55, 0x61, 0x37, 0x03, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb,
+ 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x06, 0x00, 0x35,
+ 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea,
+ 0x61, 0x54, 0x0b, 0x3e, 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96,
+ 0xc9, 0xfc, 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49,
+ 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0x0f, 0x50, 0x65, 0x5b, 0x6e,
+ 0x31, 0x04, 0x8f, 0xba, 0xe5, 0xd0, 0x99, 0xac, 0xf3, 0xc6,
+ 0x4d, 0x78, 0x27, 0x12, 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd,
+ 0x92, 0xa7, 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a,
+ 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff, 0xb6, 0x83,
+ 0xdc, 0xe9, 0x62, 0x57, 0x08, 0x3d, 0x03, 0x36, 0x69, 0x5c,
+ 0xd7, 0xe2, 0xbd, 0x88, 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce,
+ 0x91, 0xa4, 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11,
+ 0x58, 0x6d, 0x32, 0x07, 0x8c, 0xb9, 0xe6, 0xd3, 0xed, 0xd8,
+ 0x87, 0xb2, 0x39, 0x0c, 0x53, 0x66, 0x9f, 0xaa, 0xf5, 0xc0,
+ 0x4b, 0x7e, 0x21, 0x14, 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb,
+ 0x94, 0xa1, 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x09, 0x56, 0x63,
+ 0x5d, 0x68, 0x37, 0x02, 0x89, 0xbc, 0xe3, 0xd6, 0x71, 0x44,
+ 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa, 0xc4, 0xf1, 0xae, 0x9b,
+ 0x10, 0x25, 0x7a, 0x4f, 0x06, 0x33, 0x6c, 0x59, 0xd2, 0xe7,
+ 0xb8, 0x8d, 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0x0d, 0x38,
+ 0x5e, 0x6b, 0x34, 0x01, 0x8a, 0xbf, 0xe0, 0xd5, 0xeb, 0xde,
+ 0x81, 0xb4, 0x3f, 0x0a, 0x55, 0x60, 0x29, 0x1c, 0x43, 0x76,
+ 0xfd, 0xc8, 0x97, 0xa2, 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d,
+ 0x22, 0x17, 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0x0e, 0x3b,
+ 0x05, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e, 0xc7, 0xf2,
+ 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c, 0x72, 0x47, 0x18, 0x2d,
+ 0xa6, 0x93, 0xcc, 0xf9, 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee,
+ 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f,
+ 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5, 0xea, 0xdc,
+ 0x86, 0xb0, 0x32, 0x04, 0x5e, 0x68, 0x8e, 0xb8, 0xe2, 0xd4,
+ 0x56, 0x60, 0x3a, 0x0c, 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd,
+ 0x97, 0xa1, 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b,
+ 0x64, 0x52, 0x08, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6, 0x01, 0x37,
+ 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83, 0xac, 0x9a, 0xc0, 0xf6,
+ 0x74, 0x42, 0x18, 0x2e, 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8,
+ 0xf2, 0xc4, 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x05, 0x5f, 0x69,
+ 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0x0d, 0x22, 0x14,
+ 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0, 0xc8, 0xfe, 0xa4, 0x92,
+ 0x10, 0x26, 0x7c, 0x4a, 0x65, 0x53, 0x09, 0x3f, 0xbd, 0x8b,
+ 0xd1, 0xe7, 0x02, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80,
+ 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d, 0x45, 0x73,
+ 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7, 0xe8, 0xde, 0x84, 0xb2,
+ 0x30, 0x06, 0x5c, 0x6a, 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62,
+ 0x38, 0x0e, 0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3,
+ 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49, 0x66, 0x50,
+ 0x0a, 0x3c, 0xbe, 0x88, 0xd2, 0xe4, 0x03, 0x35, 0x6f, 0x59,
+ 0xdb, 0xed, 0xb7, 0x81, 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40,
+ 0x1a, 0x2c, 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6,
+ 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x07, 0x5d, 0x6b, 0x8d, 0xbb,
+ 0xe1, 0xd7, 0x55, 0x63, 0x39, 0x0f, 0x20, 0x16, 0x4c, 0x7a,
+ 0xf8, 0xce, 0x94, 0xa2, 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24,
+ 0x7e, 0x48, 0x67, 0x51, 0x0b, 0x3d, 0xbf, 0x89, 0xd3, 0xe5,
+ 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92,
+ 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20, 0x57, 0x60, 0x39, 0x0e,
+ 0x8b, 0xbc, 0xe5, 0xd2, 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19,
+ 0x40, 0x77, 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b,
+ 0x0b, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e, 0xf9, 0xce,
+ 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c, 0x5c, 0x6b, 0x32, 0x05,
+ 0x80, 0xb7, 0xee, 0xd9, 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa,
+ 0xf3, 0xc4, 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0x0f, 0x56, 0x61,
+ 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93, 0xb3, 0x84,
+ 0xdd, 0xea, 0x6f, 0x58, 0x01, 0x36, 0xef, 0xd8, 0x81, 0xb6,
+ 0x33, 0x04, 0x5d, 0x6a, 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1,
+ 0xf8, 0xcf, 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d,
+ 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98, 0x82, 0xb5,
+ 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x07, 0x27, 0x10, 0x49, 0x7e,
+ 0xfb, 0xcc, 0x95, 0xa2, 0xd5, 0xe2, 0xbb, 0x8c, 0x09, 0x3e,
+ 0x67, 0x50, 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5,
+ 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9, 0x89, 0xbe,
+ 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0x0c, 0x7b, 0x4c, 0x15, 0x22,
+ 0xa7, 0x90, 0xc9, 0xfe, 0xde, 0xe9, 0xb0, 0x87, 0x02, 0x35,
+ 0x6c, 0x5b, 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46,
+ 0x66, 0x51, 0x08, 0x3f, 0xba, 0x8d, 0xd4, 0xe3, 0x94, 0xa3,
+ 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11, 0x31, 0x06, 0x5f, 0x68,
+ 0xed, 0xda, 0x83, 0xb4, 0x6d, 0x5a, 0x03, 0x34, 0xb1, 0x86,
+ 0xdf, 0xe8, 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d,
+ 0x3a, 0x0d, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf, 0x9f, 0xa8,
+ 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a, 0x00, 0x38, 0x70, 0x48,
+ 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05,
+ 0x4d, 0x75, 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0x0f,
+ 0x7a, 0x42, 0x0a, 0x32, 0x9a, 0xa2, 0xea, 0xd2, 0x53, 0x6b,
+ 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb, 0x8e, 0xb6, 0xfe, 0xc6,
+ 0x6e, 0x56, 0x1e, 0x26, 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c,
+ 0x64, 0x5c, 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81,
+ 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0x0e, 0x7b, 0x43,
+ 0x0b, 0x33, 0x9b, 0xa3, 0xeb, 0xd3, 0x01, 0x39, 0x71, 0x49,
+ 0xe1, 0xd9, 0x91, 0xa9, 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x04,
+ 0x4c, 0x74, 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d,
+ 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80, 0x52, 0x6a,
+ 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa, 0x8f, 0xb7, 0xff, 0xc7,
+ 0x6f, 0x57, 0x1f, 0x27, 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89,
+ 0xc1, 0xf9, 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24,
+ 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e, 0x2b, 0x13,
+ 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83, 0x02, 0x3a, 0x72, 0x4a,
+ 0xe2, 0xda, 0x92, 0xaa, 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x07,
+ 0x4f, 0x77, 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0x0d,
+ 0x78, 0x40, 0x08, 0x30, 0x98, 0xa0, 0xe8, 0xd0, 0xf7, 0xcf,
+ 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f, 0x2a, 0x12, 0x5a, 0x62,
+ 0xca, 0xf2, 0xba, 0x82, 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88,
+ 0xc0, 0xf8, 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25,
+ 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0x0c, 0x79, 0x41,
+ 0x09, 0x31, 0x99, 0xa1, 0xe9, 0xd1, 0x03, 0x3b, 0x73, 0x4b,
+ 0xe3, 0xdb, 0x93, 0xab, 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x06,
+ 0x4e, 0x76, 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf,
+ 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a, 0xb7, 0x8e,
+ 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18, 0x62, 0x5b, 0x10, 0x29,
+ 0x86, 0xbf, 0xf4, 0xcd, 0x73, 0x4a, 0x01, 0x38, 0x97, 0xae,
+ 0xe5, 0xdc, 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x09,
+ 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b, 0x11, 0x28,
+ 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe, 0xe6, 0xdf, 0x94, 0xad,
+ 0x02, 0x3b, 0x70, 0x49, 0x33, 0x0a, 0x41, 0x78, 0xd7, 0xee,
+ 0xa5, 0x9c, 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe,
+ 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b, 0x95, 0xac,
+ 0xe7, 0xde, 0x71, 0x48, 0x03, 0x3a, 0x40, 0x79, 0x32, 0x0b,
+ 0xa4, 0x9d, 0xd6, 0xef, 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff,
+ 0xb4, 0x8d, 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58,
+ 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0x0c, 0x47, 0x7e, 0x04, 0x3d,
+ 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab, 0x66, 0x5f, 0x14, 0x2d,
+ 0x82, 0xbb, 0xf0, 0xc9, 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e,
+ 0x25, 0x1c, 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0x0d,
+ 0x77, 0x4e, 0x05, 0x3c, 0x93, 0xaa, 0xe1, 0xd8, 0x15, 0x2c,
+ 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba, 0xc0, 0xf9, 0xb2, 0x8b,
+ 0x24, 0x1d, 0x56, 0x6f, 0x37, 0x0e, 0x45, 0x7c, 0xd3, 0xea,
+ 0xa1, 0x98, 0xe2, 0xdb, 0x90, 0xa9, 0x06, 0x3f, 0x74, 0x4d,
+ 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f, 0x55, 0x6c,
+ 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa, 0x44, 0x7d, 0x36, 0x0f,
+ 0xa0, 0x99, 0xd2, 0xeb, 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c,
+ 0x07, 0x3e, 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c,
+ 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89, 0x00, 0x3a,
+ 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83,
+ 0x25, 0x1f, 0x51, 0x6b, 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55,
+ 0x1b, 0x21, 0x4a, 0x70, 0x3e, 0x04, 0xa2, 0x98, 0xd6, 0xec,
+ 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5, 0xde, 0xe4,
+ 0xaa, 0x90, 0x36, 0x0c, 0x42, 0x78, 0x94, 0xae, 0xe0, 0xda,
+ 0x7c, 0x46, 0x08, 0x32, 0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b,
+ 0xc5, 0xff, 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80,
+ 0xeb, 0xd1, 0x9f, 0xa5, 0x03, 0x39, 0x77, 0x4d, 0xa1, 0x9b,
+ 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x07, 0x6c, 0x56, 0x18, 0x22,
+ 0x84, 0xbe, 0xf0, 0xca, 0x35, 0x0f, 0x41, 0x7b, 0xdd, 0xe7,
+ 0xa9, 0x93, 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e,
+ 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14, 0x7f, 0x45,
+ 0x0b, 0x31, 0x97, 0xad, 0xe3, 0xd9, 0x4c, 0x76, 0x38, 0x02,
+ 0xa4, 0x9e, 0xd0, 0xea, 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53,
+ 0x1d, 0x27, 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d,
+ 0x06, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0, 0x5f, 0x65,
+ 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9, 0x92, 0xa8, 0xe6, 0xdc,
+ 0x7a, 0x40, 0x0e, 0x34, 0xd8, 0xe2, 0xac, 0x96, 0x30, 0x0a,
+ 0x44, 0x7e, 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3,
+ 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc, 0xa7, 0x9d,
+ 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x01, 0xed, 0xd7, 0x99, 0xa3,
+ 0x05, 0x3f, 0x71, 0x4b, 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2,
+ 0xbc, 0x86, 0x79, 0x43, 0x0d, 0x37, 0x91, 0xab, 0xe5, 0xdf,
+ 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12, 0xfe, 0xc4,
+ 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58, 0x33, 0x09, 0x47, 0x7d,
+ 0xdb, 0xe1, 0xaf, 0x95, 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7,
+ 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64,
+ 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0x0d, 0x36, 0x52, 0x69,
+ 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3, 0x33, 0x08, 0x45, 0x7e,
+ 0xdf, 0xe4, 0xa9, 0x92, 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21,
+ 0x6c, 0x57, 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x05,
+ 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0, 0x66, 0x5d,
+ 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7, 0xa3, 0x98, 0xd5, 0xee,
+ 0x4f, 0x74, 0x39, 0x02, 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26,
+ 0x6b, 0x50, 0x34, 0x0f, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95,
+ 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4, 0x90, 0xab,
+ 0xe6, 0xdd, 0x7c, 0x47, 0x0a, 0x31, 0xc2, 0xf9, 0xb4, 0x8f,
+ 0x2e, 0x15, 0x58, 0x63, 0x07, 0x3c, 0x71, 0x4a, 0xeb, 0xd0,
+ 0x9d, 0xa6, 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d,
+ 0x09, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8, 0x5b, 0x60,
+ 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa, 0x9e, 0xa5, 0xe8, 0xd3,
+ 0x72, 0x49, 0x04, 0x3f, 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28,
+ 0x65, 0x5e, 0x3a, 0x01, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b,
+ 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9, 0xad, 0x96,
+ 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0x0c, 0xaa, 0x91, 0xdc, 0xe7,
+ 0x46, 0x7d, 0x30, 0x0b, 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8,
+ 0xf5, 0xce, 0x3d, 0x06, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c,
+ 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59, 0x99, 0xa2,
+ 0xef, 0xd4, 0x75, 0x4e, 0x03, 0x38, 0x5c, 0x67, 0x2a, 0x11,
+ 0xb0, 0x8b, 0xc6, 0xfd, 0x0e, 0x35, 0x78, 0x43, 0xe2, 0xd9,
+ 0x94, 0xaf, 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a,
+ 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1,
+ 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49, 0xe7, 0xdb, 0x9f, 0xa3,
+ 0x17, 0x2b, 0x6f, 0x53, 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6,
+ 0x92, 0xae, 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67,
+ 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a, 0x34, 0x08,
+ 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80, 0xc9, 0xf5, 0xb1, 0x8d,
+ 0x39, 0x05, 0x41, 0x7d, 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77,
+ 0x33, 0x0f, 0x46, 0x7a, 0x3e, 0x02, 0xb6, 0x8a, 0xce, 0xf2,
+ 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8, 0xa1, 0x9d,
+ 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15, 0x68, 0x54, 0x10, 0x2c,
+ 0x98, 0xa4, 0xe0, 0xdc, 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59,
+ 0x1d, 0x21, 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x07, 0x3b,
+ 0x72, 0x4e, 0x0a, 0x36, 0x82, 0xbe, 0xfa, 0xc6, 0x6b, 0x57,
+ 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf, 0x96, 0xaa, 0xee, 0xd2,
+ 0x66, 0x5a, 0x1e, 0x22, 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40,
+ 0x04, 0x38, 0x71, 0x4d, 0x09, 0x35, 0x81, 0xbd, 0xf9, 0xc5,
+ 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0x0c, 0x45, 0x79,
+ 0x3d, 0x01, 0xb5, 0x89, 0xcd, 0xf1, 0x5f, 0x63, 0x27, 0x1b,
+ 0xaf, 0x93, 0xd7, 0xeb, 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e,
+ 0x2a, 0x16, 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64,
+ 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99, 0x37, 0x0b,
+ 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83, 0xca, 0xf6, 0xb2, 0x8e,
+ 0x3a, 0x06, 0x42, 0x7e, 0x03, 0x3f, 0x7b, 0x47, 0xf3, 0xcf,
+ 0x8b, 0xb7, 0xfe, 0xc2, 0x86, 0xba, 0x0e, 0x32, 0x76, 0x4a,
+ 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50, 0x19, 0x25,
+ 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad, 0x00, 0x3d, 0x7a, 0x47,
+ 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c,
+ 0x7b, 0x46, 0xf7, 0xca, 0x8d, 0xb0, 0x03, 0x3e, 0x79, 0x44,
+ 0x02, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1, 0xf3, 0xce,
+ 0x89, 0xb4, 0x07, 0x3a, 0x7d, 0x40, 0x06, 0x3b, 0x7c, 0x41,
+ 0xf2, 0xcf, 0x88, 0xb5, 0x04, 0x39, 0x7e, 0x43, 0xf0, 0xcd,
+ 0x8a, 0xb7, 0xf1, 0xcc, 0x8b, 0xb6, 0x05, 0x38, 0x7f, 0x42,
+ 0xfb, 0xc6, 0x81, 0xbc, 0x0f, 0x32, 0x75, 0x48, 0x0e, 0x33,
+ 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd, 0x0c, 0x31, 0x76, 0x4b,
+ 0xf8, 0xc5, 0x82, 0xbf, 0xf9, 0xc4, 0x83, 0xbe, 0x0d, 0x30,
+ 0x77, 0x4a, 0x08, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb,
+ 0xfd, 0xc0, 0x87, 0xba, 0x09, 0x34, 0x73, 0x4e, 0xff, 0xc2,
+ 0x85, 0xb8, 0x0b, 0x36, 0x71, 0x4c, 0x0a, 0x37, 0x70, 0x4d,
+ 0xfe, 0xc3, 0x84, 0xb9, 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22,
+ 0x65, 0x58, 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad,
+ 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf, 0xe9, 0xd4,
+ 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a, 0x18, 0x25, 0x62, 0x5f,
+ 0xec, 0xd1, 0x96, 0xab, 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24,
+ 0x63, 0x5e, 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c,
+ 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9, 0x10, 0x2d,
+ 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3, 0xe5, 0xd8, 0x9f, 0xa2,
+ 0x11, 0x2c, 0x6b, 0x56, 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e,
+ 0x69, 0x54, 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1,
+ 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50, 0x16, 0x2b,
+ 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5, 0x14, 0x29, 0x6e, 0x53,
+ 0xe0, 0xdd, 0x9a, 0xa7, 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28,
+ 0x6f, 0x52, 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba,
+ 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57, 0xc7, 0xf9,
+ 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d, 0x2a, 0x14, 0x56, 0x68,
+ 0xd2, 0xec, 0xae, 0x90, 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55,
+ 0x17, 0x29, 0x7e, 0x40, 0x02, 0x3c, 0x86, 0xb8, 0xfa, 0xc4,
+ 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee, 0xb9, 0x87,
+ 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03, 0x3b, 0x05, 0x47, 0x79,
+ 0xc3, 0xfd, 0xbf, 0x81, 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10,
+ 0x52, 0x6c, 0xfc, 0xc2, 0x80, 0xbe, 0x04, 0x3a, 0x78, 0x46,
+ 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab, 0xa8, 0x96,
+ 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12, 0x45, 0x7b, 0x39, 0x07,
+ 0xbd, 0x83, 0xc1, 0xff, 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9,
+ 0xeb, 0xd5, 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x06, 0x38,
+ 0x76, 0x48, 0x0a, 0x34, 0x8e, 0xb0, 0xf2, 0xcc, 0x9b, 0xa5,
+ 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21, 0xb1, 0x8f, 0xcd, 0xf3,
+ 0x49, 0x77, 0x35, 0x0b, 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a,
+ 0xd8, 0xe6, 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f,
+ 0x08, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2, 0x22, 0x1c,
+ 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98, 0xcf, 0xf1, 0xb3, 0x8d,
+ 0x37, 0x09, 0x4b, 0x75, 0x4d, 0x73, 0x31, 0x0f, 0xb5, 0x8b,
+ 0xc9, 0xf7, 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a,
+ 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0x0e, 0x30, 0x67, 0x59,
+ 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd, 0xde, 0xe0, 0xa2, 0x9c,
+ 0x26, 0x18, 0x5a, 0x64, 0x33, 0x0d, 0x4f, 0x71, 0xcb, 0xf5,
+ 0xb7, 0x89, 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3,
+ 0xf4, 0xca, 0x88, 0xb6, 0x0c, 0x32, 0x70, 0x4e, 0x00, 0x3f,
+ 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4,
+ 0x19, 0x26, 0x67, 0x58, 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14,
+ 0x55, 0x6a, 0x32, 0x0d, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f,
+ 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0x0e, 0x56, 0x69,
+ 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb, 0x64, 0x5b, 0x1a, 0x25,
+ 0x98, 0xa7, 0xe6, 0xd9, 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42,
+ 0x03, 0x3c, 0x7b, 0x44, 0x05, 0x3a, 0x87, 0xb8, 0xf9, 0xc6,
+ 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23, 0xac, 0x93,
+ 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11, 0x49, 0x76, 0x37, 0x08,
+ 0xb5, 0x8a, 0xcb, 0xf4, 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0x0b,
+ 0x4a, 0x75, 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90,
+ 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2, 0xfa, 0xc5,
+ 0x84, 0xbb, 0x06, 0x39, 0x78, 0x47, 0xf6, 0xc9, 0x88, 0xb7,
+ 0x0a, 0x35, 0x74, 0x4b, 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0,
+ 0x91, 0xae, 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c,
+ 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x07, 0x46, 0x79, 0x45, 0x7a,
+ 0x3b, 0x04, 0xb9, 0x86, 0xc7, 0xf8, 0xa0, 0x9f, 0xde, 0xe1,
+ 0x5c, 0x63, 0x22, 0x1d, 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51,
+ 0x10, 0x2f, 0x77, 0x48, 0x09, 0x36, 0x8b, 0xb4, 0xf5, 0xca,
+ 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0x0f, 0x30, 0x68, 0x57,
+ 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5, 0x5a, 0x65, 0x24, 0x1b,
+ 0xa6, 0x99, 0xd8, 0xe7, 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c,
+ 0x3d, 0x02, 0x3e, 0x01, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83,
+ 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66, 0xe9, 0xd6,
+ 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54, 0x0c, 0x33, 0x72, 0x4d,
+ 0xf0, 0xcf, 0x8e, 0xb1, 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d,
+ 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7,
+ 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9, 0x4e, 0x0e,
+ 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93, 0xe8, 0xa8, 0x68, 0x28,
+ 0xf5, 0xb5, 0x75, 0x35, 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f,
+ 0x4f, 0x0f, 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x01, 0x41,
+ 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b, 0xcd, 0x8d,
+ 0x4d, 0x0d, 0xd0, 0x90, 0x50, 0x10, 0xf7, 0xb7, 0x77, 0x37,
+ 0xea, 0xaa, 0x6a, 0x2a, 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4,
+ 0x24, 0x64, 0x83, 0xc3, 0x03, 0x43, 0x9e, 0xde, 0x1e, 0x5e,
+ 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8, 0x1f, 0x5f,
+ 0x9f, 0xdf, 0x02, 0x42, 0x82, 0xc2, 0x51, 0x11, 0xd1, 0x91,
+ 0x4c, 0x0c, 0xcc, 0x8c, 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36,
+ 0xf6, 0xb6, 0x87, 0xc7, 0x07, 0x47, 0x9a, 0xda, 0x1a, 0x5a,
+ 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60, 0xf3, 0xb3,
+ 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e, 0xc9, 0x89, 0x49, 0x09,
+ 0xd4, 0x94, 0x54, 0x14, 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32,
+ 0xf2, 0xb2, 0x55, 0x15, 0xd5, 0x95, 0x48, 0x08, 0xc8, 0x88,
+ 0x1b, 0x5b, 0x9b, 0xdb, 0x06, 0x46, 0x86, 0xc6, 0x21, 0x61,
+ 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc, 0x4a, 0x0a, 0xca, 0x8a,
+ 0x57, 0x17, 0xd7, 0x97, 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d,
+ 0xed, 0xad, 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3,
+ 0x04, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9, 0xa2, 0xe2,
+ 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f, 0x98, 0xd8, 0x18, 0x58,
+ 0x85, 0xc5, 0x05, 0x45, 0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b,
+ 0x4b, 0x0b, 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31,
+ 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73,
+ 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8, 0x64, 0x25, 0xe6, 0xa7,
+ 0x7d, 0x3c, 0xff, 0xbe, 0x56, 0x17, 0xd4, 0x95, 0x4f, 0x0e,
+ 0xcd, 0x8c, 0xc8, 0x89, 0x4a, 0x0b, 0xd1, 0x90, 0x53, 0x12,
+ 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20, 0xac, 0xed,
+ 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76, 0x9e, 0xdf, 0x1c, 0x5d,
+ 0x87, 0xc6, 0x05, 0x44, 0x8d, 0xcc, 0x0f, 0x4e, 0x94, 0xd5,
+ 0x16, 0x57, 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65,
+ 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33, 0xdb, 0x9a,
+ 0x59, 0x18, 0xc2, 0x83, 0x40, 0x01, 0x45, 0x04, 0xc7, 0x86,
+ 0x5c, 0x1d, 0xde, 0x9f, 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f,
+ 0xec, 0xad, 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb,
+ 0x13, 0x52, 0x91, 0xd0, 0x0a, 0x4b, 0x88, 0xc9, 0x07, 0x46,
+ 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd, 0x35, 0x74, 0xb7, 0xf6,
+ 0x2c, 0x6d, 0xae, 0xef, 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b,
+ 0xf8, 0xb9, 0x51, 0x10, 0xd3, 0x92, 0x48, 0x09, 0xca, 0x8b,
+ 0xcf, 0x8e, 0x4d, 0x0c, 0xd6, 0x97, 0x54, 0x15, 0xfd, 0xbc,
+ 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27, 0xab, 0xea, 0x29, 0x68,
+ 0xb2, 0xf3, 0x30, 0x71, 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1,
+ 0x02, 0x43, 0x8a, 0xcb, 0x08, 0x49, 0x93, 0xd2, 0x11, 0x50,
+ 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62, 0xee, 0xaf,
+ 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34, 0xdc, 0x9d, 0x5e, 0x1f,
+ 0xc5, 0x84, 0x47, 0x06, 0x42, 0x03, 0xc0, 0x81, 0x5b, 0x1a,
+ 0xd9, 0x98, 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa,
+ 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc, 0x14, 0x55,
+ 0x96, 0xd7, 0x0d, 0x4c, 0x8f, 0xce, 0x00, 0x42, 0x84, 0xc6,
+ 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d,
+ 0xbb, 0xf9, 0x54, 0x16, 0xd0, 0x92, 0x41, 0x03, 0xc5, 0x87,
+ 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad, 0xa8, 0xea,
+ 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b, 0x82, 0xc0, 0x06, 0x44,
+ 0x97, 0xd5, 0x13, 0x51, 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab,
+ 0x6d, 0x2f, 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x05,
+ 0x4d, 0x0f, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e, 0x67, 0x25,
+ 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4, 0x19, 0x5b, 0x9d, 0xdf,
+ 0x0c, 0x4e, 0x88, 0xca, 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64,
+ 0xa2, 0xe0, 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36,
+ 0xcf, 0x8d, 0x4b, 0x09, 0xda, 0x98, 0x5e, 0x1c, 0xb1, 0xf3,
+ 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62, 0x9b, 0xd9, 0x1f, 0x5d,
+ 0x8e, 0xcc, 0x0a, 0x48, 0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd,
+ 0x0b, 0x49, 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63,
+ 0xce, 0x8c, 0x4a, 0x08, 0xdb, 0x99, 0x5f, 0x1d, 0xe4, 0xa6,
+ 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37, 0x32, 0x70, 0xb6, 0xf4,
+ 0x27, 0x65, 0xa3, 0xe1, 0x18, 0x5a, 0x9c, 0xde, 0x0d, 0x4f,
+ 0x89, 0xcb, 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5,
+ 0x4c, 0x0e, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f, 0xd7, 0x95,
+ 0x53, 0x11, 0xc2, 0x80, 0x46, 0x04, 0xfd, 0xbf, 0x79, 0x3b,
+ 0xe8, 0xaa, 0x6c, 0x2e, 0x83, 0xc1, 0x07, 0x45, 0x96, 0xd4,
+ 0x12, 0x50, 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a,
+ 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac, 0x55, 0x17,
+ 0xd1, 0x93, 0x40, 0x02, 0xc4, 0x86, 0x2b, 0x69, 0xaf, 0xed,
+ 0x3e, 0x7c, 0xba, 0xf8, 0x01, 0x43, 0x85, 0xc7, 0x14, 0x56,
+ 0x90, 0xd2, 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4,
+ 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6, 0x44, 0x07,
+ 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90, 0x66, 0x25, 0xe0, 0xa3,
+ 0x77, 0x34, 0xf1, 0xb2, 0x88, 0xcb, 0x0e, 0x4d, 0x99, 0xda,
+ 0x1f, 0x5c, 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e,
+ 0xcc, 0x8f, 0x4a, 0x09, 0xdd, 0x9e, 0x5b, 0x18, 0xee, 0xad,
+ 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a, 0x0d, 0x4e, 0x8b, 0xc8,
+ 0x1c, 0x5f, 0x9a, 0xd9, 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d,
+ 0xb8, 0xfb, 0x49, 0x0a, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d,
+ 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf, 0x85, 0xc6,
+ 0x03, 0x40, 0x94, 0xd7, 0x12, 0x51, 0xa7, 0xe4, 0x21, 0x62,
+ 0xb6, 0xf5, 0x30, 0x73, 0xc1, 0x82, 0x47, 0x04, 0xd0, 0x93,
+ 0x56, 0x15, 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37,
+ 0x1a, 0x59, 0x9c, 0xdf, 0x0b, 0x48, 0x8d, 0xce, 0x38, 0x7b,
+ 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec, 0x5e, 0x1d, 0xd8, 0x9b,
+ 0x4f, 0x0c, 0xc9, 0x8a, 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e,
+ 0xeb, 0xa8, 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x05, 0x46,
+ 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64, 0xd6, 0x95,
+ 0x50, 0x13, 0xc7, 0x84, 0x41, 0x02, 0xf4, 0xb7, 0x72, 0x31,
+ 0xe5, 0xa6, 0x63, 0x20, 0x17, 0x54, 0x91, 0xd2, 0x06, 0x45,
+ 0x80, 0xc3, 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1,
+ 0x53, 0x10, 0xd5, 0x96, 0x42, 0x01, 0xc4, 0x87, 0x71, 0x32,
+ 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5, 0x9f, 0xdc, 0x19, 0x5a,
+ 0x8e, 0xcd, 0x08, 0x4b, 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef,
+ 0x2a, 0x69, 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0x0f,
+ 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d, 0x00, 0x44,
+ 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6,
+ 0x17, 0x53, 0x9f, 0xdb, 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d,
+ 0xb1, 0xf5, 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef,
+ 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9, 0x72, 0x36,
+ 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3, 0x5c, 0x18, 0xd4, 0x90,
+ 0x51, 0x15, 0xd9, 0x9d, 0x46, 0x02, 0xce, 0x8a, 0x4b, 0x0f,
+ 0xc3, 0x87, 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11,
+ 0xca, 0x8e, 0x42, 0x06, 0xc7, 0x83, 0x4f, 0x0b, 0xe4, 0xa0,
+ 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25, 0xfe, 0xba, 0x76, 0x32,
+ 0xf3, 0xb7, 0x7b, 0x3f, 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1,
+ 0x3d, 0x79, 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63,
+ 0x8c, 0xc8, 0x04, 0x40, 0x81, 0xc5, 0x09, 0x4d, 0x96, 0xd2,
+ 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57, 0xbd, 0xf9, 0x35, 0x71,
+ 0xb0, 0xf4, 0x38, 0x7c, 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee,
+ 0x22, 0x66, 0x89, 0xcd, 0x01, 0x45, 0x84, 0xc0, 0x0c, 0x48,
+ 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52, 0xd5, 0x91,
+ 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14, 0xcf, 0x8b, 0x47, 0x03,
+ 0xc2, 0x86, 0x4a, 0x0e, 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8,
+ 0x64, 0x20, 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a,
+ 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac, 0x77, 0x33,
+ 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6, 0x59, 0x1d, 0xd1, 0x95,
+ 0x54, 0x10, 0xdc, 0x98, 0x43, 0x07, 0xcb, 0x8f, 0x4e, 0x0a,
+ 0xc6, 0x82, 0x05, 0x41, 0x8d, 0xc9, 0x08, 0x4c, 0x80, 0xc4,
+ 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde, 0x31, 0x75,
+ 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0, 0x2b, 0x6f, 0xa3, 0xe7,
+ 0x26, 0x62, 0xae, 0xea, 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c,
+ 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4,
+ 0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2, 0x36, 0x73,
+ 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0, 0x48, 0x0d, 0xc2, 0x87,
+ 0x41, 0x04, 0xcb, 0x8e, 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16,
+ 0xd9, 0x9c, 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa,
+ 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8, 0x90, 0xd5,
+ 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56, 0x82, 0xc7, 0x08, 0x4d,
+ 0x8b, 0xce, 0x01, 0x44, 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8,
+ 0x37, 0x72, 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60,
+ 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e, 0xca, 0x8f,
+ 0x40, 0x05, 0xc3, 0x86, 0x49, 0x0c, 0xfc, 0xb9, 0x76, 0x33,
+ 0xf5, 0xb0, 0x7f, 0x3a, 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2,
+ 0x6d, 0x28, 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb,
+ 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9, 0x19, 0x5c,
+ 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf, 0x0b, 0x4e, 0x81, 0xc4,
+ 0x02, 0x47, 0x88, 0xcd, 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39,
+ 0xf6, 0xb3, 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1,
+ 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97, 0x43, 0x06,
+ 0xc9, 0x8c, 0x4a, 0x0f, 0xc0, 0x85, 0xad, 0xe8, 0x27, 0x62,
+ 0xa4, 0xe1, 0x2e, 0x6b, 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3,
+ 0x3c, 0x79, 0x89, 0xcc, 0x03, 0x46, 0x80, 0xc5, 0x0a, 0x4f,
+ 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d, 0xe5, 0xa0,
+ 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23, 0xf7, 0xb2, 0x7d, 0x38,
+ 0xfe, 0xbb, 0x74, 0x31, 0xc1, 0x84, 0x4b, 0x0e, 0xc8, 0x8d,
+ 0x42, 0x07, 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15,
+ 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c,
+ 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5, 0x14, 0x52, 0x98, 0xde,
+ 0x11, 0x57, 0x9d, 0xdb, 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d,
+ 0x97, 0xd1, 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7,
+ 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed, 0x3c, 0x7a,
+ 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3, 0x36, 0x70, 0xba, 0xfc,
+ 0x33, 0x75, 0xbf, 0xf9, 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13,
+ 0xd9, 0x9f, 0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95,
+ 0x44, 0x02, 0xc8, 0x8e, 0x41, 0x07, 0xcd, 0x8b, 0x4e, 0x08,
+ 0xc2, 0x84, 0x4b, 0x0d, 0xc7, 0x81, 0x78, 0x3e, 0xf4, 0xb2,
+ 0x7d, 0x3b, 0xf1, 0xb7, 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31,
+ 0xfb, 0xbd, 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3,
+ 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9, 0xa0, 0xe6,
+ 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f, 0xaa, 0xec, 0x26, 0x60,
+ 0xaf, 0xe9, 0x23, 0x65, 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7,
+ 0x3d, 0x7b, 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71,
+ 0x88, 0xce, 0x04, 0x42, 0x8d, 0xcb, 0x01, 0x47, 0x82, 0xc4,
+ 0x0e, 0x48, 0x87, 0xc1, 0x0b, 0x4d, 0x9c, 0xda, 0x10, 0x56,
+ 0x99, 0xdf, 0x15, 0x53, 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5,
+ 0x1f, 0x59, 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f,
+ 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35, 0xe4, 0xa2,
+ 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b, 0xee, 0xa8, 0x62, 0x24,
+ 0xeb, 0xad, 0x67, 0x21, 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b,
+ 0x51, 0x17, 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d,
+ 0xcc, 0x8a, 0x40, 0x06, 0xc9, 0x8f, 0x45, 0x03, 0xc6, 0x80,
+ 0x4a, 0x0c, 0xc3, 0x85, 0x4f, 0x09, 0x00, 0x47, 0x8e, 0xc9,
+ 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44,
+ 0x8d, 0xca, 0x04, 0x43, 0x8a, 0xcd, 0x05, 0x42, 0x8b, 0xcc,
+ 0x06, 0x41, 0x88, 0xcf, 0x07, 0x40, 0x89, 0xce, 0x08, 0x4f,
+ 0x86, 0xc1, 0x09, 0x4e, 0x87, 0xc0, 0x0a, 0x4d, 0x84, 0xc3,
+ 0x0b, 0x4c, 0x85, 0xc2, 0x0c, 0x4b, 0x82, 0xc5, 0x0d, 0x4a,
+ 0x83, 0xc4, 0x0e, 0x49, 0x80, 0xc7, 0x0f, 0x48, 0x81, 0xc6,
+ 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8, 0x12, 0x55,
+ 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda, 0x14, 0x53, 0x9a, 0xdd,
+ 0x15, 0x52, 0x9b, 0xdc, 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50,
+ 0x99, 0xde, 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0,
+ 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2, 0x1c, 0x5b,
+ 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4, 0x1e, 0x59, 0x90, 0xd7,
+ 0x1f, 0x58, 0x91, 0xd6, 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66,
+ 0xaf, 0xe8, 0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea,
+ 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec, 0x26, 0x61,
+ 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee, 0x28, 0x6f, 0xa6, 0xe1,
+ 0x29, 0x6e, 0xa7, 0xe0, 0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c,
+ 0xa5, 0xe2, 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4,
+ 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6, 0x30, 0x77,
+ 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8, 0x32, 0x75, 0xbc, 0xfb,
+ 0x33, 0x74, 0xbd, 0xfa, 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72,
+ 0xbb, 0xfc, 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe,
+ 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0, 0x3a, 0x7d,
+ 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2, 0x3c, 0x7b, 0xb2, 0xf5,
+ 0x3d, 0x7a, 0xb3, 0xf4, 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78,
+ 0xb1, 0xf6, 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5,
+ 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f, 0xf4, 0xbc,
+ 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11, 0x8e, 0xc6, 0x1e, 0x56,
+ 0xb3, 0xfb, 0x23, 0x6b, 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80,
+ 0x58, 0x10, 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a,
+ 0x01, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4, 0x7b, 0x33,
+ 0xeb, 0xa3, 0x46, 0x0e, 0xd6, 0x9e, 0xf7, 0xbf, 0x67, 0x2f,
+ 0xca, 0x82, 0x5a, 0x12, 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8,
+ 0x20, 0x68, 0x03, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6,
+ 0x79, 0x31, 0xe9, 0xa1, 0x44, 0x0c, 0xd4, 0x9c, 0x02, 0x4a,
+ 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7, 0x78, 0x30, 0xe8, 0xa0,
+ 0x45, 0x0d, 0xd5, 0x9d, 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83,
+ 0x5b, 0x13, 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69,
+ 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16, 0x89, 0xc1,
+ 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c, 0x07, 0x4f, 0x97, 0xdf,
+ 0x3a, 0x72, 0xaa, 0xe2, 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x08,
+ 0xd0, 0x98, 0x06, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3,
+ 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x09, 0xd1, 0x99, 0xf2, 0xba,
+ 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17, 0x88, 0xc0, 0x18, 0x50,
+ 0xb5, 0xfd, 0x25, 0x6d, 0x04, 0x4c, 0x94, 0xdc, 0x39, 0x71,
+ 0xa9, 0xe1, 0x7e, 0x36, 0xee, 0xa6, 0x43, 0x0b, 0xd3, 0x9b,
+ 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15, 0x8a, 0xc2,
+ 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f, 0xf1, 0xb9, 0x61, 0x29,
+ 0xcc, 0x84, 0x5c, 0x14, 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe,
+ 0x26, 0x6e, 0x05, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0,
+ 0x7f, 0x37, 0xef, 0xa7, 0x42, 0x0a, 0xd2, 0x9a, 0x00, 0x49,
+ 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9,
+ 0x4b, 0x02, 0xd9, 0x90, 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94,
+ 0x4f, 0x06, 0x96, 0xdf, 0x04, 0x4d, 0xaf, 0xe6, 0x3d, 0x74,
+ 0xd5, 0x9c, 0x47, 0x0e, 0xec, 0xa5, 0x7e, 0x37, 0xa7, 0xee,
+ 0x35, 0x7c, 0x9e, 0xd7, 0x0c, 0x45, 0x31, 0x78, 0xa3, 0xea,
+ 0x08, 0x41, 0x9a, 0xd3, 0x43, 0x0a, 0xd1, 0x98, 0x7a, 0x33,
+ 0xe8, 0xa1, 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55,
+ 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27, 0x53, 0x1a,
+ 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1, 0x21, 0x68, 0xb3, 0xfa,
+ 0x18, 0x51, 0x8a, 0xc3, 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12,
+ 0xc9, 0x80, 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2,
+ 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64, 0xf4, 0xbd,
+ 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16, 0x73, 0x3a, 0xe1, 0xa8,
+ 0x4a, 0x03, 0xd8, 0x91, 0x01, 0x48, 0x93, 0xda, 0x38, 0x71,
+ 0xaa, 0xe3, 0x97, 0xde, 0x05, 0x4c, 0xae, 0xe7, 0x3c, 0x75,
+ 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x07, 0xa6, 0xef,
+ 0x34, 0x7d, 0x9f, 0xd6, 0x0d, 0x44, 0xd4, 0x9d, 0x46, 0x0f,
+ 0xed, 0xa4, 0x7f, 0x36, 0x42, 0x0b, 0xd0, 0x99, 0x7b, 0x32,
+ 0xe9, 0xa0, 0x30, 0x79, 0xa2, 0xeb, 0x09, 0x40, 0x9b, 0xd2,
+ 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26, 0xb6, 0xff,
+ 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54, 0x20, 0x69, 0xb2, 0xfb,
+ 0x19, 0x50, 0x8b, 0xc2, 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22,
+ 0xf9, 0xb0, 0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3,
+ 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81, 0xf5, 0xbc,
+ 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17, 0x87, 0xce, 0x15, 0x5c,
+ 0xbe, 0xf7, 0x2c, 0x65, 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f,
+ 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81,
+ 0xd4, 0x9e, 0x40, 0x0a, 0xe1, 0xab, 0x75, 0x3f, 0xbe, 0xf4,
+ 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55, 0xb5, 0xff, 0x21, 0x6b,
+ 0x80, 0xca, 0x14, 0x5e, 0xdf, 0x95, 0x4b, 0x01, 0xea, 0xa0,
+ 0x7e, 0x34, 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a,
+ 0x0b, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0, 0x77, 0x3d,
+ 0xe3, 0xa9, 0x42, 0x08, 0xd6, 0x9c, 0x1d, 0x57, 0x89, 0xc3,
+ 0x28, 0x62, 0xbc, 0xf6, 0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc,
+ 0x02, 0x48, 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22,
+ 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29, 0xa8, 0xe2,
+ 0x3c, 0x76, 0x9d, 0xd7, 0x09, 0x43, 0x16, 0x5c, 0x82, 0xc8,
+ 0x23, 0x69, 0xb7, 0xfd, 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x03,
+ 0xdd, 0x97, 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x05,
+ 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f, 0x3a, 0x70,
+ 0xae, 0xe4, 0x0f, 0x45, 0x9b, 0xd1, 0x50, 0x1a, 0xc4, 0x8e,
+ 0x65, 0x2f, 0xf1, 0xbb, 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24,
+ 0xfa, 0xb0, 0x31, 0x7b, 0xa5, 0xef, 0x04, 0x4e, 0x90, 0xda,
+ 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64, 0xe5, 0xaf,
+ 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0x0e, 0x99, 0xd3, 0x0d, 0x47,
+ 0xac, 0xe6, 0x38, 0x72, 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c,
+ 0x52, 0x18, 0x4d, 0x07, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6,
+ 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc, 0x2c, 0x66,
+ 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7, 0x46, 0x0c, 0xd2, 0x98,
+ 0x73, 0x39, 0xe7, 0xad, 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87,
+ 0x59, 0x13, 0x92, 0xd8, 0x06, 0x4c, 0xa7, 0xed, 0x33, 0x79,
+ 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29,
+ 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e, 0xc4, 0x8f, 0x52, 0x19,
+ 0xf5, 0xbe, 0x63, 0x28, 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc,
+ 0x01, 0x4a, 0x95, 0xde, 0x03, 0x48, 0xa4, 0xef, 0x32, 0x79,
+ 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b, 0x51, 0x1a,
+ 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd, 0x33, 0x78, 0xa5, 0xee,
+ 0x02, 0x49, 0x94, 0xdf, 0x37, 0x7c, 0xa1, 0xea, 0x06, 0x4d,
+ 0x90, 0xdb, 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9,
+ 0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f, 0x91, 0xda,
+ 0x07, 0x4c, 0xa0, 0xeb, 0x36, 0x7d, 0xa2, 0xe9, 0x34, 0x7f,
+ 0x93, 0xd8, 0x05, 0x4e, 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba,
+ 0x67, 0x2c, 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a,
+ 0x04, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8, 0x6e, 0x25,
+ 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82, 0x0c, 0x47, 0x9a, 0xd1,
+ 0x3d, 0x76, 0xab, 0xe0, 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0,
+ 0x0d, 0x46, 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24,
+ 0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17, 0x99, 0xd2,
+ 0x0f, 0x44, 0xa8, 0xe3, 0x3e, 0x75, 0x3f, 0x74, 0xa9, 0xe2,
+ 0x0e, 0x45, 0x98, 0xd3, 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27,
+ 0xfa, 0xb1, 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5,
+ 0x3b, 0x70, 0xad, 0xe6, 0x0a, 0x41, 0x9c, 0xd7, 0x9d, 0xd6,
+ 0x0b, 0x40, 0xac, 0xe7, 0x3a, 0x71, 0xff, 0xb4, 0x69, 0x22,
+ 0xce, 0x85, 0x58, 0x13, 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6,
+ 0x6b, 0x20, 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x09, 0x42,
+ 0x08, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4, 0x6a, 0x21,
+ 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86, 0x00, 0x4c, 0x98, 0xd4,
+ 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b,
+ 0xef, 0xa3, 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x01, 0x4d,
+ 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17, 0x75, 0x39,
+ 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c, 0x2f, 0x63, 0xb7, 0xfb,
+ 0x02, 0x4e, 0x9a, 0xd6, 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0,
+ 0x74, 0x38, 0x9b, 0xd7, 0x03, 0x4f, 0xb6, 0xfa, 0x2e, 0x62,
+ 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13, 0xb0, 0xfc,
+ 0x28, 0x64, 0x9d, 0xd1, 0x05, 0x49, 0x5e, 0x12, 0xc6, 0x8a,
+ 0x73, 0x3f, 0xeb, 0xa7, 0x04, 0x48, 0x9c, 0xd0, 0x29, 0x65,
+ 0xb1, 0xfd, 0x9f, 0xd3, 0x07, 0x4b, 0xb2, 0xfe, 0x2a, 0x66,
+ 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c, 0x2b, 0x67,
+ 0xb3, 0xff, 0x06, 0x4a, 0x9e, 0xd2, 0x71, 0x3d, 0xe9, 0xa5,
+ 0x5c, 0x10, 0xc4, 0x88, 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8,
+ 0x7c, 0x30, 0x93, 0xdf, 0x0b, 0x47, 0xbe, 0xf2, 0x26, 0x6a,
+ 0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84, 0x27, 0x6b,
+ 0xbf, 0xf3, 0x0a, 0x46, 0x92, 0xde, 0xbc, 0xf0, 0x24, 0x68,
+ 0x91, 0xdd, 0x09, 0x45, 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87,
+ 0x53, 0x1f, 0x08, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1,
+ 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab, 0x23, 0x6f,
+ 0xbb, 0xf7, 0x0e, 0x42, 0x96, 0xda, 0x79, 0x35, 0xe1, 0xad,
+ 0x54, 0x18, 0xcc, 0x80, 0x97, 0xdb, 0x0f, 0x43, 0xba, 0xf6,
+ 0x22, 0x6e, 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34,
+ 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf, 0x0c, 0x40,
+ 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5, 0xe2, 0xae, 0x7a, 0x36,
+ 0xcf, 0x83, 0x57, 0x1b, 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9,
+ 0x0d, 0x41, 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe,
+ 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac, 0xa4, 0xe9,
+ 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a, 0xf6, 0xbb, 0x6c, 0x21,
+ 0xdf, 0x92, 0x45, 0x08, 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31,
+ 0xe6, 0xab, 0x07, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9,
+ 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0x0f, 0xa3, 0xee,
+ 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d, 0xaa, 0xe7, 0x30, 0x7d,
+ 0x83, 0xce, 0x19, 0x54, 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c,
+ 0x4b, 0x06, 0x0e, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0,
+ 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2, 0xff, 0xb2,
+ 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x01, 0xad, 0xe0, 0x37, 0x7a,
+ 0x84, 0xc9, 0x1e, 0x53, 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f,
+ 0xe8, 0xa5, 0x09, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7,
+ 0x49, 0x04, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7, 0x1b, 0x56,
+ 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5, 0xed, 0xa0, 0x77, 0x3a,
+ 0xc4, 0x89, 0x5e, 0x13, 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb,
+ 0x0c, 0x41, 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2,
+ 0x4e, 0x03, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0, 0xb8, 0xf5,
+ 0x22, 0x6f, 0x91, 0xdc, 0x0b, 0x46, 0xea, 0xa7, 0x70, 0x3d,
+ 0xc3, 0x8e, 0x59, 0x14, 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87,
+ 0x50, 0x1d, 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x02, 0x4f,
+ 0x47, 0x0a, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9, 0x15, 0x58,
+ 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb, 0xb6, 0xfb, 0x2c, 0x61,
+ 0x9f, 0xd2, 0x05, 0x48, 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80,
+ 0x57, 0x1a, 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec,
+ 0x40, 0x0d, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe, 0x00, 0x4e,
+ 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98,
+ 0x6f, 0x21, 0xf3, 0xbd, 0x94, 0xda, 0x08, 0x46, 0xb1, 0xff,
+ 0x2d, 0x63, 0xde, 0x90, 0x42, 0x0c, 0xfb, 0xb5, 0x67, 0x29,
+ 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2, 0x7f, 0x31,
+ 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88, 0xa1, 0xef, 0x3d, 0x73,
+ 0x84, 0xca, 0x18, 0x56, 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80,
+ 0x52, 0x1c, 0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x01, 0xd3, 0x9d,
+ 0x20, 0x6e, 0xbc, 0xf2, 0x05, 0x4b, 0x99, 0xd7, 0xfe, 0xb0,
+ 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x09, 0xb4, 0xfa, 0x28, 0x66,
+ 0x91, 0xdf, 0x0d, 0x43, 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34,
+ 0xe6, 0xa8, 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2,
+ 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c, 0x81, 0xcf,
+ 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76, 0xd4, 0x9a, 0x48, 0x06,
+ 0xf1, 0xbf, 0x6d, 0x23, 0x9e, 0xd0, 0x02, 0x4c, 0xbb, 0xf5,
+ 0x27, 0x69, 0x40, 0x0e, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7,
+ 0x0a, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd, 0xe1, 0xaf,
+ 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16, 0xab, 0xe5, 0x37, 0x79,
+ 0x8e, 0xc0, 0x12, 0x5c, 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e,
+ 0xcc, 0x82, 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8,
+ 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x07, 0x49, 0xf4, 0xba,
+ 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x03, 0x2a, 0x64, 0xb6, 0xf8,
+ 0x0f, 0x41, 0x93, 0xdd, 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0x0b,
+ 0xd9, 0x97, 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c,
+ 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36, 0x1f, 0x51,
+ 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8, 0x55, 0x1b, 0xc9, 0x87,
+ 0x70, 0x3e, 0xec, 0xa2, 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e,
+ 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2,
+ 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74, 0xc6, 0x89,
+ 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36, 0x15, 0x5a, 0x8b, 0xc4,
+ 0x34, 0x7b, 0xaa, 0xe5, 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39,
+ 0xe8, 0xa7, 0x91, 0xde, 0x0f, 0x40, 0xb0, 0xff, 0x2e, 0x61,
+ 0xd3, 0x9c, 0x4d, 0x02, 0xf2, 0xbd, 0x6c, 0x23, 0x2a, 0x65,
+ 0xb4, 0xfb, 0x0b, 0x44, 0x95, 0xda, 0x68, 0x27, 0xf6, 0xb9,
+ 0x49, 0x06, 0xd7, 0x98, 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0,
+ 0x11, 0x5e, 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c,
+ 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf, 0x7d, 0x32,
+ 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d, 0xbb, 0xf4, 0x25, 0x6a,
+ 0x9a, 0xd5, 0x04, 0x4b, 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97,
+ 0x46, 0x09, 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4,
+ 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6, 0xd0, 0x9f,
+ 0x4e, 0x01, 0xf1, 0xbe, 0x6f, 0x20, 0x92, 0xdd, 0x0c, 0x43,
+ 0xb3, 0xfc, 0x2d, 0x62, 0x41, 0x0e, 0xdf, 0x90, 0x60, 0x2f,
+ 0xfe, 0xb1, 0x03, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3,
+ 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35, 0x87, 0xc8,
+ 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77, 0x7e, 0x31, 0xe0, 0xaf,
+ 0x5f, 0x10, 0xc1, 0x8e, 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52,
+ 0x83, 0xcc, 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0x0a,
+ 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x07, 0x48, 0x6b, 0x24,
+ 0xf5, 0xba, 0x4a, 0x05, 0xd4, 0x9b, 0x29, 0x66, 0xb7, 0xf8,
+ 0x08, 0x47, 0x96, 0xd9, 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81,
+ 0x50, 0x1f, 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d,
+ 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea,
+ 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17, 0x69, 0x39, 0xc9, 0x99,
+ 0x34, 0x64, 0x94, 0xc4, 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde,
+ 0x2e, 0x7e, 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f,
+ 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5, 0xbb, 0xeb,
+ 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16, 0x01, 0x51, 0xa1, 0xf1,
+ 0x5c, 0x0c, 0xfc, 0xac, 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4,
+ 0x44, 0x14, 0x03, 0x53, 0xa3, 0xf3, 0x5e, 0x0e, 0xfe, 0xae,
+ 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d, 0x6a, 0x3a,
+ 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7, 0x6b, 0x3b, 0xcb, 0x9b,
+ 0x36, 0x66, 0x96, 0xc6, 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc,
+ 0x2c, 0x7c, 0x02, 0x52, 0xa2, 0xf2, 0x5f, 0x0f, 0xff, 0xaf,
+ 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15, 0x6f, 0x3f,
+ 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2, 0xd5, 0x85, 0x75, 0x25,
+ 0x88, 0xd8, 0x28, 0x78, 0x06, 0x56, 0xa6, 0xf6, 0x5b, 0x0b,
+ 0xfb, 0xab, 0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11,
+ 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10, 0x07, 0x57,
+ 0xa7, 0xf7, 0x5a, 0x0a, 0xfa, 0xaa, 0xd4, 0x84, 0x74, 0x24,
+ 0x89, 0xd9, 0x29, 0x79, 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63,
+ 0x93, 0xc3, 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b,
+ 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1, 0xbf, 0xef,
+ 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12, 0x05, 0x55, 0xa5, 0xf5,
+ 0x58, 0x08, 0xf8, 0xa8, 0x04, 0x54, 0xa4, 0xf4, 0x59, 0x09,
+ 0xf9, 0xa9, 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13,
+ 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0, 0xd7, 0x87,
+ 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a, 0x00, 0x51, 0xa2, 0xf3,
+ 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba,
+ 0x49, 0x18, 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3,
+ 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61, 0xf2, 0xa3,
+ 0x50, 0x01, 0xab, 0xfa, 0x09, 0x58, 0x40, 0x11, 0xe2, 0xb3,
+ 0x19, 0x48, 0xbb, 0xea, 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83,
+ 0x70, 0x21, 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93,
+ 0xf9, 0xa8, 0x5b, 0x0a, 0xa0, 0xf1, 0x02, 0x53, 0x4b, 0x1a,
+ 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1, 0x80, 0xd1, 0x22, 0x73,
+ 0xd9, 0x88, 0x7b, 0x2a, 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a,
+ 0xc9, 0x98, 0x0b, 0x5a, 0xa9, 0xf8, 0x52, 0x03, 0xf0, 0xa1,
+ 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13, 0x72, 0x23,
+ 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8, 0xc0, 0x91, 0x62, 0x33,
+ 0x99, 0xc8, 0x3b, 0x6a, 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7,
+ 0x14, 0x45, 0x5d, 0x0c, 0xff, 0xae, 0x04, 0x55, 0xa6, 0xf7,
+ 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c, 0x24, 0x75,
+ 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e, 0x1d, 0x4c, 0xbf, 0xee,
+ 0x44, 0x15, 0xe6, 0xb7, 0xaf, 0xfe, 0x0d, 0x5c, 0xf6, 0xa7,
+ 0x54, 0x05, 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce,
+ 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c, 0x16, 0x47,
+ 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc, 0xa4, 0xf5, 0x06, 0x57,
+ 0xfd, 0xac, 0x5f, 0x0e, 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67,
+ 0x94, 0xc5, 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77,
+ 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e, 0x56, 0x07,
+ 0xf4, 0xa5, 0x0f, 0x5e, 0xad, 0xfc, 0x9d, 0xcc, 0x3f, 0x6e,
+ 0xc4, 0x95, 0x66, 0x37, 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27,
+ 0xd4, 0x85, 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3,
+ 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09, 0x49, 0x1b,
+ 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea, 0xe3, 0xb1, 0x47, 0x15,
+ 0xb6, 0xe4, 0x12, 0x40, 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95,
+ 0x63, 0x31, 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b,
+ 0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78, 0x71, 0x23,
+ 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2, 0x39, 0x6b, 0x9d, 0xcf,
+ 0x6c, 0x3e, 0xc8, 0x9a, 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94,
+ 0x62, 0x30, 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3,
+ 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79, 0xab, 0xf9,
+ 0x0f, 0x5d, 0xfe, 0xac, 0x5a, 0x08, 0x01, 0x53, 0xa5, 0xf7,
+ 0x54, 0x06, 0xf0, 0xa2, 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5,
+ 0x13, 0x41, 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb,
+ 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1, 0xd8, 0x8a,
+ 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b, 0x3b, 0x69, 0x9f, 0xcd,
+ 0x6e, 0x3c, 0xca, 0x98, 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96,
+ 0x60, 0x32, 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43,
+ 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9, 0xa9, 0xfb,
+ 0x0d, 0x5f, 0xfc, 0xae, 0x58, 0x0a, 0x03, 0x51, 0xa7, 0xf5,
+ 0x56, 0x04, 0xf2, 0xa0, 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c,
+ 0xba, 0xe8, 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42,
+ 0x02, 0x50, 0xa6, 0xf4, 0x57, 0x05, 0xf3, 0xa1, 0xa8, 0xfa,
+ 0x0c, 0x5e, 0xfd, 0xaf, 0x59, 0x0b, 0xd9, 0x8b, 0x7d, 0x2f,
+ 0x8c, 0xde, 0x28, 0x7a, 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74,
+ 0x82, 0xd0, 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33,
+ 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99, 0x00, 0x53,
+ 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57,
+ 0xf3, 0xa0, 0x55, 0x06, 0x59, 0x0a, 0xff, 0xac, 0x08, 0x5b,
+ 0xae, 0xfd, 0xfb, 0xa8, 0x5d, 0x0e, 0xaa, 0xf9, 0x0c, 0x5f,
+ 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16, 0x10, 0x43,
+ 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4, 0xeb, 0xb8, 0x4d, 0x1e,
+ 0xba, 0xe9, 0x1c, 0x4f, 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b,
+ 0xbe, 0xed, 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd,
+ 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f, 0x20, 0x73,
+ 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84, 0x82, 0xd1, 0x24, 0x77,
+ 0xd3, 0x80, 0x75, 0x26, 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9,
+ 0x3c, 0x6f, 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd,
+ 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36, 0x30, 0x63,
+ 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94, 0xf2, 0xa1, 0x54, 0x07,
+ 0xa3, 0xf0, 0x05, 0x56, 0x50, 0x03, 0xf6, 0xa5, 0x01, 0x52,
+ 0xa7, 0xf4, 0xab, 0xf8, 0x0d, 0x5e, 0xfa, 0xa9, 0x5c, 0x0f,
+ 0x09, 0x5a, 0xaf, 0xfc, 0x58, 0x0b, 0xfe, 0xad, 0x40, 0x13,
+ 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4, 0xe2, 0xb1, 0x44, 0x17,
+ 0xb3, 0xe0, 0x15, 0x46, 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b,
+ 0xee, 0xbd, 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f,
+ 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f, 0x29, 0x7a,
+ 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d, 0xd2, 0x81, 0x74, 0x27,
+ 0x83, 0xd0, 0x25, 0x76, 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72,
+ 0x87, 0xd4, 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d,
+ 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f, 0x60, 0x33,
+ 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4, 0xc2, 0x91, 0x64, 0x37,
+ 0x93, 0xc0, 0x35, 0x66, 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19,
+ 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b,
+ 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98, 0xb3, 0xe7,
+ 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x02, 0x52, 0x06, 0xfa, 0xae,
+ 0x1f, 0x4b, 0xb7, 0xe3, 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1,
+ 0x2d, 0x79, 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca,
+ 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x04, 0x50, 0xa4, 0xf0,
+ 0x0c, 0x58, 0xe9, 0xbd, 0x41, 0x15, 0x3e, 0x6a, 0x96, 0xc2,
+ 0x73, 0x27, 0xdb, 0x8f, 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94,
+ 0x68, 0x3c, 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0x0e, 0xf2, 0xa6,
+ 0xf6, 0xa2, 0x5e, 0x0a, 0xbb, 0xef, 0x13, 0x47, 0x6c, 0x38,
+ 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd, 0xdf, 0x8b, 0x77, 0x23,
+ 0x92, 0xc6, 0x3a, 0x6e, 0x45, 0x11, 0xed, 0xb9, 0x08, 0x5c,
+ 0xa0, 0xf4, 0x55, 0x01, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4,
+ 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e, 0x7c, 0x28,
+ 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd, 0xe6, 0xb2, 0x4e, 0x1a,
+ 0xab, 0xff, 0x03, 0x57, 0x07, 0x53, 0xaf, 0xfb, 0x4a, 0x1e,
+ 0xe2, 0xb6, 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c,
+ 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f, 0xb4, 0xe0,
+ 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x05, 0xf1, 0xa5, 0x59, 0x0d,
+ 0xbc, 0xe8, 0x14, 0x40, 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72,
+ 0x8e, 0xda, 0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69,
+ 0x42, 0x16, 0xea, 0xbe, 0x0f, 0x5b, 0xa7, 0xf3, 0xa3, 0xf7,
+ 0x0b, 0x5f, 0xee, 0xba, 0x46, 0x12, 0x39, 0x6d, 0x91, 0xc5,
+ 0x74, 0x20, 0xdc, 0x88, 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93,
+ 0x6f, 0x3b, 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x09, 0xf5, 0xa1,
+ 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7,
+ 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24, 0x39, 0x6c, 0x93, 0xc6,
+ 0x70, 0x25, 0xda, 0x8f, 0xab, 0xfe, 0x01, 0x54, 0xe2, 0xb7,
+ 0x48, 0x1d, 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4,
+ 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x03, 0x56, 0x4b, 0x1e,
+ 0xe1, 0xb4, 0x02, 0x57, 0xa8, 0xfd, 0xd9, 0x8c, 0x73, 0x26,
+ 0x90, 0xc5, 0x3a, 0x6f, 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8,
+ 0x07, 0x52, 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0,
+ 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b, 0x4f, 0x1a,
+ 0xe5, 0xb0, 0x06, 0x53, 0xac, 0xf9, 0x96, 0xc3, 0x3c, 0x69,
+ 0xdf, 0x8a, 0x75, 0x20, 0x04, 0x51, 0xae, 0xfb, 0x4d, 0x18,
+ 0xe7, 0xb2, 0xaf, 0xfa, 0x05, 0x50, 0xe6, 0xb3, 0x4c, 0x19,
+ 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b, 0xd5, 0x80,
+ 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63, 0x47, 0x12, 0xed, 0xb8,
+ 0x0e, 0x5b, 0xa4, 0xf1, 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0,
+ 0x0f, 0x5a, 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8,
+ 0xa7, 0xf2, 0x0d, 0x58, 0xee, 0xbb, 0x44, 0x11, 0x35, 0x60,
+ 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83, 0x9e, 0xcb, 0x34, 0x61,
+ 0xd7, 0x82, 0x7d, 0x28, 0x0c, 0x59, 0xa6, 0xf3, 0x45, 0x10,
+ 0xef, 0xba, 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87,
+ 0xa3, 0xf6, 0x09, 0x5c, 0xea, 0xbf, 0x40, 0x15, 0x08, 0x5d,
+ 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe, 0x9a, 0xcf, 0x30, 0x65,
+ 0xd3, 0x86, 0x79, 0x2c, 0x43, 0x16, 0xe9, 0xbc, 0x0a, 0x5f,
+ 0xa0, 0xf5, 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67,
+ 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc, 0xe8, 0xbd,
+ 0x42, 0x17, 0xa1, 0xf4, 0x0b, 0x5e, 0x00, 0x56, 0xac, 0xfa,
+ 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99,
+ 0x63, 0x35, 0x09, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6,
+ 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c, 0x12, 0x44,
+ 0xbe, 0xe8, 0x57, 0x01, 0xfb, 0xad, 0x98, 0xce, 0x34, 0x62,
+ 0xdd, 0x8b, 0x71, 0x27, 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x08,
+ 0xf2, 0xa4, 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e,
+ 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b, 0xae, 0xf8,
+ 0x02, 0x54, 0xeb, 0xbd, 0x47, 0x11, 0x2d, 0x7b, 0x81, 0xd7,
+ 0x68, 0x3e, 0xc4, 0x92, 0xa7, 0xf1, 0x0b, 0x5d, 0xe2, 0xb4,
+ 0x4e, 0x18, 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89,
+ 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x03, 0x3f, 0x69,
+ 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80, 0xb5, 0xe3, 0x19, 0x4f,
+ 0xf0, 0xa6, 0x5c, 0x0a, 0x48, 0x1e, 0xe4, 0xb2, 0x0d, 0x5b,
+ 0xa1, 0xf7, 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d,
+ 0x41, 0x17, 0xed, 0xbb, 0x04, 0x52, 0xa8, 0xfe, 0xcb, 0x9d,
+ 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74, 0x5a, 0x0c, 0xf6, 0xa0,
+ 0x1f, 0x49, 0xb3, 0xe5, 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3,
+ 0x39, 0x6f, 0x53, 0x05, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec,
+ 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66, 0x6c, 0x3a,
+ 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3, 0xe6, 0xb0, 0x4a, 0x1c,
+ 0xa3, 0xf5, 0x0f, 0x59, 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76,
+ 0x8c, 0xda, 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x06, 0x50,
+ 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1, 0xf4, 0xa2,
+ 0x58, 0x0e, 0xb1, 0xe7, 0x1d, 0x4b, 0x77, 0x21, 0xdb, 0x8d,
+ 0x32, 0x64, 0x9e, 0xc8, 0xfd, 0xab, 0x51, 0x07, 0xb8, 0xee,
+ 0x14, 0x42, 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8,
+ 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a, 0x19, 0x4e,
+ 0xb7, 0xe0, 0x58, 0x0f, 0xf6, 0xa1, 0x9b, 0xcc, 0x35, 0x62,
+ 0xda, 0x8d, 0x74, 0x23, 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24,
+ 0xdd, 0x8a, 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x08,
+ 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93, 0xa9, 0xfe,
+ 0x07, 0x50, 0xe8, 0xbf, 0x46, 0x11, 0x64, 0x33, 0xca, 0x9d,
+ 0x25, 0x72, 0x8b, 0xdc, 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0,
+ 0x09, 0x5e, 0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5,
+ 0xff, 0xa8, 0x51, 0x06, 0xbe, 0xe9, 0x10, 0x47, 0x56, 0x01,
+ 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee, 0xd4, 0x83, 0x7a, 0x2d,
+ 0x95, 0xc2, 0x3b, 0x6c, 0x4f, 0x18, 0xe1, 0xb6, 0x0e, 0x59,
+ 0xa0, 0xf7, 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75,
+ 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70, 0x4a, 0x1d,
+ 0xe4, 0xb3, 0x0b, 0x5c, 0xa5, 0xf2, 0xd1, 0x86, 0x7f, 0x28,
+ 0x90, 0xc7, 0x3e, 0x69, 0x53, 0x04, 0xfd, 0xaa, 0x12, 0x45,
+ 0xbc, 0xeb, 0xfa, 0xad, 0x54, 0x03, 0xbb, 0xec, 0x15, 0x42,
+ 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0, 0xe3, 0xb4,
+ 0x4d, 0x1a, 0xa2, 0xf5, 0x0c, 0x5b, 0x61, 0x36, 0xcf, 0x98,
+ 0x20, 0x77, 0x8e, 0xd9, 0xac, 0xfb, 0x02, 0x55, 0xed, 0xba,
+ 0x43, 0x14, 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96,
+ 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0x0d, 0x37, 0x60,
+ 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f, 0x9e, 0xc9, 0x30, 0x67,
+ 0xdf, 0x88, 0x71, 0x26, 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0x0a,
+ 0xf3, 0xa4, 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f,
+ 0x05, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd, 0x00, 0x58,
+ 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12,
+ 0x87, 0xdf, 0x37, 0x6f, 0xe9, 0xb1, 0x59, 0x01, 0x94, 0xcc,
+ 0x24, 0x7c, 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86,
+ 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x02, 0x5a, 0x35, 0x6d,
+ 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0, 0x26, 0x7e, 0x96, 0xce,
+ 0x5b, 0x03, 0xeb, 0xb3, 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9,
+ 0x11, 0x49, 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16,
+ 0x79, 0x21, 0xc9, 0x91, 0x04, 0x5c, 0xb4, 0xec, 0x6a, 0x32,
+ 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff, 0x90, 0xc8, 0x20, 0x78,
+ 0xed, 0xb5, 0x5d, 0x05, 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69,
+ 0x81, 0xd9, 0xb6, 0xee, 0x06, 0x5e, 0xcb, 0x93, 0x7b, 0x23,
+ 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30, 0x5f, 0x07,
+ 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca, 0x1b, 0x43, 0xab, 0xf3,
+ 0x66, 0x3e, 0xd6, 0x8e, 0xe1, 0xb9, 0x51, 0x09, 0x9c, 0xc4,
+ 0x2c, 0x74, 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67,
+ 0x08, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d, 0xd4, 0x8c,
+ 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41, 0x2e, 0x76, 0x9e, 0xc6,
+ 0x53, 0x0b, 0xe3, 0xbb, 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18,
+ 0xf0, 0xa8, 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0x0a, 0x52,
+ 0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0x0d, 0x62, 0x3a,
+ 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7, 0x71, 0x29, 0xc1, 0x99,
+ 0x0c, 0x54, 0xbc, 0xe4, 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae,
+ 0x46, 0x1e, 0x57, 0x0f, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2,
+ 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38, 0xbe, 0xe6,
+ 0x0e, 0x56, 0xc3, 0x9b, 0x73, 0x2b, 0x44, 0x1c, 0xf4, 0xac,
+ 0x39, 0x61, 0x89, 0xd1, 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20,
+ 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60,
+ 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b, 0x0b, 0x52,
+ 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99, 0xef, 0xb6, 0x5d, 0x04,
+ 0x96, 0xcf, 0x24, 0x7d, 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d,
+ 0xd6, 0x8f, 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84,
+ 0xe4, 0xbd, 0x56, 0x0f, 0x9d, 0xc4, 0x2f, 0x76, 0xc3, 0x9a,
+ 0x71, 0x28, 0xba, 0xe3, 0x08, 0x51, 0x31, 0x68, 0x83, 0xda,
+ 0x48, 0x11, 0xfa, 0xa3, 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a,
+ 0xf1, 0xa8, 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x03, 0x5a,
+ 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0x0c, 0xe7, 0xbe, 0xde, 0x87,
+ 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c, 0xd5, 0x8c, 0x67, 0x3e,
+ 0xac, 0xf5, 0x1e, 0x47, 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x07,
+ 0xec, 0xb5, 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x09,
+ 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb, 0x62, 0x3b,
+ 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0, 0x90, 0xc9, 0x22, 0x7b,
+ 0xe9, 0xb0, 0x5b, 0x02, 0x74, 0x2d, 0xc6, 0x9f, 0x0d, 0x54,
+ 0xbf, 0xe6, 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14,
+ 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f, 0x7f, 0x26,
+ 0xcd, 0x94, 0x06, 0x5f, 0xb4, 0xed, 0x58, 0x01, 0xea, 0xb3,
+ 0x21, 0x78, 0x93, 0xca, 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a,
+ 0x61, 0x38, 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33,
+ 0x53, 0x0a, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1, 0xb7, 0xee,
+ 0x05, 0x5c, 0xce, 0x97, 0x7c, 0x25, 0x45, 0x1c, 0xf7, 0xae,
+ 0x3c, 0x65, 0x8e, 0xd7, 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e,
+ 0x85, 0xdc, 0xbc, 0xe5, 0x0e, 0x57, 0xc5, 0x9c, 0x77, 0x2e,
+ 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0,
+ 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71, 0xc9, 0x93, 0x7d, 0x27,
+ 0xbc, 0xe6, 0x08, 0x52, 0x23, 0x79, 0x97, 0xcd, 0x56, 0x0c,
+ 0xe2, 0xb8, 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14,
+ 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe, 0x46, 0x1c,
+ 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd, 0xac, 0xf6, 0x18, 0x42,
+ 0xd9, 0x83, 0x6d, 0x37, 0x03, 0x59, 0xb7, 0xed, 0x76, 0x2c,
+ 0xc2, 0x98, 0xe9, 0xb3, 0x5d, 0x07, 0x9c, 0xc6, 0x28, 0x72,
+ 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0x0b, 0x51, 0x20, 0x7a,
+ 0x94, 0xce, 0x55, 0x0f, 0xe1, 0xbb, 0x8c, 0xd6, 0x38, 0x62,
+ 0xf9, 0xa3, 0x4d, 0x17, 0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49,
+ 0xa7, 0xfd, 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde,
+ 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34, 0x06, 0x5c,
+ 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d, 0xec, 0xb6, 0x58, 0x02,
+ 0x99, 0xc3, 0x2d, 0x77, 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0,
+ 0x0e, 0x54, 0x25, 0x7f, 0x91, 0xcb, 0x50, 0x0a, 0xe4, 0xbe,
+ 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12, 0x63, 0x39,
+ 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8, 0x40, 0x1a, 0xf4, 0xae,
+ 0x35, 0x6f, 0x81, 0xdb, 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85,
+ 0x6b, 0x31, 0x05, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e,
+ 0xef, 0xb5, 0x5b, 0x01, 0x9a, 0xc0, 0x2e, 0x74, 0xcc, 0x96,
+ 0x78, 0x22, 0xb9, 0xe3, 0x0d, 0x57, 0x26, 0x7c, 0x92, 0xc8,
+ 0x53, 0x09, 0xe7, 0xbd, 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5,
+ 0x4b, 0x11, 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb,
+ 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8, 0xa9, 0xf3,
+ 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32, 0x00, 0x5b, 0xb6, 0xed,
+ 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8,
+ 0x25, 0x7e, 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45,
+ 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7, 0xaf, 0xf4,
+ 0x19, 0x42, 0xde, 0x85, 0x68, 0x33, 0x4d, 0x16, 0xfb, 0xa0,
+ 0x3c, 0x67, 0x8a, 0xd1, 0x76, 0x2d, 0xc0, 0x9b, 0x07, 0x5c,
+ 0xb1, 0xea, 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x08,
+ 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf, 0xa1, 0xfa,
+ 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d, 0x9a, 0xc1, 0x2c, 0x77,
+ 0xeb, 0xb0, 0x5d, 0x06, 0x78, 0x23, 0xce, 0x95, 0x09, 0x52,
+ 0xbf, 0xe4, 0xec, 0xb7, 0x5a, 0x01, 0x9d, 0xc6, 0x2b, 0x70,
+ 0x0e, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92, 0x35, 0x6e,
+ 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9, 0xd7, 0x8c, 0x61, 0x3a,
+ 0xa6, 0xfd, 0x10, 0x4b, 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac,
+ 0x41, 0x1a, 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8,
+ 0x5f, 0x04, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3, 0xbd, 0xe6,
+ 0x0b, 0x50, 0xcc, 0x97, 0x7a, 0x21, 0x29, 0x72, 0x9f, 0xc4,
+ 0x58, 0x03, 0xee, 0xb5, 0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1,
+ 0x0c, 0x57, 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c,
+ 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e, 0xc5, 0x9e,
+ 0x73, 0x28, 0xb4, 0xef, 0x02, 0x59, 0x27, 0x7c, 0x91, 0xca,
+ 0x56, 0x0d, 0xe0, 0xbb, 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36,
+ 0xdb, 0x80, 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62,
+ 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6, 0x88, 0xd3,
+ 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14, 0xb3, 0xe8, 0x05, 0x5e,
+ 0xc2, 0x99, 0x74, 0x2f, 0x51, 0x0a, 0xe7, 0xbc, 0x20, 0x7b,
+ 0x96, 0xcd, 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89,
+ 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53, 0xa9, 0xf5,
+ 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20, 0x73, 0x2f, 0xcb, 0x97,
+ 0x1e, 0x42, 0xa6, 0xfa, 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e,
+ 0x9a, 0xc6, 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c,
+ 0xe6, 0xba, 0x5e, 0x02, 0x8b, 0xd7, 0x33, 0x6f, 0x3c, 0x60,
+ 0x84, 0xd8, 0x51, 0x0d, 0xe9, 0xb5, 0x9e, 0xc2, 0x26, 0x7a,
+ 0xf3, 0xaf, 0x4b, 0x17, 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75,
+ 0x91, 0xcd, 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x06, 0xe2, 0xbe,
+ 0xed, 0xb1, 0x55, 0x09, 0x80, 0xdc, 0x38, 0x64, 0xd1, 0x8d,
+ 0x69, 0x35, 0xbc, 0xe0, 0x04, 0x58, 0x0b, 0x57, 0xb3, 0xef,
+ 0x66, 0x3a, 0xde, 0x82, 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49,
+ 0xad, 0xf1, 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b,
+ 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8, 0xfb, 0xa7,
+ 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72, 0x88, 0xd4, 0x30, 0x6c,
+ 0xe5, 0xb9, 0x5d, 0x01, 0x52, 0x0e, 0xea, 0xb6, 0x3f, 0x63,
+ 0x87, 0xdb, 0x6e, 0x32, 0xd6, 0x8a, 0x03, 0x5f, 0xbb, 0xe7,
+ 0xb4, 0xe8, 0x0c, 0x50, 0xd9, 0x85, 0x61, 0x3d, 0xc7, 0x9b,
+ 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e, 0x1d, 0x41, 0xa5, 0xf9,
+ 0x70, 0x2c, 0xc8, 0x94, 0xbf, 0xe3, 0x07, 0x5b, 0xd2, 0x8e,
+ 0x6a, 0x36, 0x65, 0x39, 0xdd, 0x81, 0x08, 0x54, 0xb0, 0xec,
+ 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f, 0xcc, 0x90,
+ 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45, 0xf0, 0xac, 0x48, 0x14,
+ 0x9d, 0xc1, 0x25, 0x79, 0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b,
+ 0xff, 0xa3, 0x59, 0x05, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0,
+ 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0x0a, 0x00, 0x5d,
+ 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35,
+ 0xbb, 0xe6, 0x01, 0x5c, 0xb9, 0xe4, 0x03, 0x5e, 0xd0, 0x8d,
+ 0x6a, 0x37, 0x6b, 0x36, 0xd1, 0x8c, 0x02, 0x5f, 0xb8, 0xe5,
+ 0x6f, 0x32, 0xd5, 0x88, 0x06, 0x5b, 0xbc, 0xe1, 0xbd, 0xe0,
+ 0x07, 0x5a, 0xd4, 0x89, 0x6e, 0x33, 0xd6, 0x8b, 0x6c, 0x31,
+ 0xbf, 0xe2, 0x05, 0x58, 0x04, 0x59, 0xbe, 0xe3, 0x6d, 0x30,
+ 0xd7, 0x8a, 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0x0d, 0x50,
+ 0x0c, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82, 0x67, 0x3a,
+ 0xdd, 0x80, 0x0e, 0x53, 0xb4, 0xe9, 0xb5, 0xe8, 0x0f, 0x52,
+ 0xdc, 0x81, 0x66, 0x3b, 0xb1, 0xec, 0x0b, 0x56, 0xd8, 0x85,
+ 0x62, 0x3f, 0x63, 0x3e, 0xd9, 0x84, 0x0a, 0x57, 0xb0, 0xed,
+ 0x08, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86, 0xda, 0x87,
+ 0x60, 0x3d, 0xb3, 0xee, 0x09, 0x54, 0xa1, 0xfc, 0x1b, 0x46,
+ 0xc8, 0x95, 0x72, 0x2f, 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47,
+ 0xa0, 0xfd, 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96,
+ 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44, 0xce, 0x93,
+ 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40, 0x1c, 0x41, 0xa6, 0xfb,
+ 0x75, 0x28, 0xcf, 0x92, 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43,
+ 0xa4, 0xf9, 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b,
+ 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1, 0xad, 0xf0,
+ 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23, 0xc6, 0x9b, 0x7c, 0x21,
+ 0xaf, 0xf2, 0x15, 0x48, 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20,
+ 0xc7, 0x9a, 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e,
+ 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c, 0xa9, 0xf4,
+ 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27, 0x7b, 0x26, 0xc1, 0x9c,
+ 0x12, 0x4f, 0xa8, 0xf5, 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b,
+ 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d,
+ 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0x0e, 0x43, 0x1d,
+ 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4, 0x0f, 0x51, 0xb3, 0xed,
+ 0x6a, 0x34, 0xd6, 0x88, 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe,
+ 0x1c, 0x42, 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x01,
+ 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb, 0x1e, 0x40,
+ 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99, 0xd4, 0x8a, 0x68, 0x36,
+ 0xb1, 0xef, 0x0d, 0x53, 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac,
+ 0x4e, 0x10, 0x5d, 0x03, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda,
+ 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96, 0xdb, 0x85,
+ 0x67, 0x39, 0xbe, 0xe0, 0x02, 0x5c, 0x98, 0xc6, 0x24, 0x7a,
+ 0xfd, 0xa3, 0x41, 0x1f, 0x52, 0x0c, 0xee, 0xb0, 0x37, 0x69,
+ 0x8b, 0xd5, 0x3c, 0x62, 0x80, 0xde, 0x59, 0x07, 0xe5, 0xbb,
+ 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71, 0xb5, 0xeb,
+ 0x09, 0x57, 0xd0, 0x8e, 0x6c, 0x32, 0x7f, 0x21, 0xc3, 0x9d,
+ 0x1a, 0x44, 0xa6, 0xf8, 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x08,
+ 0xea, 0xb4, 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e,
+ 0xba, 0xe4, 0x06, 0x58, 0xdf, 0x81, 0x63, 0x3d, 0x70, 0x2e,
+ 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7, 0x22, 0x7c, 0x9e, 0xc0,
+ 0x47, 0x19, 0xfb, 0xa5, 0xe8, 0xb6, 0x54, 0x0a, 0x8d, 0xd3,
+ 0x31, 0x6f, 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c,
+ 0x61, 0x3f, 0xdd, 0x83, 0x04, 0x5a, 0xb8, 0xe6, 0x2d, 0x73,
+ 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa, 0xe7, 0xb9, 0x5b, 0x05,
+ 0x82, 0xdc, 0x3e, 0x60, 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f,
+ 0x7d, 0x23, 0x6e, 0x30, 0xd2, 0x8c, 0x0b, 0x55, 0xb7, 0xe9,
+ 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d,
+ 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42, 0x99, 0xc6, 0x27, 0x78,
+ 0xf8, 0xa7, 0x46, 0x19, 0x5b, 0x04, 0xe5, 0xba, 0x3a, 0x65,
+ 0x84, 0xdb, 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf,
+ 0xed, 0xb2, 0x53, 0x0c, 0x8c, 0xd3, 0x32, 0x6d, 0xb6, 0xe9,
+ 0x08, 0x57, 0xd7, 0x88, 0x69, 0x36, 0x74, 0x2b, 0xca, 0x95,
+ 0x15, 0x4a, 0xab, 0xf4, 0x5e, 0x01, 0xe0, 0xbf, 0x3f, 0x60,
+ 0x81, 0xde, 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c,
+ 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47, 0x05, 0x5a,
+ 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85, 0x71, 0x2e, 0xcf, 0x90,
+ 0x10, 0x4f, 0xae, 0xf1, 0xb3, 0xec, 0x0d, 0x52, 0xd2, 0x8d,
+ 0x6c, 0x33, 0xe8, 0xb7, 0x56, 0x09, 0x89, 0xd6, 0x37, 0x68,
+ 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa, 0xbc, 0xe3,
+ 0x02, 0x5d, 0xdd, 0x82, 0x63, 0x3c, 0x7e, 0x21, 0xc0, 0x9f,
+ 0x1f, 0x40, 0xa1, 0xfe, 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b,
+ 0xfa, 0xa5, 0xe7, 0xb8, 0x59, 0x06, 0x86, 0xd9, 0x38, 0x67,
+ 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13, 0x51, 0x0e,
+ 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1, 0x0a, 0x55, 0xb4, 0xeb,
+ 0x6b, 0x34, 0xd5, 0x8a, 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6,
+ 0x17, 0x48, 0xe2, 0xbd, 0x5c, 0x03, 0x83, 0xdc, 0x3d, 0x62,
+ 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0, 0x7b, 0x24,
+ 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb, 0xb9, 0xe6, 0x07, 0x58,
+ 0xd8, 0x87, 0x66, 0x39, 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3,
+ 0x12, 0x4d, 0x0f, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f,
+ 0x54, 0x0b, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4, 0x96, 0xc9,
+ 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16, 0x00, 0x60, 0xc0, 0xa0,
+ 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda,
+ 0x7a, 0x1a, 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73,
+ 0x69, 0x09, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54, 0x9c, 0xfc,
+ 0x5c, 0x3c, 0x01, 0x61, 0xc1, 0xa1, 0xbb, 0xdb, 0x7b, 0x1b,
+ 0x26, 0x46, 0xe6, 0x86, 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f,
+ 0x8f, 0xef, 0xf5, 0x95, 0x35, 0x55, 0x68, 0x08, 0xa8, 0xc8,
+ 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18, 0x02, 0x62,
+ 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f, 0x6b, 0x0b, 0xab, 0xcb,
+ 0xf6, 0x96, 0x36, 0x56, 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1,
+ 0x11, 0x71, 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84,
+ 0x9e, 0xfe, 0x5e, 0x3e, 0x03, 0x63, 0xc3, 0xa3, 0xf7, 0x97,
+ 0x37, 0x57, 0x6a, 0x0a, 0xaa, 0xca, 0xd0, 0xb0, 0x10, 0x70,
+ 0x4d, 0x2d, 0x8d, 0xed, 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7,
+ 0x17, 0x77, 0x6d, 0x0d, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50,
+ 0x04, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39, 0x23, 0x43,
+ 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e, 0xd6, 0xb6, 0x16, 0x76,
+ 0x4b, 0x2b, 0x8b, 0xeb, 0xf1, 0x91, 0x31, 0x51, 0x6c, 0x0c,
+ 0xac, 0xcc, 0x98, 0xf8, 0x58, 0x38, 0x05, 0x65, 0xc5, 0xa5,
+ 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82, 0x6f, 0x0f,
+ 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52, 0x48, 0x28, 0x88, 0xe8,
+ 0xd5, 0xb5, 0x15, 0x75, 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc,
+ 0x7c, 0x1c, 0x06, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b,
+ 0xf3, 0x93, 0x33, 0x53, 0x6e, 0x0e, 0xae, 0xce, 0xd4, 0xb4,
+ 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9, 0xbd, 0xdd, 0x7d, 0x1d,
+ 0x20, 0x40, 0xe0, 0x80, 0x9a, 0xfa, 0x5a, 0x3a, 0x07, 0x67,
+ 0xc7, 0xa7, 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a,
+ 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15, 0x5e, 0x3f,
+ 0x9c, 0xfd, 0xc7, 0xa6, 0x05, 0x64, 0x71, 0x10, 0xb3, 0xd2,
+ 0xe8, 0x89, 0x2a, 0x4b, 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44,
+ 0xe7, 0x86, 0x93, 0xf2, 0x51, 0x30, 0x0a, 0x6b, 0xc8, 0xa9,
+ 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8, 0xcd, 0xac,
+ 0x0f, 0x6e, 0x54, 0x35, 0x96, 0xf7, 0x65, 0x04, 0xa7, 0xc6,
+ 0xfc, 0x9d, 0x3e, 0x5f, 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2,
+ 0x11, 0x70, 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x01,
+ 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e, 0xd9, 0xb8,
+ 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3, 0xf6, 0x97, 0x34, 0x55,
+ 0x6f, 0x0e, 0xad, 0xcc, 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f,
+ 0xdc, 0xbd, 0xa8, 0xc9, 0x6a, 0x0b, 0x31, 0x50, 0xf3, 0x92,
+ 0xca, 0xab, 0x08, 0x69, 0x53, 0x32, 0x91, 0xf0, 0xe5, 0x84,
+ 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf, 0x94, 0xf5, 0x56, 0x37,
+ 0x0d, 0x6c, 0xcf, 0xae, 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43,
+ 0xe0, 0x81, 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c,
+ 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x02, 0x63, 0x28, 0x49,
+ 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12, 0x07, 0x66, 0xc5, 0xa4,
+ 0x9e, 0xff, 0x5c, 0x3d, 0xaf, 0xce, 0x6d, 0x0c, 0x36, 0x57,
+ 0xf4, 0x95, 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba,
+ 0xf1, 0x90, 0x33, 0x52, 0x68, 0x09, 0xaa, 0xcb, 0xde, 0xbf,
+ 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4, 0x13, 0x72, 0xd1, 0xb0,
+ 0x8a, 0xeb, 0x48, 0x29, 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4,
+ 0x67, 0x06, 0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77,
+ 0x62, 0x03, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58, 0x00, 0x62,
+ 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91,
+ 0xa2, 0xc0, 0x66, 0x04, 0x6e, 0x0c, 0xaa, 0xc8, 0xfb, 0x99,
+ 0x3f, 0x5d, 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x08, 0x6a,
+ 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef, 0xeb, 0x89,
+ 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8, 0xb2, 0xd0, 0x76, 0x14,
+ 0x27, 0x45, 0xe3, 0x81, 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72,
+ 0xd4, 0xb6, 0xa5, 0xc7, 0x61, 0x03, 0x30, 0x52, 0xf4, 0x96,
+ 0x92, 0xf0, 0x56, 0x34, 0x07, 0x65, 0xc3, 0xa1, 0xcb, 0xa9,
+ 0x0f, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8, 0xfc, 0x9e, 0x38, 0x5a,
+ 0x69, 0x0b, 0xad, 0xcf, 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e,
+ 0x28, 0x4a, 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d,
+ 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24, 0x20, 0x42,
+ 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13, 0x57, 0x35, 0x93, 0xf1,
+ 0xc2, 0xa0, 0x06, 0x64, 0x60, 0x02, 0xa4, 0xc6, 0xf5, 0x97,
+ 0x31, 0x53, 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0x0a,
+ 0x0e, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d, 0x8b, 0xe9,
+ 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8, 0xbc, 0xde, 0x78, 0x1a,
+ 0x29, 0x4b, 0xed, 0x8f, 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12,
+ 0xb4, 0xd6, 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1,
+ 0xf2, 0x90, 0x36, 0x54, 0x67, 0x05, 0xa3, 0xc1, 0xc5, 0xa7,
+ 0x01, 0x63, 0x50, 0x32, 0x94, 0xf6, 0x9c, 0xfe, 0x58, 0x3a,
+ 0x09, 0x6b, 0xcd, 0xaf, 0xab, 0xc9, 0x6f, 0x0d, 0x3e, 0x5c,
+ 0xfa, 0x98, 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d,
+ 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a, 0x40, 0x22,
+ 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73, 0x77, 0x15, 0xb3, 0xd1,
+ 0xe2, 0x80, 0x26, 0x44, 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2,
+ 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b,
+ 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a, 0x41, 0x22,
+ 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75, 0xfc, 0x9f, 0x3a, 0x59,
+ 0x6d, 0x0e, 0xab, 0xc8, 0xc3, 0xa0, 0x05, 0x66, 0x52, 0x31,
+ 0x94, 0xf7, 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6,
+ 0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89, 0xe5, 0x86,
+ 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1, 0xda, 0xb9, 0x1c, 0x7f,
+ 0x4b, 0x28, 0x8d, 0xee, 0x9b, 0xf8, 0x5d, 0x3e, 0x0a, 0x69,
+ 0xcc, 0xaf, 0xa4, 0xc7, 0x62, 0x01, 0x35, 0x56, 0xf3, 0x90,
+ 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d, 0x26, 0x45,
+ 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12, 0x67, 0x04, 0xa1, 0xc2,
+ 0xf6, 0x95, 0x30, 0x53, 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa,
+ 0x0f, 0x6c, 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3,
+ 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc, 0xa9, 0xca,
+ 0x6f, 0x0c, 0x38, 0x5b, 0xfe, 0x9d, 0x96, 0xf5, 0x50, 0x33,
+ 0x07, 0x64, 0xc1, 0xa2, 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9,
+ 0x7c, 0x1f, 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20,
+ 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x02, 0x61, 0x6a, 0x09,
+ 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e, 0x32, 0x51, 0xf4, 0x97,
+ 0xa3, 0xc0, 0x65, 0x06, 0x0d, 0x6e, 0xcb, 0xa8, 0x9c, 0xff,
+ 0x5a, 0x39, 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78,
+ 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47, 0xce, 0xad,
+ 0x08, 0x6b, 0x5f, 0x3c, 0x99, 0xfa, 0xf1, 0x92, 0x37, 0x54,
+ 0x60, 0x03, 0xa6, 0xc5, 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42,
+ 0xe7, 0x84, 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb,
+ 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63,
+ 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26, 0x0e, 0x6a, 0xc6, 0xa2,
+ 0x83, 0xe7, 0x4b, 0x2f, 0x09, 0x6d, 0xc1, 0xa5, 0x84, 0xe0,
+ 0x4c, 0x28, 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d,
+ 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a, 0x12, 0x76,
+ 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33, 0x15, 0x71, 0xdd, 0xb9,
+ 0x98, 0xfc, 0x50, 0x34, 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1,
+ 0x7d, 0x19, 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e,
+ 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17, 0x31, 0x55,
+ 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10, 0x24, 0x40, 0xec, 0x88,
+ 0xa9, 0xcd, 0x61, 0x05, 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca,
+ 0x66, 0x02, 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0x0b,
+ 0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0x0c, 0x70, 0x14,
+ 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51, 0x77, 0x13, 0xbf, 0xdb,
+ 0xfa, 0x9e, 0x32, 0x56, 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97,
+ 0x3b, 0x5f, 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58,
+ 0x6c, 0x08, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d, 0x6b, 0x0f,
+ 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a, 0x62, 0x06, 0xaa, 0xce,
+ 0xef, 0x8b, 0x27, 0x43, 0x65, 0x01, 0xad, 0xc9, 0xe8, 0x8c,
+ 0x20, 0x44, 0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0x0d, 0x69,
+ 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0x0a, 0x6e, 0x46, 0x22,
+ 0x8e, 0xea, 0xcb, 0xaf, 0x03, 0x67, 0x41, 0x25, 0x89, 0xed,
+ 0xcc, 0xa8, 0x04, 0x60, 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd,
+ 0x11, 0x75, 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72,
+ 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b, 0x5d, 0x39,
+ 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c, 0x00, 0x65, 0xca, 0xaf,
+ 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3,
+ 0x4c, 0x29, 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38,
+ 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37, 0x3c, 0x59,
+ 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a, 0x33, 0x56, 0xf9, 0x9c,
+ 0xba, 0xdf, 0x70, 0x15, 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce,
+ 0x61, 0x04, 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0x0b,
+ 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e, 0x77, 0x12,
+ 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51, 0x66, 0x03, 0xac, 0xc9,
+ 0xef, 0x8a, 0x25, 0x40, 0x69, 0x0c, 0xa3, 0xc6, 0xe0, 0x85,
+ 0x2a, 0x4f, 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x07, 0x62,
+ 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x08, 0x6d, 0x5a, 0x3f,
+ 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c, 0x55, 0x30, 0x9f, 0xfa,
+ 0xdc, 0xb9, 0x16, 0x73, 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c,
+ 0xb3, 0xd6, 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9,
+ 0xee, 0x8b, 0x24, 0x41, 0x67, 0x02, 0xad, 0xc8, 0xe1, 0x84,
+ 0x2b, 0x4e, 0x68, 0x0d, 0xa2, 0xc7, 0xcc, 0xa9, 0x06, 0x63,
+ 0x45, 0x20, 0x8f, 0xea, 0xc3, 0xa6, 0x09, 0x6c, 0x4a, 0x2f,
+ 0x80, 0xe5, 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4,
+ 0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb, 0x88, 0xed,
+ 0x42, 0x27, 0x01, 0x64, 0xcb, 0xae, 0x87, 0xe2, 0x4d, 0x28,
+ 0x0e, 0x6b, 0xc4, 0xa1, 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a,
+ 0xd5, 0xb0, 0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf,
+ 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92, 0xbb, 0xde,
+ 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d, 0xaa, 0xcf, 0x60, 0x05,
+ 0x23, 0x46, 0xe9, 0x8c, 0xa5, 0xc0, 0x6f, 0x0a, 0x2c, 0x49,
+ 0xe6, 0x83, 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f,
+ 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38, 0x2e, 0x48,
+ 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x01, 0x39, 0x5f, 0xf5, 0x93,
+ 0xbc, 0xda, 0x70, 0x16, 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf,
+ 0x15, 0x73, 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x02, 0x64,
+ 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d, 0x65, 0x03,
+ 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a, 0xb8, 0xde, 0x74, 0x12,
+ 0x3d, 0x5b, 0xf1, 0x97, 0xaf, 0xc9, 0x63, 0x05, 0x2a, 0x4c,
+ 0xe6, 0x80, 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9,
+ 0x81, 0xe7, 0x4d, 0x2b, 0x04, 0x62, 0xc8, 0xae, 0xe4, 0x82,
+ 0x28, 0x4e, 0x61, 0x07, 0xad, 0xcb, 0xf3, 0x95, 0x3f, 0x59,
+ 0x76, 0x10, 0xba, 0xdc, 0xca, 0xac, 0x06, 0x60, 0x4f, 0x29,
+ 0x83, 0xe5, 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2,
+ 0x6d, 0x0b, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42, 0x7a, 0x1c,
+ 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55, 0x43, 0x25, 0x8f, 0xe9,
+ 0xc6, 0xa0, 0x0a, 0x6c, 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7,
+ 0x1d, 0x7b, 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e,
+ 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x09, 0x1f, 0x79,
+ 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30, 0x08, 0x6e, 0xc4, 0xa2,
+ 0x8d, 0xeb, 0x41, 0x27, 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36,
+ 0x9c, 0xfa, 0xc2, 0xa4, 0x0e, 0x68, 0x47, 0x21, 0x8b, 0xed,
+ 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4, 0xec, 0x8a,
+ 0x20, 0x46, 0x69, 0x0f, 0xa5, 0xc3, 0x89, 0xef, 0x45, 0x23,
+ 0x0c, 0x6a, 0xc0, 0xa6, 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d,
+ 0xd7, 0xb1, 0xa7, 0xc1, 0x6b, 0x0d, 0x22, 0x44, 0xee, 0x88,
+ 0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 0xf9, 0x9f, 0x00, 0x67,
+ 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6,
+ 0x9e, 0xf9, 0x50, 0x37, 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8,
+ 0x71, 0x16, 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x09,
+ 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54, 0x63, 0x04,
+ 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b, 0x42, 0x25, 0x8c, 0xeb,
+ 0xc3, 0xa4, 0x0d, 0x6a, 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb,
+ 0x12, 0x75, 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0,
+ 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x01, 0xa8, 0xcf, 0xc6, 0xa1,
+ 0x08, 0x6f, 0x47, 0x20, 0x89, 0xee, 0xd9, 0xbe, 0x17, 0x70,
+ 0x58, 0x3f, 0x96, 0xf1, 0x84, 0xe3, 0x4a, 0x2d, 0x05, 0x62,
+ 0xcb, 0xac, 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3,
+ 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92, 0xa5, 0xc2,
+ 0x6b, 0x0c, 0x24, 0x43, 0xea, 0x8d, 0xed, 0x8a, 0x23, 0x44,
+ 0x6c, 0x0b, 0xa2, 0xc5, 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14,
+ 0xbd, 0xda, 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb,
+ 0xcc, 0xab, 0x02, 0x65, 0x4d, 0x2a, 0x83, 0xe4, 0x91, 0xf6,
+ 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9, 0x8e, 0xe9, 0x40, 0x27,
+ 0x0f, 0x68, 0xc1, 0xa6, 0xaf, 0xc8, 0x61, 0x06, 0x2e, 0x49,
+ 0xe0, 0x87, 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98,
+ 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d, 0x0a, 0x6d,
+ 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22, 0x2b, 0x4c, 0xe5, 0x82,
+ 0xaa, 0xcd, 0x64, 0x03, 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2,
+ 0x7b, 0x1c, 0x69, 0x0e, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41,
+ 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e, 0x57, 0x30,
+ 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f, 0x48, 0x2f, 0x86, 0xe1,
+ 0xc9, 0xae, 0x07, 0x60, 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5,
+ 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62,
+ 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb, 0xa9, 0xc1,
+ 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac, 0x81, 0xe9, 0x51, 0x39,
+ 0x3c, 0x54, 0xec, 0x84, 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33,
+ 0x8b, 0xe3, 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a,
+ 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d, 0x1f, 0x77,
+ 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a, 0x78, 0x10, 0xa8, 0xc0,
+ 0xc5, 0xad, 0x15, 0x7d, 0xd1, 0xb9, 0x01, 0x69, 0x6c, 0x04,
+ 0xbc, 0xd4, 0xb6, 0xde, 0x66, 0x0e, 0x0b, 0x63, 0xdb, 0xb3,
+ 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b, 0xf9, 0x91,
+ 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc, 0x50, 0x38, 0x80, 0xe8,
+ 0xed, 0x85, 0x3d, 0x55, 0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2,
+ 0x5a, 0x32, 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b,
+ 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c, 0xf0, 0x98,
+ 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5, 0x97, 0xff, 0x47, 0x2f,
+ 0x2a, 0x42, 0xfa, 0x92, 0xbf, 0xd7, 0x6f, 0x07, 0x02, 0x6a,
+ 0xd2, 0xba, 0xd8, 0xb0, 0x08, 0x60, 0x65, 0x0d, 0xb5, 0xdd,
+ 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74, 0x16, 0x7e,
+ 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13, 0x21, 0x49, 0xf1, 0x99,
+ 0x9c, 0xf4, 0x4c, 0x24, 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93,
+ 0x2b, 0x43, 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea,
+ 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d, 0xa0, 0xc8,
+ 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5, 0xc7, 0xaf, 0x17, 0x7f,
+ 0x7a, 0x12, 0xaa, 0xc2, 0x6e, 0x06, 0xbe, 0xd6, 0xd3, 0xbb,
+ 0x03, 0x6b, 0x09, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0x0c,
+ 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06,
+ 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d, 0xde, 0xb7, 0x0c, 0x65,
+ 0x67, 0x0e, 0xb5, 0xdc, 0xb1, 0xd8, 0x63, 0x0a, 0x08, 0x61,
+ 0xda, 0xb3, 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3,
+ 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc, 0x7f, 0x16,
+ 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d, 0x10, 0x79, 0xc2, 0xab,
+ 0xa9, 0xc0, 0x7b, 0x12, 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f,
+ 0x34, 0x5d, 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32,
+ 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83, 0xee, 0x87,
+ 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec, 0xfe, 0x97, 0x2c, 0x45,
+ 0x47, 0x2e, 0x95, 0xfc, 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41,
+ 0xfa, 0x93, 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22,
+ 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d, 0xbe, 0xd7,
+ 0x6c, 0x05, 0x07, 0x6e, 0xd5, 0xbc, 0xd1, 0xb8, 0x03, 0x6a,
+ 0x68, 0x01, 0xba, 0xd3, 0x60, 0x09, 0xb2, 0xdb, 0xd9, 0xb0,
+ 0x0b, 0x62, 0x0f, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0x0d,
+ 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d, 0x70, 0x19,
+ 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72, 0xc1, 0xa8, 0x13, 0x7a,
+ 0x78, 0x11, 0xaa, 0xc3, 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e,
+ 0xc5, 0xac, 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3,
+ 0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c, 0x3f, 0x56,
+ 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d, 0x50, 0x39, 0x82, 0xeb,
+ 0xe9, 0x80, 0x3b, 0x52, 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90,
+ 0x2b, 0x42, 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d,
+ 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c, 0xf1, 0x98,
+ 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3, 0x00, 0x6a, 0xd4, 0xbe,
+ 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8,
+ 0x16, 0x7c, 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5,
+ 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92, 0xc1, 0xab,
+ 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca, 0xb6, 0xdc, 0x62, 0x08,
+ 0x03, 0x69, 0xd7, 0xbd, 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0,
+ 0x4e, 0x24, 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53,
+ 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94, 0xe8, 0x82,
+ 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3, 0x71, 0x1b, 0xa5, 0xcf,
+ 0xc4, 0xae, 0x10, 0x7a, 0x06, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9,
+ 0x67, 0x0d, 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55,
+ 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22, 0xb0, 0xda,
+ 0x64, 0x0e, 0x05, 0x6f, 0xd1, 0xbb, 0xc7, 0xad, 0x13, 0x79,
+ 0x72, 0x18, 0xa6, 0xcc, 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc,
+ 0x42, 0x28, 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f,
+ 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6, 0xba, 0xd0,
+ 0x6e, 0x04, 0x0f, 0x65, 0xdb, 0xb1, 0xe2, 0x88, 0x36, 0x5c,
+ 0x57, 0x3d, 0x83, 0xe9, 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a,
+ 0xf4, 0x9e, 0x0c, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x07,
+ 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70, 0xbc, 0xd6,
+ 0x68, 0x02, 0x09, 0x63, 0xdd, 0xb7, 0xcb, 0xa1, 0x1f, 0x75,
+ 0x7e, 0x14, 0xaa, 0xc0, 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d,
+ 0x33, 0x59, 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e,
+ 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76, 0x0a, 0x60,
+ 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x01, 0x93, 0xf9, 0x47, 0x2d,
+ 0x26, 0x4c, 0xf2, 0x98, 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b,
+ 0x85, 0xef, 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c,
+ 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73, 0xfe, 0x95,
+ 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2, 0x81, 0xea, 0x57, 0x3c,
+ 0x30, 0x5b, 0xe6, 0x8d, 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b,
+ 0x86, 0xed, 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92,
+ 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13, 0x60, 0x0b,
+ 0xb6, 0xdd, 0xd1, 0xba, 0x07, 0x6c, 0xdf, 0xb4, 0x09, 0x62,
+ 0x6e, 0x05, 0xb8, 0xd3, 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a,
+ 0xc7, 0xac, 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d,
+ 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52, 0x3e, 0x55,
+ 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32, 0x41, 0x2a, 0x97, 0xfc,
+ 0xf0, 0x9b, 0x26, 0x4d, 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a,
+ 0xa7, 0xcc, 0xbf, 0xd4, 0x69, 0x02, 0x0e, 0x65, 0xd8, 0xb3,
+ 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf, 0xdc, 0xb7,
+ 0x0a, 0x61, 0x6d, 0x06, 0xbb, 0xd0, 0x5d, 0x36, 0x8b, 0xe0,
+ 0xec, 0x87, 0x3a, 0x51, 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8,
+ 0x45, 0x2e, 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e,
+ 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31, 0xbc, 0xd7,
+ 0x6a, 0x01, 0x0d, 0x66, 0xdb, 0xb0, 0xc3, 0xa8, 0x15, 0x7e,
+ 0x72, 0x19, 0xa4, 0xcf, 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6,
+ 0x1b, 0x70, 0x03, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0x0f,
+ 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e, 0xfd, 0x96,
+ 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1, 0x9d, 0xf6, 0x4b, 0x20,
+ 0x2c, 0x47, 0xfa, 0x91, 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38,
+ 0x85, 0xee, 0x63, 0x08, 0xb5, 0xde, 0xd2, 0xb9, 0x04, 0x6f,
+ 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10, 0x00, 0x6c,
+ 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3,
+ 0xea, 0x86, 0x32, 0x5e, 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f,
+ 0xfb, 0x97, 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x08, 0xbc, 0xd0,
+ 0x01, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18, 0x46, 0x2a,
+ 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f, 0x8f, 0xe3, 0x57, 0x3b,
+ 0x22, 0x4e, 0xfa, 0x96, 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x09,
+ 0xbd, 0xd1, 0x02, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b,
+ 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c, 0x8c, 0xe0,
+ 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95, 0xcb, 0xa7, 0x13, 0x7f,
+ 0x66, 0x0a, 0xbe, 0xd2, 0x03, 0x6f, 0xdb, 0xb7, 0xae, 0xc2,
+ 0x76, 0x1a, 0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d,
+ 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94, 0xca, 0xa6,
+ 0x12, 0x7e, 0x67, 0x0b, 0xbf, 0xd3, 0x04, 0x68, 0xdc, 0xb0,
+ 0xa9, 0xc5, 0x71, 0x1d, 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82,
+ 0x36, 0x5a, 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93,
+ 0xcd, 0xa1, 0x15, 0x79, 0x60, 0x0c, 0xb8, 0xd4, 0x05, 0x69,
+ 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c, 0x42, 0x2e, 0x9a, 0xf6,
+ 0xef, 0x83, 0x37, 0x5b, 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a,
+ 0xfe, 0x92, 0xcc, 0xa0, 0x14, 0x78, 0x61, 0x0d, 0xb9, 0xd5,
+ 0x06, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f, 0x41, 0x2d,
+ 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58, 0x88, 0xe4, 0x50, 0x3c,
+ 0x25, 0x49, 0xfd, 0x91, 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0x0e,
+ 0xba, 0xd6, 0x07, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e,
+ 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59, 0x89, 0xe5,
+ 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90, 0xce, 0xa2, 0x16, 0x7a,
+ 0x63, 0x0f, 0xbb, 0xd7, 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4,
+ 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51,
+ 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80, 0xd1, 0xbc,
+ 0x0b, 0x66, 0x78, 0x15, 0xa2, 0xcf, 0x21, 0x4c, 0xfb, 0x96,
+ 0x88, 0xe5, 0x52, 0x3f, 0x6e, 0x03, 0xb4, 0xd9, 0xc7, 0xaa,
+ 0x1d, 0x70, 0xbf, 0xd2, 0x65, 0x08, 0x16, 0x7b, 0xcc, 0xa1,
+ 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee, 0x42, 0x2f,
+ 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c, 0x0d, 0x60, 0xd7, 0xba,
+ 0xa4, 0xc9, 0x7e, 0x13, 0xdc, 0xb1, 0x06, 0x6b, 0x75, 0x18,
+ 0xaf, 0xc2, 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d,
+ 0x63, 0x0e, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d, 0x2c, 0x41,
+ 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32, 0xfd, 0x90, 0x27, 0x4a,
+ 0x54, 0x39, 0x8e, 0xe3, 0xb2, 0xdf, 0x68, 0x05, 0x1b, 0x76,
+ 0xc1, 0xac, 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a,
+ 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0x0f, 0xb8, 0xd5, 0x1a, 0x77,
+ 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x04, 0x55, 0x38, 0x8f, 0xe2,
+ 0xfc, 0x91, 0x26, 0x4b, 0xa5, 0xc8, 0x7f, 0x12, 0x0c, 0x61,
+ 0xd6, 0xbb, 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4,
+ 0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25, 0x74, 0x19,
+ 0xae, 0xc3, 0xdd, 0xb0, 0x07, 0x6a, 0xc6, 0xab, 0x1c, 0x71,
+ 0x6f, 0x02, 0xb5, 0xd8, 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d,
+ 0xfa, 0x97, 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46,
+ 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x09, 0xe7, 0x8a,
+ 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9, 0xa8, 0xc5, 0x72, 0x1f,
+ 0x01, 0x6c, 0xdb, 0xb6, 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd,
+ 0x0a, 0x67, 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28,
+ 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39,
+ 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40, 0xae, 0xc0, 0x72, 0x1c,
+ 0x0b, 0x65, 0xd7, 0xb9, 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32,
+ 0x80, 0xee, 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56,
+ 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x01, 0xef, 0x81,
+ 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8, 0xb8, 0xd6, 0x64, 0x0a,
+ 0x1d, 0x73, 0xc1, 0xaf, 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49,
+ 0xfb, 0x95, 0xd5, 0xbb, 0x09, 0x67, 0x70, 0x1e, 0xac, 0xc2,
+ 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b, 0x7b, 0x15,
+ 0xa7, 0xc9, 0xde, 0xb0, 0x02, 0x6c, 0xc3, 0xad, 0x1f, 0x71,
+ 0x66, 0x08, 0xba, 0xd4, 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f,
+ 0xed, 0x83, 0x6d, 0x03, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a,
+ 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d, 0x19, 0x77,
+ 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0x0e, 0x4e, 0x20, 0x92, 0xfc,
+ 0xeb, 0x85, 0x37, 0x59, 0xb7, 0xd9, 0x6b, 0x05, 0x12, 0x7c,
+ 0xce, 0xa0, 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7,
+ 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f, 0x0f, 0x61,
+ 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18, 0xf6, 0x98, 0x2a, 0x44,
+ 0x53, 0x3d, 0x8f, 0xe1, 0xa1, 0xcf, 0x7d, 0x13, 0x04, 0x6a,
+ 0xd8, 0xb6, 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c,
+ 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x07, 0xb5, 0xdb, 0x35, 0x5b,
+ 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22, 0x62, 0x0c, 0xbe, 0xd0,
+ 0xc7, 0xa9, 0x1b, 0x75, 0xda, 0xb4, 0x06, 0x68, 0x7f, 0x11,
+ 0xa3, 0xcd, 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a,
+ 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0x0d, 0x63, 0x23, 0x4d,
+ 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34, 0x00, 0x6f, 0xde, 0xb1,
+ 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91,
+ 0x20, 0x4f, 0xbe, 0xd1, 0x60, 0x0f, 0x1f, 0x70, 0xc1, 0xae,
+ 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1, 0x61, 0x0e,
+ 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71, 0x3e, 0x51, 0xe0, 0x8f,
+ 0x9f, 0xf0, 0x41, 0x2e, 0xdf, 0xb0, 0x01, 0x6e, 0x7e, 0x11,
+ 0xa0, 0xcf, 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90,
+ 0xc2, 0xad, 0x1c, 0x73, 0x63, 0x0c, 0xbd, 0xd2, 0x9d, 0xf2,
+ 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d, 0x7c, 0x13, 0xa2, 0xcd,
+ 0xdd, 0xb2, 0x03, 0x6c, 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed,
+ 0x5c, 0x33, 0xa3, 0xcc, 0x7d, 0x12, 0x02, 0x6d, 0xdc, 0xb3,
+ 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec, 0x1d, 0x72,
+ 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0x0d, 0x42, 0x2d, 0x9c, 0xf3,
+ 0xe3, 0x8c, 0x3d, 0x52, 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57,
+ 0xe6, 0x89, 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x08, 0xb9, 0xd6,
+ 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37, 0x78, 0x17,
+ 0xa6, 0xc9, 0xd9, 0xb6, 0x07, 0x68, 0xf8, 0x97, 0x26, 0x49,
+ 0x59, 0x36, 0x87, 0xe8, 0xa7, 0xc8, 0x79, 0x16, 0x06, 0x69,
+ 0xd8, 0xb7, 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56,
+ 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x09, 0x5b, 0x34,
+ 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b, 0x04, 0x6b, 0xda, 0xb5,
+ 0xa5, 0xca, 0x7b, 0x14, 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b,
+ 0x9a, 0xf5, 0xba, 0xd5, 0x64, 0x0b, 0x1b, 0x74, 0xc5, 0xaa,
+ 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a, 0x65, 0x0a,
+ 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75, 0x84, 0xeb, 0x5a, 0x35,
+ 0x25, 0x4a, 0xfb, 0x94, 0xdb, 0xb4, 0x05, 0x6a, 0x7a, 0x15,
+ 0xa4, 0xcb, 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d,
+ 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea, 0x53, 0x23,
+ 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e, 0xf4, 0x84, 0x14, 0x64,
+ 0x29, 0x59, 0xc9, 0xb9, 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0x0b,
+ 0x9b, 0xeb, 0x01, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c,
+ 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8, 0x52, 0x22,
+ 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f, 0x51, 0x21, 0xb1, 0xc1,
+ 0x8c, 0xfc, 0x6c, 0x1c, 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b,
+ 0xcb, 0xbb, 0x02, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f,
+ 0xa5, 0xd5, 0x45, 0x35, 0x78, 0x08, 0x98, 0xe8, 0xf7, 0x87,
+ 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba, 0x50, 0x20, 0xb0, 0xc0,
+ 0x8d, 0xfd, 0x6d, 0x1d, 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x09,
+ 0x99, 0xe9, 0x03, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e,
+ 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0x0f, 0x9f, 0xef, 0x05, 0x75,
+ 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48, 0xf1, 0x81, 0x11, 0x61,
+ 0x2c, 0x5c, 0xcc, 0xbc, 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb,
+ 0x6b, 0x1b, 0x04, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49,
+ 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0x0e, 0x9e, 0xee, 0x57, 0x27,
+ 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a, 0xf0, 0x80, 0x10, 0x60,
+ 0x2d, 0x5d, 0xcd, 0xbd, 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e,
+ 0xce, 0xbe, 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19,
+ 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0x0d, 0x9d, 0xed, 0x07, 0x77,
+ 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a, 0x55, 0x25, 0xb5, 0xc5,
+ 0x88, 0xf8, 0x68, 0x18, 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f,
+ 0xcf, 0xbf, 0x06, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b,
+ 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0x0c, 0x9c, 0xec, 0x00, 0x71,
+ 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c,
+ 0x76, 0x07, 0x94, 0xe5, 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb,
+ 0x78, 0x09, 0xec, 0x9d, 0x0e, 0x7f, 0x35, 0x44, 0xd7, 0xa6,
+ 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc, 0x29, 0x58,
+ 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63, 0xc5, 0xb4, 0x27, 0x56,
+ 0x1c, 0x6d, 0xfe, 0x8f, 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2,
+ 0x51, 0x20, 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b,
+ 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4, 0x52, 0x23,
+ 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18, 0xfd, 0x8c, 0x1f, 0x6e,
+ 0x24, 0x55, 0xc6, 0xb7, 0x97, 0xe6, 0x75, 0x04, 0x4e, 0x3f,
+ 0xac, 0xdd, 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x03, 0x72,
+ 0xd4, 0xa5, 0x36, 0x47, 0x0d, 0x7c, 0xef, 0x9e, 0x7b, 0x0a,
+ 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31, 0x22, 0x53, 0xc0, 0xb1,
+ 0xfb, 0x8a, 0x19, 0x68, 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25,
+ 0xb6, 0xc7, 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b,
+ 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84, 0xa4, 0xd5,
+ 0x46, 0x37, 0x7d, 0x0c, 0x9f, 0xee, 0x0b, 0x7a, 0xe9, 0x98,
+ 0xd2, 0xa3, 0x30, 0x41, 0xe7, 0x96, 0x05, 0x74, 0x3e, 0x4f,
+ 0xdc, 0xad, 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x02,
+ 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x08, 0x79, 0x9c, 0xed,
+ 0x7e, 0x0f, 0x45, 0x34, 0xa7, 0xd6, 0x70, 0x01, 0x92, 0xe3,
+ 0xa9, 0xd8, 0x4b, 0x3a, 0xdf, 0xae, 0x3d, 0x4c, 0x06, 0x77,
+ 0xe4, 0x95, 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff,
+ 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50, 0xf6, 0x87,
+ 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc, 0x59, 0x28, 0xbb, 0xca,
+ 0x80, 0xf1, 0x62, 0x13, 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7,
+ 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4,
+ 0x73, 0x01, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30, 0xc4, 0xb6,
+ 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87, 0xe6, 0x94, 0x02, 0x70,
+ 0x33, 0x41, 0xd7, 0xa5, 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6,
+ 0x60, 0x12, 0x95, 0xe7, 0x71, 0x03, 0x40, 0x32, 0xa4, 0xd6,
+ 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61, 0xd1, 0xa3,
+ 0x35, 0x47, 0x04, 0x76, 0xe0, 0x92, 0x66, 0x14, 0x82, 0xf0,
+ 0xb3, 0xc1, 0x57, 0x25, 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x05,
+ 0x93, 0xe1, 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56,
+ 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x06, 0x74, 0x80, 0xf2,
+ 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3, 0x44, 0x36, 0xa0, 0xd2,
+ 0x91, 0xe3, 0x75, 0x07, 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54,
+ 0xc2, 0xb0, 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc,
+ 0x08, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b, 0xcc, 0xbe,
+ 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f, 0x7b, 0x09, 0x9f, 0xed,
+ 0xae, 0xdc, 0x4a, 0x38, 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe,
+ 0x68, 0x1a, 0xee, 0x9c, 0x0a, 0x78, 0x3b, 0x49, 0xdf, 0xad,
+ 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69, 0x9d, 0xef,
+ 0x79, 0x0b, 0x48, 0x3a, 0xac, 0xde, 0x6e, 0x1c, 0x8a, 0xf8,
+ 0xbb, 0xc9, 0x5f, 0x2d, 0xd9, 0xab, 0x3d, 0x4f, 0x0c, 0x7e,
+ 0xe8, 0x9a, 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e,
+ 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0x0d, 0x9b, 0xe9, 0x88, 0xfa,
+ 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb, 0x3f, 0x4d, 0xdb, 0xa9,
+ 0xea, 0x98, 0x0e, 0x7c, 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c,
+ 0xca, 0xb8, 0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0x0f,
+ 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc,
+ 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb, 0x63, 0x10, 0x85, 0xf6,
+ 0xb2, 0xc1, 0x54, 0x27, 0xdc, 0xaf, 0x3a, 0x49, 0x0d, 0x7e,
+ 0xeb, 0x98, 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82,
+ 0x79, 0x0a, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d, 0xa5, 0xd6,
+ 0x43, 0x30, 0x74, 0x07, 0x92, 0xe1, 0x1a, 0x69, 0xfc, 0x8f,
+ 0xcb, 0xb8, 0x2d, 0x5e, 0x91, 0xe2, 0x77, 0x04, 0x40, 0x33,
+ 0xa6, 0xd5, 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a,
+ 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6, 0x4d, 0x3e,
+ 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x09, 0x57, 0x24, 0xb1, 0xc2,
+ 0x86, 0xf5, 0x60, 0x13, 0xe8, 0x9b, 0x0e, 0x7d, 0x39, 0x4a,
+ 0xdf, 0xac, 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x03, 0x70,
+ 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf, 0x3f, 0x4c,
+ 0xd9, 0xaa, 0xee, 0x9d, 0x08, 0x7b, 0x80, 0xf3, 0x66, 0x15,
+ 0x51, 0x22, 0xb7, 0xc4, 0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe,
+ 0x6b, 0x18, 0xe3, 0x90, 0x05, 0x76, 0x32, 0x41, 0xd4, 0xa7,
+ 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd, 0x46, 0x35,
+ 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x02, 0x9a, 0xe9, 0x7c, 0x0f,
+ 0x4b, 0x38, 0xad, 0xde, 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87,
+ 0x12, 0x61, 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0x0c, 0x99, 0xea,
+ 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55, 0xcd, 0xbe,
+ 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89, 0x72, 0x01, 0x94, 0xe7,
+ 0xa3, 0xd0, 0x45, 0x36, 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca,
+ 0x5f, 0x2c, 0xd7, 0xa4, 0x31, 0x42, 0x06, 0x75, 0xe0, 0x93,
+ 0x0b, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f, 0xb4, 0xc7,
+ 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0, 0x00, 0x74, 0xe8, 0x9c,
+ 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e,
+ 0xa2, 0xd6, 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42,
+ 0x94, 0xe0, 0x7c, 0x08, 0x59, 0x2d, 0xb1, 0xc5, 0x26, 0x52,
+ 0xce, 0xba, 0xeb, 0x9f, 0x03, 0x77, 0xa1, 0xd5, 0x49, 0x3d,
+ 0x6c, 0x18, 0x84, 0xf0, 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c,
+ 0x10, 0x64, 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0x0b, 0x97, 0xe3,
+ 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d, 0xcb, 0xbf,
+ 0x23, 0x57, 0x06, 0x72, 0xee, 0x9a, 0x5f, 0x2b, 0xb7, 0xc3,
+ 0x92, 0xe6, 0x7a, 0x0e, 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61,
+ 0xfd, 0x89, 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b,
+ 0xed, 0x99, 0x05, 0x71, 0x20, 0x54, 0xc8, 0xbc, 0x79, 0x0d,
+ 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28, 0xfe, 0x8a, 0x16, 0x62,
+ 0x33, 0x47, 0xdb, 0xaf, 0x98, 0xec, 0x70, 0x04, 0x55, 0x21,
+ 0xbd, 0xc9, 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e,
+ 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda, 0x0c, 0x78,
+ 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d, 0xbe, 0xca, 0x56, 0x22,
+ 0x73, 0x07, 0x9b, 0xef, 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80,
+ 0x1c, 0x68, 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc,
+ 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0x0f, 0x7b, 0xd4, 0xa0,
+ 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85, 0x53, 0x27, 0xbb, 0xcf,
+ 0x9e, 0xea, 0x76, 0x02, 0xc7, 0xb3, 0x2f, 0x5b, 0x0a, 0x7e,
+ 0xe2, 0x96, 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11,
+ 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3, 0x75, 0x01,
+ 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24, 0xe1, 0x95, 0x09, 0x7d,
+ 0x2c, 0x58, 0xc4, 0xb0, 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf,
+ 0x43, 0x37, 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56,
+ 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9, 0x03, 0x76,
+ 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55, 0x8c, 0xf9, 0x66, 0x13,
+ 0x45, 0x30, 0xaf, 0xda, 0x06, 0x73, 0xec, 0x99, 0xcf, 0xba,
+ 0x25, 0x50, 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf,
+ 0x05, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53, 0x8a, 0xff,
+ 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc, 0x0c, 0x79, 0xe6, 0x93,
+ 0xc5, 0xb0, 0x2f, 0x5a, 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f,
+ 0xa0, 0xd5, 0x0f, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59,
+ 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6, 0x0a, 0x7f,
+ 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c, 0x85, 0xf0, 0x6f, 0x1a,
+ 0x4c, 0x39, 0xa6, 0xd3, 0x09, 0x7c, 0xe3, 0x96, 0xc0, 0xb5,
+ 0x2a, 0x5f, 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0,
+ 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e, 0x97, 0xe2,
+ 0x7d, 0x08, 0x5e, 0x2b, 0xb4, 0xc1, 0x1b, 0x6e, 0xf1, 0x84,
+ 0xd2, 0xa7, 0x38, 0x4d, 0x94, 0xe1, 0x7e, 0x0b, 0x5d, 0x28,
+ 0xb7, 0xc2, 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48,
+ 0x91, 0xe4, 0x7b, 0x0e, 0x58, 0x2d, 0xb2, 0xc7, 0x1d, 0x68,
+ 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b, 0x92, 0xe7, 0x78, 0x0d,
+ 0x5b, 0x2e, 0xb1, 0xc4, 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8,
+ 0x37, 0x42, 0x9b, 0xee, 0x71, 0x04, 0x52, 0x27, 0xb8, 0xcd,
+ 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41, 0x98, 0xed,
+ 0x72, 0x07, 0x51, 0x24, 0xbb, 0xce, 0x12, 0x67, 0xf8, 0x8d,
+ 0xdb, 0xae, 0x31, 0x44, 0x9d, 0xe8, 0x77, 0x02, 0x54, 0x21,
+ 0xbe, 0xcb, 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47,
+ 0x9e, 0xeb, 0x74, 0x01, 0x57, 0x22, 0xbd, 0xc8, 0x00, 0x76,
+ 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d,
+ 0x52, 0x24, 0xbe, 0xc8, 0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80,
+ 0x1a, 0x6c, 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb,
+ 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39, 0xf1, 0x87,
+ 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae, 0x55, 0x23, 0xb9, 0xcf,
+ 0x90, 0xe6, 0x7c, 0x0a, 0xc2, 0xb4, 0x2e, 0x58, 0x07, 0x71,
+ 0xeb, 0x9d, 0xcc, 0xba, 0x20, 0x56, 0x09, 0x7f, 0xe5, 0x93,
+ 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x04, 0xff, 0x89,
+ 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0, 0x68, 0x1e, 0x84, 0xf2,
+ 0xad, 0xdb, 0x41, 0x37, 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19,
+ 0x83, 0xf5, 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62,
+ 0x99, 0xef, 0x75, 0x03, 0x5c, 0x2a, 0xb0, 0xc6, 0x0e, 0x78,
+ 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51, 0x85, 0xf3, 0x69, 0x1f,
+ 0x40, 0x36, 0xac, 0xda, 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1,
+ 0x3b, 0x4d, 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x05, 0x9f, 0xe9,
+ 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x08, 0x7e, 0xe3, 0x95,
+ 0x0f, 0x79, 0x26, 0x50, 0xca, 0xbc, 0x74, 0x02, 0x98, 0xee,
+ 0xb1, 0xc7, 0x5d, 0x2b, 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63,
+ 0xf9, 0x8f, 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18,
+ 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16, 0xde, 0xa8,
+ 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81, 0x7a, 0x0c, 0x96, 0xe0,
+ 0xbf, 0xc9, 0x53, 0x25, 0xed, 0x9b, 0x01, 0x77, 0x28, 0x5e,
+ 0xc4, 0xb2, 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x06, 0x70,
+ 0xb8, 0xce, 0x54, 0x22, 0x7d, 0x0b, 0x91, 0xe7, 0x1c, 0x6a,
+ 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43, 0x8b, 0xfd, 0x67, 0x11,
+ 0x4e, 0x38, 0xa2, 0xd4, 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6,
+ 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7,
+ 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0x0c, 0x7b, 0xbc, 0xcb,
+ 0x52, 0x25, 0x7d, 0x0a, 0x93, 0xe4, 0x46, 0x31, 0xa8, 0xdf,
+ 0x87, 0xf0, 0x69, 0x1e, 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f,
+ 0xf6, 0x81, 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d,
+ 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2, 0x8c, 0xfb,
+ 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4, 0x13, 0x64, 0xfd, 0x8a,
+ 0xd2, 0xa5, 0x3c, 0x4b, 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19,
+ 0x80, 0xf7, 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68,
+ 0xca, 0xbd, 0x24, 0x53, 0x0b, 0x7c, 0xe5, 0x92, 0x55, 0x22,
+ 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0x0d, 0xe9, 0x9e, 0x07, 0x70,
+ 0x28, 0x5f, 0xc6, 0xb1, 0x76, 0x01, 0x98, 0xef, 0xb7, 0xc0,
+ 0x59, 0x2e, 0x05, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d,
+ 0x9a, 0xed, 0x74, 0x03, 0x5b, 0x2c, 0xb5, 0xc2, 0x26, 0x51,
+ 0xc8, 0xbf, 0xe7, 0x90, 0x09, 0x7e, 0xb9, 0xce, 0x57, 0x20,
+ 0x78, 0x0f, 0x96, 0xe1, 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5,
+ 0x6c, 0x1b, 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84,
+ 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38, 0xff, 0x88,
+ 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7, 0x89, 0xfe, 0x67, 0x10,
+ 0x48, 0x3f, 0xa6, 0xd1, 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0,
+ 0x39, 0x4e, 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2,
+ 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d, 0xcf, 0xb8,
+ 0x21, 0x56, 0x0e, 0x79, 0xe0, 0x97, 0x50, 0x27, 0xbe, 0xc9,
+ 0x91, 0xe6, 0x7f, 0x08, 0xec, 0x9b, 0x02, 0x75, 0x2d, 0x5a,
+ 0xc3, 0xb4, 0x73, 0x04, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b,
+ 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f,
+ 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92, 0xd3, 0xab, 0x23, 0x5b,
+ 0x2e, 0x56, 0xde, 0xa6, 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1,
+ 0x39, 0x41, 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce,
+ 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29, 0x68, 0x10,
+ 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d, 0x8f, 0xf7, 0x7f, 0x07,
+ 0x72, 0x0a, 0x82, 0xfa, 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee,
+ 0x66, 0x1e, 0x8c, 0xf4, 0x7c, 0x04, 0x71, 0x09, 0x81, 0xf9,
+ 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd, 0x5f, 0x27,
+ 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a, 0xd0, 0xa8, 0x20, 0x58,
+ 0x2d, 0x55, 0xdd, 0xa5, 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2,
+ 0x3a, 0x42, 0x03, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0x0e, 0x76,
+ 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91, 0xd6, 0xae,
+ 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3, 0x31, 0x49, 0xc1, 0xb9,
+ 0xcc, 0xb4, 0x3c, 0x44, 0x05, 0x7d, 0xf5, 0x8d, 0xf8, 0x80,
+ 0x08, 0x70, 0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97,
+ 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18, 0x8a, 0xf2,
+ 0x7a, 0x02, 0x77, 0x0f, 0x87, 0xff, 0xbe, 0xc6, 0x4e, 0x36,
+ 0x43, 0x3b, 0xb3, 0xcb, 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc,
+ 0x54, 0x2c, 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8,
+ 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f, 0x6e, 0x16,
+ 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b, 0x89, 0xf1, 0x79, 0x01,
+ 0x74, 0x0c, 0x84, 0xfc, 0x06, 0x7e, 0xf6, 0x8e, 0xfb, 0x83,
+ 0x0b, 0x73, 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94,
+ 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0, 0x32, 0x4a,
+ 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47, 0x00, 0x79, 0xf2, 0x8b,
+ 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f,
+ 0xe4, 0x9d, 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1,
+ 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e, 0x9b, 0xe2,
+ 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9, 0x74, 0x0d, 0x86, 0xff,
+ 0x8d, 0xf4, 0x7f, 0x06, 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8,
+ 0x53, 0x2a, 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5,
+ 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59, 0xc4, 0xbd,
+ 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6, 0xe8, 0x91, 0x1a, 0x63,
+ 0x11, 0x68, 0xe3, 0x9a, 0x07, 0x7e, 0xf5, 0x8c, 0xfe, 0x87,
+ 0x0c, 0x75, 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2,
+ 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d, 0x73, 0x0a,
+ 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x01, 0x9c, 0xe5, 0x6e, 0x17,
+ 0x65, 0x1c, 0x97, 0xee, 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6,
+ 0x5d, 0x24, 0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb,
+ 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7, 0x7a, 0x03,
+ 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x08, 0xcd, 0xb4, 0x3f, 0x46,
+ 0x34, 0x4d, 0xc6, 0xbf, 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2,
+ 0x29, 0x50, 0x0e, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x05, 0x7c,
+ 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93, 0x7d, 0x04,
+ 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0x0f, 0x92, 0xeb, 0x60, 0x19,
+ 0x6b, 0x12, 0x99, 0xe0, 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e,
+ 0xb5, 0xcc, 0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23,
+ 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94, 0x09, 0x70,
+ 0xfb, 0x82, 0xf0, 0x89, 0x02, 0x7b, 0x25, 0x5c, 0xd7, 0xae,
+ 0xdc, 0xa5, 0x2e, 0x57, 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a,
+ 0xc1, 0xb8, 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b,
+ 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c, 0xf3, 0x89,
+ 0x07, 0x7d, 0x06, 0x7c, 0xf2, 0x88, 0x04, 0x7e, 0xf0, 0x8a,
+ 0xf1, 0x8b, 0x05, 0x7f, 0xfb, 0x81, 0x0f, 0x75, 0x0e, 0x74,
+ 0xfa, 0x80, 0x0c, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0x0d, 0x77,
+ 0x08, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x09, 0x73, 0xff, 0x85,
+ 0x0b, 0x71, 0x0a, 0x70, 0xfe, 0x84, 0xeb, 0x91, 0x1f, 0x65,
+ 0x1e, 0x64, 0xea, 0x90, 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93,
+ 0x1d, 0x67, 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63,
+ 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94, 0x10, 0x6a,
+ 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b, 0xe7, 0x9d, 0x13, 0x69,
+ 0x12, 0x68, 0xe6, 0x9c, 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c,
+ 0xe2, 0x98, 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f,
+ 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0, 0x3c, 0x46,
+ 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47, 0x38, 0x42, 0xcc, 0xb6,
+ 0xcd, 0xb7, 0x39, 0x43, 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40,
+ 0xce, 0xb4, 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b,
+ 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc, 0xc3, 0xb9,
+ 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8, 0x34, 0x4e, 0xc0, 0xba,
+ 0xc1, 0xbb, 0x35, 0x4f, 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf,
+ 0x21, 0x5b, 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac,
+ 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8, 0x24, 0x5e,
+ 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f, 0xdb, 0xa1, 0x2f, 0x55,
+ 0x2e, 0x54, 0xda, 0xa0, 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3,
+ 0x2d, 0x57, 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53,
+ 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4, 0x00, 0x7b,
+ 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72,
+ 0x0e, 0x75, 0xf8, 0x83, 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69,
+ 0xe4, 0x9f, 0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60,
+ 0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7, 0x24, 0x5f,
+ 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58, 0x38, 0x43, 0xce, 0xb5,
+ 0xc9, 0xb2, 0x3f, 0x44, 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d,
+ 0xc0, 0xbb, 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7,
+ 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28, 0x48, 0x33,
+ 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34, 0xb7, 0xcc, 0x41, 0x3a,
+ 0x46, 0x3d, 0xb0, 0xcb, 0x70, 0x0b, 0x86, 0xfd, 0x81, 0xfa,
+ 0x77, 0x0c, 0x8f, 0xf4, 0x79, 0x02, 0x7e, 0x05, 0x88, 0xf3,
+ 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef, 0x6c, 0x17,
+ 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10, 0x4b, 0x30, 0xbd, 0xc6,
+ 0xba, 0xc1, 0x4c, 0x37, 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e,
+ 0xb3, 0xc8, 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4,
+ 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b, 0x90, 0xeb,
+ 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec, 0x6f, 0x14, 0x99, 0xe2,
+ 0x9e, 0xe5, 0x68, 0x13, 0x73, 0x08, 0x85, 0xfe, 0x82, 0xf9,
+ 0x74, 0x0f, 0x8c, 0xf7, 0x7a, 0x01, 0x7d, 0x06, 0x8b, 0xf0,
+ 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c, 0x1f, 0x64,
+ 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63, 0x03, 0x78, 0xf5, 0x8e,
+ 0xf2, 0x89, 0x04, 0x7f, 0xfc, 0x87, 0x0a, 0x71, 0x0d, 0x76,
+ 0xfb, 0x80, 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47,
+ 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8, 0xd8, 0xa3,
+ 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4, 0x27, 0x5c, 0xd1, 0xaa,
+ 0xd6, 0xad, 0x20, 0x5b, 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91,
+ 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae,
+ 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x02, 0x86, 0xfa, 0x54, 0x28,
+ 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d, 0x3b, 0x47, 0xc3, 0xbf,
+ 0xd6, 0xaa, 0x2e, 0x52, 0xfc, 0x80, 0x04, 0x78, 0x11, 0x6d,
+ 0xe9, 0x95, 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1,
+ 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x06, 0x76, 0x0a,
+ 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f, 0xb1, 0xcd, 0x49, 0x35,
+ 0x5c, 0x20, 0xa4, 0xd8, 0xe5, 0x99, 0x1d, 0x61, 0x08, 0x74,
+ 0xf0, 0x8c, 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b,
+ 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24, 0x8a, 0xf6,
+ 0x72, 0x0e, 0x67, 0x1b, 0x9f, 0xe3, 0xde, 0xa2, 0x26, 0x5a,
+ 0x33, 0x4f, 0xcb, 0xb7, 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88,
+ 0x0c, 0x70, 0xec, 0x90, 0x14, 0x68, 0x01, 0x7d, 0xf9, 0x85,
+ 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42, 0x7f, 0x03,
+ 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16, 0xb8, 0xc4, 0x40, 0x3c,
+ 0x55, 0x29, 0xad, 0xd1, 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46,
+ 0xc2, 0xbe, 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x05, 0x79,
+ 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d, 0x83, 0xff,
+ 0x7b, 0x07, 0x6e, 0x12, 0x96, 0xea, 0x9a, 0xe6, 0x62, 0x1e,
+ 0x77, 0x0b, 0x8f, 0xf3, 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc,
+ 0x48, 0x34, 0x09, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60,
+ 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7, 0xa1, 0xdd,
+ 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8, 0x66, 0x1a, 0x9e, 0xe2,
+ 0x8b, 0xf7, 0x73, 0x0f, 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3,
+ 0x27, 0x5b, 0xf5, 0x89, 0x0d, 0x71, 0x18, 0x64, 0xe0, 0x9c,
+ 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2,
+ 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1, 0x83, 0xfe, 0x79, 0x04,
+ 0x6a, 0x17, 0x90, 0xed, 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8,
+ 0x5f, 0x22, 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x08, 0x75,
+ 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba, 0x98, 0xe5,
+ 0x62, 0x1f, 0x71, 0x0c, 0x8b, 0xf6, 0x57, 0x2a, 0xad, 0xd0,
+ 0xbe, 0xc3, 0x44, 0x39, 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2,
+ 0x25, 0x58, 0xf9, 0x84, 0x03, 0x7e, 0x10, 0x6d, 0xea, 0x97,
+ 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb, 0x7a, 0x07,
+ 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14, 0x2d, 0x50, 0xd7, 0xaa,
+ 0xc4, 0xb9, 0x3e, 0x43, 0xe2, 0x9f, 0x18, 0x65, 0x0b, 0x76,
+ 0xf1, 0x8c, 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0,
+ 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0x0f, 0x6c, 0x11,
+ 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x02, 0xa3, 0xde, 0x59, 0x24,
+ 0x4a, 0x37, 0xb0, 0xcd, 0xef, 0x92, 0x15, 0x68, 0x06, 0x7b,
+ 0xfc, 0x81, 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e,
+ 0x77, 0x0a, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19, 0xb8, 0xc5,
+ 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6, 0xf4, 0x89, 0x0e, 0x73,
+ 0x1d, 0x60, 0xe7, 0x9a, 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf,
+ 0x28, 0x55, 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34,
+ 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x01, 0x86, 0xfb, 0xd9, 0xa4,
+ 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7, 0x16, 0x6b, 0xec, 0x91,
+ 0xff, 0x82, 0x05, 0x78, 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5,
+ 0x52, 0x2f, 0x8e, 0xf3, 0x74, 0x09, 0x67, 0x1a, 0x9d, 0xe0,
+ 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac, 0x0d, 0x70,
+ 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63, 0x00, 0x7e, 0xfc, 0x82,
+ 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c,
+ 0xce, 0xb0, 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4,
+ 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x03, 0x7b, 0x05,
+ 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c, 0xac, 0xd2, 0x50, 0x2e,
+ 0x49, 0x37, 0xb5, 0xcb, 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53,
+ 0xd1, 0xaf, 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x06, 0x78,
+ 0xf6, 0x88, 0x0a, 0x74, 0x13, 0x6d, 0xef, 0x91, 0x21, 0x5f,
+ 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46, 0x45, 0x3b, 0xb9, 0xc7,
+ 0xa0, 0xde, 0x5c, 0x22, 0x92, 0xec, 0x6e, 0x10, 0x77, 0x09,
+ 0x8b, 0xf5, 0x8d, 0xf3, 0x71, 0x0f, 0x68, 0x16, 0x94, 0xea,
+ 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d, 0x3e, 0x40,
+ 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59, 0xe9, 0x97, 0x15, 0x6b,
+ 0x0c, 0x72, 0xf0, 0x8e, 0xf1, 0x8f, 0x0d, 0x73, 0x14, 0x6a,
+ 0xe8, 0x96, 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41,
+ 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25, 0x95, 0xeb,
+ 0x69, 0x17, 0x70, 0x0e, 0x8c, 0xf2, 0x8a, 0xf4, 0x76, 0x08,
+ 0x6f, 0x11, 0x93, 0xed, 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6,
+ 0x44, 0x3a, 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e,
+ 0xee, 0x90, 0x12, 0x6c, 0x0b, 0x75, 0xf7, 0x89, 0x07, 0x79,
+ 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60, 0xd0, 0xae, 0x2c, 0x52,
+ 0x35, 0x4b, 0xc9, 0xb7, 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f,
+ 0xad, 0xd3, 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x04,
+ 0x7c, 0x02, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b, 0xab, 0xd5,
+ 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc, 0xcf, 0xb1, 0x33, 0x4d,
+ 0x2a, 0x54, 0xd6, 0xa8, 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83,
+ 0x01, 0x7f, 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60,
+ 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf, 0xa3, 0xdc,
+ 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3, 0x7c, 0x03, 0x82, 0xfd,
+ 0x9d, 0xe2, 0x63, 0x1c, 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5,
+ 0x44, 0x3b, 0x84, 0xfb, 0x7a, 0x05, 0x65, 0x1a, 0x9b, 0xe4,
+ 0xf8, 0x87, 0x06, 0x79, 0x19, 0x66, 0xe7, 0x98, 0x27, 0x58,
+ 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47, 0xb6, 0xc9, 0x48, 0x37,
+ 0x57, 0x28, 0xa9, 0xd6, 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7,
+ 0x76, 0x09, 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0x0a, 0x75,
+ 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa, 0xed, 0x92,
+ 0x13, 0x6c, 0x0c, 0x73, 0xf2, 0x8d, 0x32, 0x4d, 0xcc, 0xb3,
+ 0xd3, 0xac, 0x2d, 0x52, 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0,
+ 0x51, 0x2e, 0x91, 0xee, 0x6f, 0x10, 0x70, 0x0f, 0x8e, 0xf1,
+ 0x71, 0x0e, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11, 0xae, 0xd1,
+ 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce, 0xd2, 0xad, 0x2c, 0x53,
+ 0x33, 0x4c, 0xcd, 0xb2, 0x0d, 0x72, 0xf3, 0x8c, 0xec, 0x93,
+ 0x12, 0x6d, 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a,
+ 0xf5, 0x8a, 0x0b, 0x74, 0x14, 0x6b, 0xea, 0x95, 0x89, 0xf6,
+ 0x77, 0x08, 0x68, 0x17, 0x96, 0xe9, 0x56, 0x29, 0xa8, 0xd7,
+ 0xb7, 0xc8, 0x49, 0x36, 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59,
+ 0xd8, 0xa7, 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x07, 0x78,
+ 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x04, 0xbb, 0xc4,
+ 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb, 0x9c, 0xe3, 0x62, 0x1d,
+ 0x7d, 0x02, 0x83, 0xfc, 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd,
+ 0x5c, 0x23, 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f,
+ 0xe0, 0x9f, 0x1e, 0x61, 0x01, 0x7e, 0xff, 0x80, 0x00, 0x80,
+ 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9,
+ 0x4e, 0xce, 0x53, 0xd3, 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52,
+ 0xcf, 0x4f, 0x9c, 0x1c, 0x81, 0x01, 0xa6, 0x26, 0xbb, 0x3b,
+ 0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a, 0xb9, 0x39,
+ 0xa4, 0x24, 0x83, 0x03, 0x9e, 0x1e, 0x25, 0xa5, 0x38, 0xb8,
+ 0x1f, 0x9f, 0x02, 0x82, 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb,
+ 0x76, 0xf6, 0x87, 0x07, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20,
+ 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54, 0x6f, 0xef,
+ 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8, 0x1b, 0x9b, 0x06, 0x86,
+ 0x21, 0xa1, 0x3c, 0xbc, 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0,
+ 0x6d, 0xed, 0x3e, 0xbe, 0x23, 0xa3, 0x04, 0x84, 0x19, 0x99,
+ 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x05, 0xd6, 0x56,
+ 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71, 0x13, 0x93, 0x0e, 0x8e,
+ 0x29, 0xa9, 0x34, 0xb4, 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd,
+ 0x40, 0xc0, 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c,
+ 0x8f, 0x0f, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28, 0xde, 0x5e,
+ 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79, 0xaa, 0x2a, 0xb7, 0x37,
+ 0x90, 0x10, 0x8d, 0x0d, 0x36, 0xb6, 0x2b, 0xab, 0x0c, 0x8c,
+ 0x11, 0x91, 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5,
+ 0x94, 0x14, 0x89, 0x09, 0xae, 0x2e, 0xb3, 0x33, 0xe0, 0x60,
+ 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47, 0x7c, 0xfc, 0x61, 0xe1,
+ 0x46, 0xc6, 0x5b, 0xdb, 0x08, 0x88, 0x15, 0x95, 0x32, 0xb2,
+ 0x2f, 0xaf, 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe,
+ 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0x0a, 0x8a, 0xb1, 0x31,
+ 0xac, 0x2c, 0x8b, 0x0b, 0x96, 0x16, 0xc5, 0x45, 0xd8, 0x58,
+ 0xff, 0x7f, 0xe2, 0x62, 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf,
+ 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc,
+ 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58, 0x84, 0x05,
+ 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24, 0xed, 0x6c, 0xf2, 0x73,
+ 0xd3, 0x52, 0xcc, 0x4d, 0x91, 0x10, 0x8e, 0x0f, 0xaf, 0x2e,
+ 0xb0, 0x31, 0x15, 0x94, 0x0a, 0x8b, 0x2b, 0xaa, 0x34, 0xb5,
+ 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9, 0xc7, 0x46,
+ 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67, 0xbb, 0x3a, 0xa4, 0x25,
+ 0x85, 0x04, 0x9a, 0x1b, 0x3f, 0xbe, 0x20, 0xa1, 0x01, 0x80,
+ 0x1e, 0x9f, 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3,
+ 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 0x0b, 0x8a, 0x56, 0xd7,
+ 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6, 0xd2, 0x53, 0xcd, 0x4c,
+ 0xec, 0x6d, 0xf3, 0x72, 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11,
+ 0x8f, 0x0e, 0x93, 0x12, 0x8c, 0x0d, 0xad, 0x2c, 0xb2, 0x33,
+ 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f, 0x6b, 0xea,
+ 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb, 0x17, 0x96, 0x08, 0x89,
+ 0x29, 0xa8, 0x36, 0xb7, 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1,
+ 0x5f, 0xde, 0x02, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2,
+ 0x86, 0x07, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26, 0xfa, 0x7b,
+ 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a, 0x54, 0xd5, 0x4b, 0xca,
+ 0x6a, 0xeb, 0x75, 0xf4, 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97,
+ 0x09, 0x88, 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0x0c,
+ 0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70, 0xb9, 0x38,
+ 0xa6, 0x27, 0x87, 0x06, 0x98, 0x19, 0xc5, 0x44, 0xda, 0x5b,
+ 0xfb, 0x7a, 0xe4, 0x65, 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe,
+ 0x60, 0xe1, 0x3d, 0xbc, 0x22, 0xa3, 0x03, 0x82, 0x1c, 0x9d,
+ 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6,
+ 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd, 0xc8, 0x4a, 0xd1, 0x53,
+ 0xfa, 0x78, 0xe3, 0x61, 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c,
+ 0x87, 0x05, 0x8d, 0x0f, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24,
+ 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40, 0x45, 0xc7,
+ 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec, 0x21, 0xa3, 0x38, 0xba,
+ 0x13, 0x91, 0x0a, 0x88, 0x07, 0x85, 0x1e, 0x9c, 0x35, 0xb7,
+ 0x2c, 0xae, 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca,
+ 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66, 0xab, 0x29,
+ 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x02, 0x8a, 0x08, 0x93, 0x11,
+ 0xb8, 0x3a, 0xa1, 0x23, 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e,
+ 0xc5, 0x47, 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb,
+ 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0x0d, 0x8f, 0x0e, 0x8c,
+ 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7, 0x6a, 0xe8, 0x73, 0xf1,
+ 0x58, 0xda, 0x41, 0xc3, 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76,
+ 0xed, 0x6f, 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0x0b,
+ 0x83, 0x01, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a, 0xe7, 0x65,
+ 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e, 0x4b, 0xc9, 0x52, 0xd0,
+ 0x79, 0xfb, 0x60, 0xe2, 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f,
+ 0x04, 0x86, 0x09, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0,
+ 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4, 0xc1, 0x43,
+ 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68, 0xa5, 0x27, 0xbc, 0x3e,
+ 0x97, 0x15, 0x8e, 0x0c, 0x84, 0x06, 0x9d, 0x1f, 0xb6, 0x34,
+ 0xaf, 0x2d, 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49,
+ 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5, 0x28, 0xaa,
+ 0x31, 0xb3, 0x1a, 0x98, 0x03, 0x81, 0x00, 0x83, 0x1b, 0x98,
+ 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9,
+ 0x41, 0xc2, 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76,
+ 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x01, 0x99, 0x1a, 0xad, 0x2e,
+ 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x03, 0xc1, 0x42, 0xda, 0x59,
+ 0xf7, 0x74, 0xec, 0x6f, 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0,
+ 0x58, 0xdb, 0x19, 0x9a, 0x02, 0x81, 0x2f, 0xac, 0x34, 0xb7,
+ 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9, 0x2b, 0xa8,
+ 0x30, 0xb3, 0x1d, 0x9e, 0x06, 0x85, 0x9f, 0x1c, 0x84, 0x07,
+ 0xa9, 0x2a, 0xb2, 0x31, 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46,
+ 0xde, 0x5d, 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44,
+ 0x86, 0x05, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28, 0x32, 0xb1,
+ 0x29, 0xaa, 0x04, 0x87, 0x1f, 0x9c, 0x5e, 0xdd, 0x45, 0xc6,
+ 0x68, 0xeb, 0x73, 0xf0, 0x8e, 0x0d, 0x95, 0x16, 0xb8, 0x3b,
+ 0xa3, 0x20, 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c,
+ 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8, 0x3a, 0xb9,
+ 0x21, 0xa2, 0x0c, 0x8f, 0x17, 0x94, 0x23, 0xa0, 0x38, 0xbb,
+ 0x15, 0x96, 0x0e, 0x8d, 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa,
+ 0x62, 0xe1, 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55,
+ 0x97, 0x14, 0x8c, 0x0f, 0xa1, 0x22, 0xba, 0x39, 0xc9, 0x4a,
+ 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67, 0xa5, 0x26, 0xbe, 0x3d,
+ 0x93, 0x10, 0x88, 0x0b, 0x11, 0x92, 0x0a, 0x89, 0x27, 0xa4,
+ 0x3c, 0xbf, 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3,
+ 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca, 0x08, 0x8b,
+ 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6, 0xbc, 0x3f, 0xa7, 0x24,
+ 0x8a, 0x09, 0x91, 0x12, 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65,
+ 0xfd, 0x7e, 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb,
+ 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef, 0xa8, 0x2c,
+ 0xbd, 0x39, 0x82, 0x06, 0x97, 0x13, 0xfc, 0x78, 0xe9, 0x6d,
+ 0xd6, 0x52, 0xc3, 0x47, 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3,
+ 0x72, 0xf6, 0x19, 0x9d, 0x0c, 0x88, 0x33, 0xb7, 0x26, 0xa2,
+ 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e, 0xb1, 0x35,
+ 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0x0a, 0x9a, 0x1e, 0x8f, 0x0b,
+ 0xb0, 0x34, 0xa5, 0x21, 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60,
+ 0xf1, 0x75, 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0x0d, 0x89,
+ 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd, 0xd7, 0x53,
+ 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c, 0x83, 0x07, 0x96, 0x12,
+ 0xa9, 0x2d, 0xbc, 0x38, 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1,
+ 0x40, 0xc4, 0x2b, 0xaf, 0x3e, 0xba, 0x01, 0x85, 0x14, 0x90,
+ 0x29, 0xad, 0x3c, 0xb8, 0x03, 0x87, 0x16, 0x92, 0x7d, 0xf9,
+ 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6, 0x81, 0x05, 0x94, 0x10,
+ 0xab, 0x2f, 0xbe, 0x3a, 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b,
+ 0xea, 0x6e, 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf,
+ 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0x0f, 0x8b, 0xcc, 0x48,
+ 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77, 0x98, 0x1c, 0x8d, 0x09,
+ 0xb2, 0x36, 0xa7, 0x23, 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d,
+ 0x8c, 0x08, 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c,
+ 0x1b, 0x9f, 0x0e, 0x8a, 0x31, 0xb5, 0x24, 0xa0, 0x4f, 0xcb,
+ 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4, 0xfe, 0x7a, 0xeb, 0x6f,
+ 0xd4, 0x50, 0xc1, 0x45, 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x04,
+ 0x95, 0x11, 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed,
+ 0x02, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9, 0x00, 0x85,
+ 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce,
+ 0x72, 0xf7, 0x65, 0xe0, 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13,
+ 0x81, 0x04, 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58,
+ 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1, 0x31, 0xb4,
+ 0x26, 0xa3, 0x1f, 0x9a, 0x08, 0x8d, 0xd5, 0x50, 0xc2, 0x47,
+ 0xfb, 0x7e, 0xec, 0x69, 0x89, 0x0c, 0x9e, 0x1b, 0xa7, 0x22,
+ 0xb0, 0x35, 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66,
+ 0x86, 0x03, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a, 0x62, 0xe7,
+ 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde, 0x3e, 0xbb, 0x29, 0xac,
+ 0x10, 0x95, 0x07, 0x82, 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c,
+ 0x8e, 0x0b, 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57,
+ 0x0f, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3, 0x53, 0xd6,
+ 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef, 0xa9, 0x2c, 0xbe, 0x3b,
+ 0x87, 0x02, 0x90, 0x15, 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e,
+ 0xcc, 0x49, 0x11, 0x94, 0x06, 0x83, 0x3f, 0xba, 0x28, 0xad,
+ 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1, 0xc4, 0x41,
+ 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78, 0x98, 0x1d, 0x8f, 0x0a,
+ 0xb6, 0x33, 0xa1, 0x24, 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7,
+ 0x45, 0xc0, 0x20, 0xa5, 0x37, 0xb2, 0x0e, 0x8b, 0x19, 0x9c,
+ 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf, 0x2f, 0xaa,
+ 0x38, 0xbd, 0x01, 0x84, 0x16, 0x93, 0xcb, 0x4e, 0xdc, 0x59,
+ 0xe5, 0x60, 0xf2, 0x77, 0x97, 0x12, 0x80, 0x05, 0xb9, 0x3c,
+ 0xae, 0x2b, 0x1e, 0x9b, 0x09, 0x8c, 0x30, 0xb5, 0x27, 0xa2,
+ 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe, 0xa6, 0x23,
+ 0xb1, 0x34, 0x88, 0x0d, 0x9f, 0x1a, 0xfa, 0x7f, 0xed, 0x68,
+ 0xd4, 0x51, 0xc3, 0x46, 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4,
+ 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1,
+ 0x88, 0x0e, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d, 0xcc, 0x4a,
+ 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79, 0x0d, 0x8b, 0x1c, 0x9a,
+ 0x2f, 0xa9, 0x3e, 0xb8, 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed,
+ 0x7a, 0xfc, 0x85, 0x03, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30,
+ 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74, 0x1a, 0x9c,
+ 0x0b, 0x8d, 0x38, 0xbe, 0x29, 0xaf, 0x5e, 0xd8, 0x4f, 0xc9,
+ 0x7c, 0xfa, 0x6d, 0xeb, 0x92, 0x14, 0x83, 0x05, 0xb0, 0x36,
+ 0xa1, 0x27, 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63,
+ 0x17, 0x91, 0x06, 0x80, 0x35, 0xb3, 0x24, 0xa2, 0x53, 0xd5,
+ 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6, 0x9f, 0x19, 0x8e, 0x08,
+ 0xbd, 0x3b, 0xac, 0x2a, 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f,
+ 0xe8, 0x6e, 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x07, 0x81,
+ 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5, 0xbc, 0x3a,
+ 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x09, 0xf8, 0x7e, 0xe9, 0x6f,
+ 0xda, 0x5c, 0xcb, 0x4d, 0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d,
+ 0x0a, 0x8c, 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8,
+ 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x04, 0xf5, 0x73,
+ 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40, 0x2e, 0xa8, 0x3f, 0xb9,
+ 0x0c, 0x8a, 0x1d, 0x9b, 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce,
+ 0x59, 0xdf, 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x02, 0x95, 0x13,
+ 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57, 0x23, 0xa5,
+ 0x32, 0xb4, 0x01, 0x87, 0x10, 0x96, 0x67, 0xe1, 0x76, 0xf0,
+ 0x45, 0xc3, 0x54, 0xd2, 0xab, 0x2d, 0xba, 0x3c, 0x89, 0x0f,
+ 0x98, 0x1e, 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a,
+ 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb,
+ 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe, 0x98, 0x1f, 0x8b, 0x0c,
+ 0xbe, 0x39, 0xad, 0x2a, 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75,
+ 0xe1, 0x66, 0x2d, 0xaa, 0x3e, 0xb9, 0x0b, 0x8c, 0x18, 0x9f,
+ 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3, 0xb5, 0x32,
+ 0xa6, 0x21, 0x93, 0x14, 0x80, 0x07, 0xf9, 0x7e, 0xea, 0x6d,
+ 0xdf, 0x58, 0xcc, 0x4b, 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb,
+ 0x6f, 0xe8, 0x16, 0x91, 0x05, 0x82, 0x30, 0xb7, 0x23, 0xa4,
+ 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70, 0x8e, 0x09,
+ 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c, 0x77, 0xf0, 0x64, 0xe3,
+ 0x51, 0xd6, 0x42, 0xc5, 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a,
+ 0x0e, 0x89, 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d,
+ 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x02, 0x96, 0x11, 0xb4, 0x33,
+ 0xa7, 0x20, 0x92, 0x15, 0x81, 0x06, 0xf8, 0x7f, 0xeb, 0x6c,
+ 0xde, 0x59, 0xcd, 0x4a, 0x2c, 0xab, 0x3f, 0xb8, 0x0a, 0x8d,
+ 0x19, 0x9e, 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2,
+ 0x99, 0x1e, 0x8a, 0x0d, 0xbf, 0x38, 0xac, 0x2b, 0xd5, 0x52,
+ 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67, 0x01, 0x86, 0x12, 0x95,
+ 0x27, 0xa0, 0x34, 0xb3, 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec,
+ 0x78, 0xff, 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c,
+ 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x03, 0x97, 0x10, 0x76, 0xf1,
+ 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4, 0x3a, 0xbd, 0x29, 0xae,
+ 0x1c, 0x9b, 0x0f, 0x88, 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62,
+ 0xf6, 0x71, 0x8f, 0x08, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d,
+ 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9, 0x17, 0x90,
+ 0x04, 0x83, 0x31, 0xb6, 0x22, 0xa5, 0x00, 0x88, 0x0d, 0x85,
+ 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6,
+ 0x23, 0xab, 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7,
+ 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3, 0xd0, 0x58,
+ 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f, 0xe4, 0x6c, 0xe9, 0x61,
+ 0xfe, 0x76, 0xf3, 0x7b, 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a,
+ 0xaf, 0x27, 0x8c, 0x04, 0x81, 0x09, 0x96, 0x1e, 0x9b, 0x13,
+ 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22, 0x89, 0x01,
+ 0x84, 0x0c, 0x93, 0x1b, 0x9e, 0x16, 0xd5, 0x5d, 0xd8, 0x50,
+ 0xcf, 0x47, 0xc2, 0x4a, 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73,
+ 0xf6, 0x7e, 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2,
+ 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6, 0x05, 0x8d,
+ 0x08, 0x80, 0x1f, 0x97, 0x12, 0x9a, 0x31, 0xb9, 0x3c, 0xb4,
+ 0x2b, 0xa3, 0x26, 0xae, 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5,
+ 0x70, 0xf8, 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc,
+ 0x0f, 0x87, 0x02, 0x8a, 0x15, 0x9d, 0x18, 0x90, 0x3b, 0xb3,
+ 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4, 0xb7, 0x3f, 0xba, 0x32,
+ 0xad, 0x25, 0xa0, 0x28, 0x83, 0x0b, 0x8e, 0x06, 0x99, 0x11,
+ 0x94, 0x1c, 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40,
+ 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74, 0xda, 0x52,
+ 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45, 0xee, 0x66, 0xe3, 0x6b,
+ 0xf4, 0x7c, 0xf9, 0x71, 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20,
+ 0xa5, 0x2d, 0x86, 0x0e, 0x8b, 0x03, 0x9c, 0x14, 0x91, 0x19,
+ 0x0a, 0x82, 0x07, 0x8f, 0x10, 0x98, 0x1d, 0x95, 0x3e, 0xb6,
+ 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1, 0x62, 0xea, 0x6f, 0xe7,
+ 0x78, 0xf0, 0x75, 0xfd, 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4,
+ 0x41, 0xc9, 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98,
+ 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4, 0x78, 0xf1,
+ 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0, 0x44, 0xcd, 0x4b, 0xc2,
+ 0x5a, 0xd3, 0x55, 0xdc, 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67,
+ 0xe1, 0x68, 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54,
+ 0x88, 0x01, 0x87, 0x0e, 0x96, 0x1f, 0x99, 0x10, 0xb4, 0x3d,
+ 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c, 0xfd, 0x74, 0xf2, 0x7b,
+ 0xe3, 0x6a, 0xec, 0x65, 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56,
+ 0xd0, 0x59, 0x85, 0x0c, 0x8a, 0x03, 0x9b, 0x12, 0x94, 0x1d,
+ 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21, 0x0d, 0x84,
+ 0x02, 0x8b, 0x13, 0x9a, 0x1c, 0x95, 0x31, 0xb8, 0x3e, 0xb7,
+ 0x2f, 0xa6, 0x20, 0xa9, 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2,
+ 0x64, 0xed, 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1,
+ 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f, 0xdb, 0x52,
+ 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43, 0x9f, 0x16, 0x90, 0x19,
+ 0x81, 0x08, 0x8e, 0x07, 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34,
+ 0xb2, 0x3b, 0x17, 0x9e, 0x18, 0x91, 0x09, 0x80, 0x06, 0x8f,
+ 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3, 0x6f, 0xe6,
+ 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7, 0x53, 0xda, 0x5c, 0xd5,
+ 0x4d, 0xc4, 0x42, 0xcb, 0x1a, 0x93, 0x15, 0x9c, 0x04, 0x8d,
+ 0x0b, 0x82, 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe,
+ 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa, 0x5e, 0xd7,
+ 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6, 0xea, 0x63, 0xe5, 0x6c,
+ 0xf4, 0x7d, 0xfb, 0x72, 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41,
+ 0xc7, 0x4e, 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x05, 0x83, 0x0a,
+ 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36, 0x00, 0x8a,
+ 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7,
+ 0x36, 0xbc, 0x3f, 0xb5, 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0,
+ 0x53, 0xd9, 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd,
+ 0x90, 0x1a, 0x99, 0x13, 0x82, 0x08, 0x8b, 0x01, 0xb4, 0x3e,
+ 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25, 0xd8, 0x52, 0xd1, 0x5b,
+ 0xca, 0x40, 0xc3, 0x49, 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64,
+ 0xe7, 0x6d, 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac,
+ 0x19, 0x93, 0x10, 0x9a, 0x0b, 0x81, 0x02, 0x88, 0x75, 0xff,
+ 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4, 0x51, 0xdb, 0x58, 0xd2,
+ 0x43, 0xc9, 0x4a, 0xc0, 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35,
+ 0xb6, 0x3c, 0x89, 0x03, 0x80, 0x0a, 0x9b, 0x11, 0x92, 0x18,
+ 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74, 0xc1, 0x4b,
+ 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50, 0x7a, 0xf0, 0x73, 0xf9,
+ 0x68, 0xe2, 0x61, 0xeb, 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6,
+ 0x45, 0xcf, 0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3,
+ 0x16, 0x9c, 0x1f, 0x95, 0x04, 0x8e, 0x0d, 0x87, 0xea, 0x60,
+ 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b, 0xce, 0x44, 0xc7, 0x4d,
+ 0xdc, 0x56, 0xd5, 0x5f, 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a,
+ 0xb9, 0x33, 0x86, 0x0c, 0x8f, 0x05, 0x94, 0x1e, 0x9d, 0x17,
+ 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6, 0x63, 0xe9,
+ 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2, 0x0f, 0x85, 0x06, 0x8c,
+ 0x1d, 0x97, 0x14, 0x9e, 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3,
+ 0x30, 0xba, 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46,
+ 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62, 0x9f, 0x15,
+ 0x96, 0x1c, 0x8d, 0x07, 0x84, 0x0e, 0xbb, 0x31, 0xb2, 0x38,
+ 0xa9, 0x23, 0xa0, 0x2a, 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d,
+ 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba,
+ 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce, 0x74, 0xff,
+ 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2, 0xb0, 0x3b, 0xbb, 0x30,
+ 0xa6, 0x2d, 0xad, 0x26, 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x01,
+ 0x81, 0x0a, 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e,
+ 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52, 0x7d, 0xf6,
+ 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb, 0x51, 0xda, 0x5a, 0xd1,
+ 0x47, 0xcc, 0x4c, 0xc7, 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8,
+ 0x38, 0xb3, 0x09, 0x82, 0x02, 0x89, 0x1f, 0x94, 0x14, 0x9f,
+ 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b, 0xe1, 0x6a,
+ 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77, 0x95, 0x1e, 0x9e, 0x15,
+ 0x83, 0x08, 0x88, 0x03, 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24,
+ 0xa4, 0x2f, 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c,
+ 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40, 0xa2, 0x29,
+ 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34, 0x8e, 0x05, 0x85, 0x0e,
+ 0x98, 0x13, 0x93, 0x18, 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7,
+ 0x57, 0xdc, 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0,
+ 0x12, 0x99, 0x19, 0x92, 0x04, 0x8f, 0x0f, 0x84, 0x3e, 0xb5,
+ 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8, 0x87, 0x0c, 0x8c, 0x07,
+ 0x91, 0x1a, 0x9a, 0x11, 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36,
+ 0xb6, 0x3d, 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49,
+ 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65, 0x37, 0xbc,
+ 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1, 0x1b, 0x90, 0x10, 0x9b,
+ 0x0d, 0x86, 0x06, 0x8d, 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2,
+ 0x72, 0xf9, 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5,
+ 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98,
+ 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97, 0x28, 0xa4, 0x2d, 0xa1,
+ 0x22, 0xae, 0x27, 0xab, 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba,
+ 0x33, 0xbf, 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3,
+ 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7, 0x78, 0xf4,
+ 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb, 0x6c, 0xe0, 0x69, 0xe5,
+ 0x66, 0xea, 0x63, 0xef, 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26,
+ 0xaf, 0x23, 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37,
+ 0x88, 0x04, 0x8d, 0x01, 0x82, 0x0e, 0x87, 0x0b, 0x9c, 0x10,
+ 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f, 0xf0, 0x7c, 0xf5, 0x79,
+ 0xfa, 0x76, 0xff, 0x73, 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62,
+ 0xeb, 0x67, 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b,
+ 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f, 0x5d, 0xd1,
+ 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde, 0x49, 0xc5, 0x4c, 0xc0,
+ 0x43, 0xcf, 0x46, 0xca, 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3,
+ 0x7a, 0xf6, 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2,
+ 0x0d, 0x81, 0x08, 0x84, 0x07, 0x8b, 0x02, 0x8e, 0x19, 0x95,
+ 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a, 0x25, 0xa9, 0x20, 0xac,
+ 0x2f, 0xa3, 0x2a, 0xa6, 0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7,
+ 0x3e, 0xb2, 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e,
+ 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a, 0xd5, 0x59,
+ 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56, 0xc1, 0x4d, 0xc4, 0x48,
+ 0xcb, 0x47, 0xce, 0x42, 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b,
+ 0xa2, 0x2e, 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a,
+ 0x85, 0x09, 0x80, 0x0c, 0x8f, 0x03, 0x8a, 0x06, 0x91, 0x1d,
+ 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12, 0x00, 0x8d, 0x07, 0x8a,
+ 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f,
+ 0x15, 0x98, 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc,
+ 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0, 0x70, 0xfd,
+ 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4, 0x6c, 0xe1, 0x6b, 0xe6,
+ 0x62, 0xef, 0x65, 0xe8, 0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb,
+ 0x41, 0xcc, 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0,
+ 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64, 0xfc, 0x71,
+ 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78, 0xd8, 0x55, 0xdf, 0x52,
+ 0xd6, 0x5b, 0xd1, 0x5c, 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47,
+ 0xcd, 0x40, 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14,
+ 0x8c, 0x01, 0x8b, 0x06, 0x82, 0x0f, 0x85, 0x08, 0xa8, 0x25,
+ 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c, 0xb4, 0x39, 0xb3, 0x3e,
+ 0xba, 0x37, 0xbd, 0x30, 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e,
+ 0xd4, 0x59, 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45,
+ 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61, 0xf9, 0x74,
+ 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d, 0xad, 0x20, 0xaa, 0x27,
+ 0xa3, 0x2e, 0xa4, 0x29, 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32,
+ 0xb8, 0x35, 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11,
+ 0x89, 0x04, 0x8e, 0x03, 0x87, 0x0a, 0x80, 0x0d, 0x3d, 0xb0,
+ 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9, 0x21, 0xac, 0x26, 0xab,
+ 0x2f, 0xa2, 0x28, 0xa5, 0x05, 0x88, 0x02, 0x8f, 0x0b, 0x86,
+ 0x0c, 0x81, 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d,
+ 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9, 0x51, 0xdc,
+ 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5, 0x75, 0xf8, 0x72, 0xff,
+ 0x7b, 0xf6, 0x7c, 0xf1, 0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea,
+ 0x60, 0xed, 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d,
+ 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89, 0x08, 0x86,
+ 0x09, 0x87, 0x0a, 0x84, 0x0b, 0x85, 0x0c, 0x82, 0x0d, 0x83,
+ 0x0e, 0x80, 0x0f, 0x81, 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c,
+ 0x13, 0x9d, 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99,
+ 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95, 0x1c, 0x92,
+ 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91, 0x20, 0xae, 0x21, 0xaf,
+ 0x22, 0xac, 0x23, 0xad, 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8,
+ 0x27, 0xa9, 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5,
+ 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1, 0x30, 0xbe,
+ 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd, 0x34, 0xba, 0x35, 0xbb,
+ 0x36, 0xb8, 0x37, 0xb9, 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4,
+ 0x3b, 0xb5, 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1,
+ 0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd, 0x44, 0xca,
+ 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9, 0x48, 0xc6, 0x49, 0xc7,
+ 0x4a, 0xc4, 0x4b, 0xc5, 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0,
+ 0x4f, 0xc1, 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd,
+ 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9, 0x58, 0xd6,
+ 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5, 0x5c, 0xd2, 0x5d, 0xd3,
+ 0x5e, 0xd0, 0x5f, 0xd1, 0x60, 0xee, 0x61, 0xef, 0x62, 0xec,
+ 0x63, 0xed, 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9,
+ 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5, 0x6c, 0xe2,
+ 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1, 0x70, 0xfe, 0x71, 0xff,
+ 0x72, 0xfc, 0x73, 0xfd, 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8,
+ 0x77, 0xf9, 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5,
+ 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1, 0x00, 0x8f,
+ 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80,
+ 0x0a, 0x85, 0x09, 0x86, 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91,
+ 0x1d, 0x92, 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e,
+ 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba, 0x3c, 0xb3,
+ 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6, 0x28, 0xa7, 0x2b, 0xa4,
+ 0x2e, 0xa1, 0x2d, 0xa2, 0x24, 0xab, 0x27, 0xa8, 0x22, 0xad,
+ 0x21, 0xae, 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea,
+ 0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6, 0x78, 0xf7,
+ 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2, 0x74, 0xfb, 0x77, 0xf8,
+ 0x72, 0xfd, 0x71, 0xfe, 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9,
+ 0x55, 0xda, 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6,
+ 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2, 0x44, 0xcb,
+ 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce, 0xc0, 0x4f, 0xc3, 0x4c,
+ 0xc6, 0x49, 0xc5, 0x4a, 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45,
+ 0xc9, 0x46, 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52,
+ 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e, 0xf0, 0x7f,
+ 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a, 0xfc, 0x73, 0xff, 0x70,
+ 0xfa, 0x75, 0xf9, 0x76, 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61,
+ 0xed, 0x62, 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e,
+ 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a, 0xac, 0x23,
+ 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26, 0xb8, 0x37, 0xbb, 0x34,
+ 0xbe, 0x31, 0xbd, 0x32, 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d,
+ 0xb1, 0x3e, 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a,
+ 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16, 0x88, 0x07,
+ 0x8b, 0x04, 0x8e, 0x01, 0x8d, 0x02, 0x84, 0x0b, 0x87, 0x08,
+ 0x82, 0x0d, 0x81, 0x0e, 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea,
+ 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23,
+ 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22, 0x01, 0x91,
+ 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6, 0xf7, 0x67, 0xca, 0x5a,
+ 0x8d, 0x1d, 0xb0, 0x20, 0x03, 0x93, 0x3e, 0xae, 0x79, 0xe9,
+ 0x44, 0xd4, 0x02, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5,
+ 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21, 0xf3, 0x63,
+ 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24, 0x07, 0x97, 0x3a, 0xaa,
+ 0x7d, 0xed, 0x40, 0xd0, 0x06, 0x96, 0x3b, 0xab, 0x7c, 0xec,
+ 0x41, 0xd1, 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25,
+ 0x04, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3, 0xf0, 0x60,
+ 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27, 0xf1, 0x61, 0xcc, 0x5c,
+ 0x8b, 0x1b, 0xb6, 0x26, 0x05, 0x95, 0x38, 0xa8, 0x7f, 0xef,
+ 0x42, 0xd2, 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c,
+ 0x0f, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8, 0x0e, 0x9e,
+ 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9, 0xfa, 0x6a, 0xc7, 0x57,
+ 0x80, 0x10, 0xbd, 0x2d, 0x0c, 0x9c, 0x31, 0xa1, 0x76, 0xe6,
+ 0x4b, 0xdb, 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f,
+ 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e, 0x0d, 0x9d,
+ 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda, 0x08, 0x98, 0x35, 0xa5,
+ 0x72, 0xe2, 0x4f, 0xdf, 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16,
+ 0xbb, 0x2b, 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a,
+ 0x09, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde, 0xff, 0x6f,
+ 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28, 0x0b, 0x9b, 0x36, 0xa6,
+ 0x71, 0xe1, 0x4c, 0xdc, 0x0a, 0x9a, 0x37, 0xa7, 0x70, 0xe0,
+ 0x4d, 0xdd, 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29,
+ 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d,
+ 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c, 0xe5, 0x74, 0xda, 0x4b,
+ 0x9b, 0x0a, 0xa4, 0x35, 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6,
+ 0x58, 0xc9, 0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x07,
+ 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb, 0x32, 0xa3,
+ 0x0d, 0x9c, 0x4c, 0xdd, 0x73, 0xe2, 0xce, 0x5f, 0xf1, 0x60,
+ 0xb0, 0x21, 0x8f, 0x1e, 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c,
+ 0xf2, 0x63, 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0x0e, 0x9f,
+ 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86, 0xaa, 0x3b,
+ 0x95, 0x04, 0xd4, 0x45, 0xeb, 0x7a, 0x64, 0xf5, 0x5b, 0xca,
+ 0x1a, 0x8b, 0x25, 0xb4, 0x98, 0x09, 0xa7, 0x36, 0xe6, 0x77,
+ 0xd9, 0x48, 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51,
+ 0x7d, 0xec, 0x42, 0xd3, 0x03, 0x92, 0x3c, 0xad, 0x7b, 0xea,
+ 0x44, 0xd5, 0x05, 0x94, 0x3a, 0xab, 0x87, 0x16, 0xb8, 0x29,
+ 0xf9, 0x68, 0xc6, 0x57, 0x9e, 0x0f, 0xa1, 0x30, 0xe0, 0x71,
+ 0xdf, 0x4e, 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2,
+ 0xac, 0x3d, 0x93, 0x02, 0xd2, 0x43, 0xed, 0x7c, 0x50, 0xc1,
+ 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80, 0x49, 0xd8, 0x76, 0xe7,
+ 0x37, 0xa6, 0x08, 0x99, 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a,
+ 0xf4, 0x65, 0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18,
+ 0x34, 0xa5, 0x0b, 0x9a, 0x4a, 0xdb, 0x75, 0xe4, 0x2d, 0xbc,
+ 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd, 0xd1, 0x40, 0xee, 0x7f,
+ 0xaf, 0x3e, 0x90, 0x01, 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0,
+ 0x5e, 0xcf, 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0x0c, 0xa2, 0x33,
+ 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a, 0x06, 0x97,
+ 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6, 0x00, 0x92, 0x39, 0xab,
+ 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04,
+ 0xaf, 0x3d, 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0x0c,
+ 0x31, 0xa3, 0x08, 0x9a, 0x43, 0xd1, 0x7a, 0xe8, 0xb7, 0x25,
+ 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e, 0x53, 0xc1, 0x6a, 0xf8,
+ 0x21, 0xb3, 0x18, 0x8a, 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82,
+ 0x29, 0xbb, 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f,
+ 0x73, 0xe1, 0x4a, 0xd8, 0x01, 0x93, 0x38, 0xaa, 0x97, 0x05,
+ 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e, 0xa6, 0x34, 0x9f, 0x0d,
+ 0xd4, 0x46, 0xed, 0x7f, 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2,
+ 0x09, 0x9b, 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d,
+ 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9, 0x11, 0x83,
+ 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8, 0xf5, 0x67, 0xcc, 0x5e,
+ 0x87, 0x15, 0xbe, 0x2c, 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x06,
+ 0xad, 0x3f, 0x02, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb,
+ 0x33, 0xa1, 0x0a, 0x98, 0x41, 0xd3, 0x78, 0xea, 0xd7, 0x45,
+ 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0x0e, 0x51, 0xc3, 0x68, 0xfa,
+ 0x23, 0xb1, 0x1a, 0x88, 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55,
+ 0xfe, 0x6c, 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d,
+ 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9, 0x95, 0x07,
+ 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c, 0x71, 0xe3, 0x48, 0xda,
+ 0x03, 0x91, 0x3a, 0xa8, 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0,
+ 0x0b, 0x99, 0xa4, 0x36, 0x9d, 0x0f, 0xd6, 0x44, 0xef, 0x7d,
+ 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb, 0xc6, 0x54,
+ 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f, 0xf7, 0x65, 0xce, 0x5c,
+ 0x85, 0x17, 0xbc, 0x2e, 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3,
+ 0x58, 0xca, 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde,
+ 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32, 0xc5, 0x56,
+ 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b, 0x29, 0xba, 0x12, 0x81,
+ 0x5f, 0xcc, 0x64, 0xf7, 0x97, 0x04, 0xac, 0x3f, 0xe1, 0x72,
+ 0xda, 0x49, 0x7b, 0xe8, 0x40, 0xd3, 0x0d, 0x9e, 0x36, 0xa5,
+ 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c, 0xbe, 0x2d,
+ 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60, 0x33, 0xa0, 0x08, 0x9b,
+ 0x45, 0xd6, 0x7e, 0xed, 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a,
+ 0x92, 0x01, 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28,
+ 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4, 0xa4, 0x37,
+ 0x9f, 0x0c, 0xd2, 0x41, 0xe9, 0x7a, 0x48, 0xdb, 0x73, 0xe0,
+ 0x3e, 0xad, 0x05, 0x96, 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84,
+ 0x2c, 0xbf, 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53,
+ 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8, 0x8a, 0x19,
+ 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54, 0xa3, 0x30, 0x98, 0x0b,
+ 0xd5, 0x46, 0xee, 0x7d, 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa,
+ 0x02, 0x91, 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f,
+ 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3, 0x34, 0xa7,
+ 0x0f, 0x9c, 0x42, 0xd1, 0x79, 0xea, 0xd8, 0x4b, 0xe3, 0x70,
+ 0xae, 0x3d, 0x95, 0x06, 0x55, 0xc6, 0x6e, 0xfd, 0x23, 0xb0,
+ 0x18, 0x8b, 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67,
+ 0x90, 0x03, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e, 0x7c, 0xef,
+ 0x47, 0xd4, 0x0a, 0x99, 0x31, 0xa2, 0xc2, 0x51, 0xf9, 0x6a,
+ 0xb4, 0x27, 0x8f, 0x1c, 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb,
+ 0x63, 0xf0, 0x07, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9,
+ 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0x0e, 0xa6, 0x35, 0x00, 0x94,
+ 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75,
+ 0xbe, 0x2a, 0x8b, 0x1f, 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b,
+ 0xea, 0x7e, 0x61, 0xf5, 0x54, 0xc0, 0x0b, 0x9f, 0x3e, 0xaa,
+ 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc, 0xa3, 0x37,
+ 0x96, 0x02, 0xc9, 0x5d, 0xfc, 0x68, 0xc2, 0x56, 0xf7, 0x63,
+ 0xa8, 0x3c, 0x9d, 0x09, 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8,
+ 0x49, 0xdd, 0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25,
+ 0x3a, 0xae, 0x0f, 0x9b, 0x50, 0xc4, 0x65, 0xf1, 0x5b, 0xcf,
+ 0x6e, 0xfa, 0x31, 0xa5, 0x04, 0x90, 0x8f, 0x1b, 0xba, 0x2e,
+ 0xe5, 0x71, 0xd0, 0x44, 0x99, 0x0d, 0xac, 0x38, 0xf3, 0x67,
+ 0xc6, 0x52, 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86,
+ 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7, 0xf8, 0x6c,
+ 0xcd, 0x59, 0x92, 0x06, 0xa7, 0x33, 0xc1, 0x55, 0xf4, 0x60,
+ 0xab, 0x3f, 0x9e, 0x0a, 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb,
+ 0x4a, 0xde, 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf,
+ 0xa0, 0x34, 0x95, 0x01, 0xca, 0x5e, 0xff, 0x6b, 0xb6, 0x22,
+ 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d, 0x62, 0xf6, 0x57, 0xc3,
+ 0x08, 0x9c, 0x3d, 0xa9, 0x03, 0x97, 0x36, 0xa2, 0x69, 0xfd,
+ 0x5c, 0xc8, 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c,
+ 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4, 0xfb, 0x6f,
+ 0xce, 0x5a, 0x91, 0x05, 0xa4, 0x30, 0x9a, 0x0e, 0xaf, 0x3b,
+ 0xf0, 0x64, 0xc5, 0x51, 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0,
+ 0x11, 0x85, 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x07, 0x93,
+ 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47, 0xed, 0x79,
+ 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26, 0x39, 0xad, 0x0c, 0x98,
+ 0x53, 0xc7, 0x66, 0xf2, 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb,
+ 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10,
+ 0xa5, 0x30, 0x92, 0x07, 0xcb, 0x5e, 0xfc, 0x69, 0x79, 0xec,
+ 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5, 0x57, 0xc2, 0x60, 0xf5,
+ 0x39, 0xac, 0x0e, 0x9b, 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70,
+ 0xd2, 0x47, 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x09, 0xab, 0x3e,
+ 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2, 0xae, 0x3b,
+ 0x99, 0x0c, 0xc0, 0x55, 0xf7, 0x62, 0x72, 0xe7, 0x45, 0xd0,
+ 0x1c, 0x89, 0x2b, 0xbe, 0x0b, 0x9e, 0x3c, 0xa9, 0x65, 0xf0,
+ 0x52, 0xc7, 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b,
+ 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x02, 0xa0, 0x35, 0x25, 0xb0,
+ 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9, 0x5c, 0xc9, 0x6b, 0xfe,
+ 0x32, 0xa7, 0x05, 0x90, 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b,
+ 0xd9, 0x4c, 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d,
+ 0x9d, 0x08, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51, 0xe4, 0x71,
+ 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28, 0x38, 0xad, 0x0f, 0x9a,
+ 0x56, 0xc3, 0x61, 0xf4, 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed,
+ 0x4f, 0xda, 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x06,
+ 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f, 0x6f, 0xfa,
+ 0x58, 0xcd, 0x01, 0x94, 0x36, 0xa3, 0xef, 0x7a, 0xd8, 0x4d,
+ 0x81, 0x14, 0xb6, 0x23, 0x33, 0xa6, 0x04, 0x91, 0x5d, 0xc8,
+ 0x6a, 0xff, 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86,
+ 0x96, 0x03, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a, 0xb8, 0x2d,
+ 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74, 0x64, 0xf1, 0x53, 0xc6,
+ 0x0a, 0x9f, 0x3d, 0xa8, 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6,
+ 0x44, 0xd1, 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0x0d,
+ 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52,
+ 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01, 0x95, 0x03, 0xa4, 0x32,
+ 0xf7, 0x61, 0xc6, 0x50, 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5,
+ 0x02, 0x94, 0x37, 0xa1, 0x06, 0x90, 0x55, 0xc3, 0x64, 0xf2,
+ 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x07, 0xa0, 0x36, 0xa2, 0x34,
+ 0x93, 0x05, 0xc0, 0x56, 0xf1, 0x67, 0x66, 0xf0, 0x57, 0xc1,
+ 0x04, 0x92, 0x35, 0xa3, 0x6e, 0xf8, 0x5f, 0xc9, 0x0c, 0x9a,
+ 0x3d, 0xab, 0xaa, 0x3c, 0x9b, 0x0d, 0xc8, 0x5e, 0xf9, 0x6f,
+ 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0x0f, 0xa8, 0x3e, 0x3f, 0xa9,
+ 0x0e, 0x98, 0x5d, 0xcb, 0x6c, 0xfa, 0x59, 0xcf, 0x68, 0xfe,
+ 0x3b, 0xad, 0x0a, 0x9c, 0x9d, 0x0b, 0xac, 0x3a, 0xff, 0x69,
+ 0xce, 0x58, 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x09,
+ 0x08, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd, 0xdc, 0x4a,
+ 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19, 0x18, 0x8e, 0x29, 0xbf,
+ 0x7a, 0xec, 0x4b, 0xdd, 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd,
+ 0x1a, 0x8c, 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48,
+ 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e, 0x2f, 0xb9,
+ 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea, 0x7e, 0xe8, 0x4f, 0xd9,
+ 0x1c, 0x8a, 0x2d, 0xbb, 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e,
+ 0xe9, 0x7f, 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77,
+ 0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3, 0x27, 0xb1,
+ 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2, 0xe3, 0x75, 0xd2, 0x44,
+ 0x81, 0x17, 0xb0, 0x26, 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71,
+ 0xd6, 0x40, 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84,
+ 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5, 0xd4, 0x42,
+ 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11, 0x00, 0x97, 0x33, 0xa4,
+ 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d,
+ 0x99, 0x0e, 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47,
+ 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b, 0x17, 0x80,
+ 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5, 0xdb, 0x4c, 0xe8, 0x7f,
+ 0xbd, 0x2a, 0x8e, 0x19, 0x92, 0x05, 0xa1, 0x36, 0xf4, 0x63,
+ 0xc7, 0x50, 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0x0b, 0x9c,
+ 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec, 0xe2, 0x75,
+ 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20, 0xab, 0x3c, 0x98, 0x0f,
+ 0xcd, 0x5a, 0xfe, 0x69, 0x67, 0xf0, 0x54, 0xc3, 0x01, 0x96,
+ 0x32, 0xa5, 0x39, 0xae, 0x0a, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb,
+ 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x04, 0xa0, 0x37, 0xbc, 0x2b,
+ 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e, 0x70, 0xe7, 0x43, 0xd4,
+ 0x16, 0x81, 0x25, 0xb2, 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad,
+ 0x09, 0x9e, 0x90, 0x07, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52,
+ 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b, 0x15, 0x82,
+ 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7, 0x4b, 0xdc, 0x78, 0xef,
+ 0x2d, 0xba, 0x1e, 0x89, 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76,
+ 0xd2, 0x45, 0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0x0c,
+ 0x02, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0, 0x72, 0xe5,
+ 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0, 0xbe, 0x29, 0x8d, 0x1a,
+ 0xd8, 0x4f, 0xeb, 0x7c, 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x06,
+ 0xa2, 0x35, 0x3b, 0xac, 0x08, 0x9f, 0x5d, 0xca, 0x6e, 0xf9,
+ 0x65, 0xf2, 0x56, 0xc1, 0x03, 0x94, 0x30, 0xa7, 0xa9, 0x3e,
+ 0x9a, 0x0d, 0xcf, 0x58, 0xfc, 0x6b, 0xe0, 0x77, 0xd3, 0x44,
+ 0x86, 0x11, 0xb5, 0x22, 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd,
+ 0x79, 0xee, 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef,
+ 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b, 0x75, 0xed,
+ 0x58, 0xc0, 0x2f, 0xb7, 0x02, 0x9a, 0xc1, 0x59, 0xec, 0x74,
+ 0x9b, 0x03, 0xb6, 0x2e, 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28,
+ 0x9d, 0x05, 0x5e, 0xc6, 0x73, 0xeb, 0x04, 0x9c, 0x29, 0xb1,
+ 0x9f, 0x07, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70, 0x2b, 0xb3,
+ 0x06, 0x9e, 0x71, 0xe9, 0x5c, 0xc4, 0xc9, 0x51, 0xe4, 0x7c,
+ 0x93, 0x0b, 0xbe, 0x26, 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf,
+ 0x0a, 0x92, 0xbc, 0x24, 0x91, 0x09, 0xe6, 0x7e, 0xcb, 0x53,
+ 0x08, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7, 0x23, 0xbb,
+ 0x0e, 0x96, 0x79, 0xe1, 0x54, 0xcc, 0x97, 0x0f, 0xba, 0x22,
+ 0xcd, 0x55, 0xe0, 0x78, 0x56, 0xce, 0x7b, 0xe3, 0x0c, 0x94,
+ 0x21, 0xb9, 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0x0d,
+ 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60, 0x3b, 0xa3,
+ 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4, 0xfa, 0x62, 0xd7, 0x4f,
+ 0xa0, 0x38, 0x8d, 0x15, 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c,
+ 0x39, 0xa1, 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a,
+ 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e, 0x10, 0x88,
+ 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff, 0xa4, 0x3c, 0x89, 0x11,
+ 0xfe, 0x66, 0xd3, 0x4b, 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84,
+ 0x31, 0xa9, 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d,
+ 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc, 0x87, 0x1f,
+ 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68, 0xac, 0x34, 0x81, 0x19,
+ 0xf6, 0x6e, 0xdb, 0x43, 0x18, 0x80, 0x35, 0xad, 0x42, 0xda,
+ 0x6f, 0xf7, 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36,
+ 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82, 0x00, 0x99,
+ 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a,
+ 0xe2, 0x7b, 0xcd, 0x54, 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2,
+ 0x14, 0x8d, 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31,
+ 0xca, 0x53, 0xe5, 0x7c, 0x94, 0x0d, 0xbb, 0x22, 0x76, 0xef,
+ 0x59, 0xc0, 0x28, 0xb1, 0x07, 0x9e, 0xaf, 0x36, 0x80, 0x19,
+ 0xf1, 0x68, 0xde, 0x47, 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4,
+ 0x62, 0xfb, 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61,
+ 0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd, 0xec, 0x75,
+ 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x04, 0x50, 0xc9, 0x7f, 0xe6,
+ 0x0e, 0x97, 0x21, 0xb8, 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84,
+ 0x32, 0xab, 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17,
+ 0x26, 0xbf, 0x09, 0x90, 0x78, 0xe1, 0x57, 0xce, 0x9a, 0x03,
+ 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72, 0x0f, 0x96, 0x20, 0xb9,
+ 0x51, 0xc8, 0x7e, 0xe7, 0xb3, 0x2a, 0x9c, 0x05, 0xed, 0x74,
+ 0xc2, 0x5b, 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82,
+ 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e, 0xc5, 0x5c,
+ 0xea, 0x73, 0x9b, 0x02, 0xb4, 0x2d, 0x79, 0xe0, 0x56, 0xcf,
+ 0x27, 0xbe, 0x08, 0x91, 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67,
+ 0xd1, 0x48, 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4,
+ 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e, 0x3a, 0xa3,
+ 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2, 0xe3, 0x7a, 0xcc, 0x55,
+ 0xbd, 0x24, 0x92, 0x0b, 0x5f, 0xc6, 0x70, 0xe9, 0x01, 0x98,
+ 0x2e, 0xb7, 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4,
+ 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18, 0x29, 0xb0,
+ 0x06, 0x9f, 0x77, 0xee, 0x58, 0xc1, 0x95, 0x0c, 0xba, 0x23,
+ 0xcb, 0x52, 0xe4, 0x7d, 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8,
+ 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45,
+ 0x55, 0xcf, 0x7c, 0xe6, 0x07, 0x9d, 0x2e, 0xb4, 0xf1, 0x6b,
+ 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10, 0xaa, 0x30, 0x83, 0x19,
+ 0xf8, 0x62, 0xd1, 0x4b, 0x0e, 0x94, 0x27, 0xbd, 0x5c, 0xc6,
+ 0x75, 0xef, 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e,
+ 0x5b, 0xc1, 0x72, 0xe8, 0x09, 0x93, 0x20, 0xba, 0x49, 0xd3,
+ 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8, 0xed, 0x77, 0xc4, 0x5e,
+ 0xbf, 0x25, 0x96, 0x0c, 0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4,
+ 0x67, 0xfd, 0xb8, 0x22, 0x91, 0x0b, 0xea, 0x70, 0xc3, 0x59,
+ 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x02, 0x47, 0xdd,
+ 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6, 0xb6, 0x2c, 0x9f, 0x05,
+ 0xe4, 0x7e, 0xcd, 0x57, 0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda,
+ 0x69, 0xf3, 0x92, 0x08, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73,
+ 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7, 0xc7, 0x5d,
+ 0xee, 0x74, 0x95, 0x0f, 0xbc, 0x26, 0x63, 0xf9, 0x4a, 0xd0,
+ 0x31, 0xab, 0x18, 0x82, 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0,
+ 0x43, 0xd9, 0x9c, 0x06, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d,
+ 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c, 0xc9, 0x53,
+ 0xe0, 0x7a, 0x9b, 0x01, 0xb2, 0x28, 0xdb, 0x41, 0xf2, 0x68,
+ 0x89, 0x13, 0xa0, 0x3a, 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7,
+ 0x04, 0x9e, 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f,
+ 0x2a, 0xb0, 0x03, 0x99, 0x78, 0xe2, 0x51, 0xcb, 0x71, 0xeb,
+ 0x58, 0xc2, 0x23, 0xb9, 0x0a, 0x90, 0xd5, 0x4f, 0xfc, 0x66,
+ 0x87, 0x1d, 0xae, 0x34, 0x24, 0xbe, 0x0d, 0x97, 0x76, 0xec,
+ 0x5f, 0xc5, 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61,
+ 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37,
+ 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a, 0x45, 0xde, 0x6e, 0xf5,
+ 0x13, 0x88, 0x38, 0xa3, 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24,
+ 0x94, 0x0f, 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c,
+ 0x26, 0xbd, 0x0d, 0x96, 0x70, 0xeb, 0x5b, 0xc0, 0xcf, 0x54,
+ 0xe4, 0x7f, 0x99, 0x02, 0xb2, 0x29, 0x63, 0xf8, 0x48, 0xd3,
+ 0x35, 0xae, 0x1e, 0x85, 0x09, 0x92, 0x22, 0xb9, 0x5f, 0xc4,
+ 0x74, 0xef, 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43,
+ 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa, 0xe0, 0x7b,
+ 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x06, 0x83, 0x18, 0xa8, 0x33,
+ 0xd5, 0x4e, 0xfe, 0x65, 0x2f, 0xb4, 0x04, 0x9f, 0x79, 0xe2,
+ 0x52, 0xc9, 0xc6, 0x5d, 0xed, 0x76, 0x90, 0x0b, 0xbb, 0x20,
+ 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c, 0x12, 0x89,
+ 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4, 0xbe, 0x25, 0x95, 0x0e,
+ 0xe8, 0x73, 0xc3, 0x58, 0x57, 0xcc, 0x7c, 0xe7, 0x01, 0x9a,
+ 0x2a, 0xb1, 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d,
+ 0x98, 0x03, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e, 0x34, 0xaf,
+ 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2, 0xdd, 0x46, 0xf6, 0x6d,
+ 0x8b, 0x10, 0xa0, 0x3b, 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc,
+ 0x0c, 0x97, 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd,
+ 0xb7, 0x2c, 0x9c, 0x07, 0xe1, 0x7a, 0xca, 0x51, 0x5e, 0xc5,
+ 0x75, 0xee, 0x08, 0x93, 0x23, 0xb8, 0xf2, 0x69, 0xd9, 0x42,
+ 0xa4, 0x3f, 0x8f, 0x14, 0x91, 0x0a, 0xba, 0x21, 0xc7, 0x5c,
+ 0xec, 0x77, 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb,
+ 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32, 0x78, 0xe3,
+ 0x53, 0xc8, 0x2e, 0xb5, 0x05, 0x9e, 0x00, 0x9c, 0x25, 0xb9,
+ 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42,
+ 0xfb, 0x67, 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6,
+ 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52, 0x6a, 0xf6,
+ 0x4f, 0xd3, 0x20, 0xbc, 0x05, 0x99, 0xfe, 0x62, 0xdb, 0x47,
+ 0xb4, 0x28, 0x91, 0x0d, 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89,
+ 0x30, 0xac, 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38,
+ 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x02, 0xbb, 0x27, 0x40, 0xdc,
+ 0x65, 0xf9, 0x0a, 0x96, 0x2f, 0xb3, 0xe1, 0x7d, 0xc4, 0x58,
+ 0xab, 0x37, 0x8e, 0x12, 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3,
+ 0x1a, 0x86, 0xbe, 0x22, 0x9b, 0x07, 0xf4, 0x68, 0xd1, 0x4d,
+ 0x2a, 0xb6, 0x0f, 0x93, 0x60, 0xfc, 0x45, 0xd9, 0x8b, 0x17,
+ 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78, 0x1f, 0x83, 0x3a, 0xa6,
+ 0x55, 0xc9, 0x70, 0xec, 0xb5, 0x29, 0x90, 0x0c, 0xff, 0x63,
+ 0xda, 0x46, 0x21, 0xbd, 0x04, 0x98, 0x6b, 0xf7, 0x4e, 0xd2,
+ 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73, 0x14, 0x88,
+ 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7, 0xdf, 0x43, 0xfa, 0x66,
+ 0x95, 0x09, 0xb0, 0x2c, 0x4b, 0xd7, 0x6e, 0xf2, 0x01, 0x9d,
+ 0x24, 0xb8, 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19,
+ 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d, 0x61, 0xfd,
+ 0x44, 0xd8, 0x2b, 0xb7, 0x0e, 0x92, 0xf5, 0x69, 0xd0, 0x4c,
+ 0xbf, 0x23, 0x9a, 0x06, 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82,
+ 0x3b, 0xa7, 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33,
+ 0x0b, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8, 0x9f, 0x03,
+ 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c, 0x3e, 0xa2, 0x1b, 0x87,
+ 0x74, 0xe8, 0x51, 0xcd, 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c,
+ 0xc5, 0x59, 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4,
+ 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68, 0x25, 0xb8,
+ 0x02, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1, 0xb9, 0x24, 0x9e, 0x03,
+ 0xf7, 0x6a, 0xd0, 0x4d, 0x4a, 0xd7, 0x6d, 0xf0, 0x04, 0x99,
+ 0x23, 0xbe, 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x05, 0xbf, 0x22,
+ 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x06, 0x9b, 0xf3, 0x6e,
+ 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x07, 0x94, 0x09, 0xb3, 0x2e,
+ 0xda, 0x47, 0xfd, 0x60, 0x08, 0x95, 0x2f, 0xb2, 0x46, 0xdb,
+ 0x61, 0xfc, 0xb1, 0x2c, 0x96, 0x0b, 0xff, 0x62, 0xd8, 0x45,
+ 0x2d, 0xb0, 0x0a, 0x97, 0x63, 0xfe, 0x44, 0xd9, 0xde, 0x43,
+ 0xf9, 0x64, 0x90, 0x0d, 0xb7, 0x2a, 0x42, 0xdf, 0x65, 0xf8,
+ 0x0c, 0x91, 0x2b, 0xb6, 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28,
+ 0x92, 0x0f, 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0x0e, 0x93,
+ 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1, 0xa9, 0x34,
+ 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d, 0x10, 0x8d, 0x37, 0xaa,
+ 0x5e, 0xc3, 0x79, 0xe4, 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f,
+ 0xe5, 0x78, 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b,
+ 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17, 0x5a, 0xc7,
+ 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae, 0xc6, 0x5b, 0xe1, 0x7c,
+ 0x88, 0x15, 0xaf, 0x32, 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72,
+ 0xc8, 0x55, 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9,
+ 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70, 0x18, 0x85,
+ 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec, 0xeb, 0x76, 0xcc, 0x51,
+ 0xa5, 0x38, 0x82, 0x1f, 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4,
+ 0x1e, 0x83, 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a,
+ 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6, 0x00, 0x9e,
+ 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b,
+ 0xc6, 0x58, 0xe7, 0x79, 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9,
+ 0x76, 0xe8, 0x91, 0x0f, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c,
+ 0x2a, 0xb4, 0x0b, 0x95, 0x68, 0xf6, 0x49, 0xd7, 0xae, 0x30,
+ 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53, 0x3f, 0xa1, 0x1e, 0x80,
+ 0x7d, 0xe3, 0x5c, 0xc2, 0xbb, 0x25, 0x9a, 0x04, 0xf9, 0x67,
+ 0xd8, 0x46, 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9,
+ 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0x0c, 0xb3, 0x2d, 0x41, 0xdf,
+ 0x60, 0xfe, 0x03, 0x9d, 0x22, 0xbc, 0xc5, 0x5b, 0xe4, 0x7a,
+ 0x87, 0x19, 0xa6, 0x38, 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2,
+ 0x1d, 0x83, 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x07,
+ 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x08, 0x96, 0xef, 0x71,
+ 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12, 0xa8, 0x36, 0x89, 0x17,
+ 0xea, 0x74, 0xcb, 0x55, 0x2c, 0xb2, 0x0d, 0x93, 0x6e, 0xf0,
+ 0x4f, 0xd1, 0xbd, 0x23, 0x9c, 0x02, 0xff, 0x61, 0xde, 0x40,
+ 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4, 0x82, 0x1c,
+ 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f, 0x06, 0x98, 0x27, 0xb9,
+ 0x44, 0xda, 0x65, 0xfb, 0x97, 0x09, 0xb6, 0x28, 0xd5, 0x4b,
+ 0xf4, 0x6a, 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee,
+ 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x01, 0x78, 0xe6,
+ 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85, 0xe9, 0x77, 0xc8, 0x56,
+ 0xab, 0x35, 0x8a, 0x14, 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1,
+ 0x0e, 0x90, 0xd6, 0x48, 0xf7, 0x69, 0x94, 0x0a, 0xb5, 0x2b,
+ 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf, 0xc3, 0x5d,
+ 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e, 0x47, 0xd9, 0x66, 0xf8,
+ 0x05, 0x9b, 0x24, 0xba, 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9,
+ 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76,
+ 0x05, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff, 0x89, 0x16,
+ 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73, 0x0a, 0x95, 0x29, 0xb6,
+ 0x4c, 0xd3, 0x6f, 0xf0, 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f,
+ 0xe3, 0x7c, 0x0f, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5,
+ 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79, 0x14, 0x8b,
+ 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee, 0x98, 0x07, 0xbb, 0x24,
+ 0xde, 0x41, 0xfd, 0x62, 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8,
+ 0x74, 0xeb, 0x9d, 0x02, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67,
+ 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4, 0x92, 0x0d,
+ 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68, 0x1b, 0x84, 0x38, 0xa7,
+ 0x5d, 0xc2, 0x7e, 0xe1, 0x97, 0x08, 0xb4, 0x2b, 0xd1, 0x4e,
+ 0xf2, 0x6d, 0x28, 0xb7, 0x0b, 0x94, 0x6e, 0xf1, 0x4d, 0xd2,
+ 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e, 0x2d, 0xb2,
+ 0x0e, 0x91, 0x6b, 0xf4, 0x48, 0xd7, 0xa1, 0x3e, 0x82, 0x1d,
+ 0xe7, 0x78, 0xc4, 0x5b, 0x22, 0xbd, 0x01, 0x9e, 0x64, 0xfb,
+ 0x47, 0xd8, 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54,
+ 0x27, 0xb8, 0x04, 0x9b, 0x61, 0xfe, 0x42, 0xdd, 0xab, 0x34,
+ 0x88, 0x17, 0xed, 0x72, 0xce, 0x51, 0x3c, 0xa3, 0x1f, 0x80,
+ 0x7a, 0xe5, 0x59, 0xc6, 0xb0, 0x2f, 0x93, 0x0c, 0xf6, 0x69,
+ 0xd5, 0x4a, 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3,
+ 0xb5, 0x2a, 0x96, 0x09, 0xf3, 0x6c, 0xd0, 0x4f, 0x36, 0xa9,
+ 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc, 0xba, 0x25, 0x99, 0x06,
+ 0xfc, 0x63, 0xdf, 0x40, 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea,
+ 0x56, 0xc9, 0xbf, 0x20, 0x9c, 0x03, 0xf9, 0x66, 0xda, 0x45,
+ 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9,
+ 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e, 0xd2, 0x72, 0x8f, 0x2f,
+ 0x68, 0xc8, 0x35, 0x95, 0xbb, 0x1b, 0xe6, 0x46, 0x01, 0xa1,
+ 0x5c, 0xfc, 0xb9, 0x19, 0xe4, 0x44, 0x03, 0xa3, 0x5e, 0xfe,
+ 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97, 0x6b, 0xcb,
+ 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c, 0x02, 0xa2, 0x5f, 0xff,
+ 0xb8, 0x18, 0xe5, 0x45, 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75,
+ 0x88, 0x28, 0x06, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41,
+ 0xbd, 0x1d, 0xe0, 0x40, 0x07, 0xa7, 0x5a, 0xfa, 0xd4, 0x74,
+ 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93, 0xd6, 0x76, 0x8b, 0x2b,
+ 0x6c, 0xcc, 0x31, 0x91, 0xbf, 0x1f, 0xe2, 0x42, 0x05, 0xa5,
+ 0x58, 0xf8, 0x04, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43,
+ 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a, 0xde, 0x7e,
+ 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99, 0xb7, 0x17, 0xea, 0x4a,
+ 0x0d, 0xad, 0x50, 0xf0, 0x0c, 0xac, 0x51, 0xf1, 0xb6, 0x16,
+ 0xeb, 0x4b, 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22,
+ 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20, 0x0e, 0xae,
+ 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49, 0xb5, 0x15, 0xe8, 0x48,
+ 0x0f, 0xaf, 0x52, 0xf2, 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6,
+ 0x3b, 0x9b, 0xb1, 0x11, 0xec, 0x4c, 0x0b, 0xab, 0x56, 0xf6,
+ 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f, 0x63, 0xc3,
+ 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24, 0x0a, 0xaa, 0x57, 0xf7,
+ 0xb0, 0x10, 0xed, 0x4d, 0x08, 0xa8, 0x55, 0xf5, 0xb2, 0x12,
+ 0xef, 0x4f, 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26,
+ 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d, 0xb3, 0x13,
+ 0xee, 0x4e, 0x09, 0xa9, 0x54, 0xf4, 0x00, 0xa1, 0x5f, 0xfe,
+ 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e,
+ 0x80, 0x21, 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82,
+ 0xa3, 0x02, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3, 0x99, 0x38,
+ 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9, 0xf8, 0x59, 0xa7, 0x06,
+ 0x46, 0xe7, 0x19, 0xb8, 0x5b, 0xfa, 0x04, 0xa5, 0xe5, 0x44,
+ 0xba, 0x1b, 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a,
+ 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f, 0x4e, 0xef,
+ 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0x0e, 0xed, 0x4c, 0xb2, 0x13,
+ 0x53, 0xf2, 0x0c, 0xad, 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93,
+ 0x6d, 0xcc, 0xb6, 0x17, 0xe9, 0x48, 0x08, 0xa9, 0x57, 0xf6,
+ 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97, 0x74, 0xd5,
+ 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34, 0x15, 0xb4, 0x4a, 0xeb,
+ 0xab, 0x0a, 0xf4, 0x55, 0x5e, 0xff, 0x01, 0xa0, 0xe0, 0x41,
+ 0xbf, 0x1e, 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f,
+ 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc, 0xfd, 0x5c,
+ 0xa2, 0x03, 0x43, 0xe2, 0x1c, 0xbd, 0xc7, 0x66, 0x98, 0x39,
+ 0x79, 0xd8, 0x26, 0x87, 0xa6, 0x07, 0xf9, 0x58, 0x18, 0xb9,
+ 0x47, 0xe6, 0x05, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45,
+ 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24, 0x71, 0xd0,
+ 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31, 0x10, 0xb1, 0x4f, 0xee,
+ 0xae, 0x0f, 0xf1, 0x50, 0xb3, 0x12, 0xec, 0x4d, 0x0d, 0xac,
+ 0x52, 0xf3, 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92,
+ 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x09, 0xa8, 0x89, 0x28,
+ 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9, 0x2a, 0x8b, 0x75, 0xd4,
+ 0x94, 0x35, 0xcb, 0x6a, 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54,
+ 0xaa, 0x0b, 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49,
+ 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30, 0xf2, 0x50,
+ 0xab, 0x09, 0x40, 0xe2, 0x19, 0xbb, 0x8b, 0x29, 0xd2, 0x70,
+ 0x39, 0x9b, 0x60, 0xc2, 0xf9, 0x5b, 0xa0, 0x02, 0x4b, 0xe9,
+ 0x12, 0xb0, 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9,
+ 0x0b, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42, 0x72, 0xd0,
+ 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b, 0xef, 0x4d, 0xb6, 0x14,
+ 0x5d, 0xff, 0x04, 0xa6, 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86,
+ 0x7d, 0xdf, 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0x0d, 0xf6, 0x54,
+ 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d, 0x16, 0xb4,
+ 0x4f, 0xed, 0xa4, 0x06, 0xfd, 0x5f, 0x6f, 0xcd, 0x36, 0x94,
+ 0xdd, 0x7f, 0x84, 0x26, 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4,
+ 0x0f, 0xad, 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4,
+ 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a, 0xba, 0x18,
+ 0xe3, 0x41, 0x08, 0xaa, 0x51, 0xf3, 0x31, 0x93, 0x68, 0xca,
+ 0x83, 0x21, 0xda, 0x78, 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58,
+ 0xa3, 0x01, 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73,
+ 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0x0a, 0xc8, 0x6a,
+ 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81, 0xb1, 0x13, 0xe8, 0x4a,
+ 0x03, 0xa1, 0x5a, 0xf8, 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c,
+ 0xc7, 0x65, 0x55, 0xf7, 0x0c, 0xae, 0xe7, 0x45, 0xbe, 0x1c,
+ 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97, 0xa7, 0x05,
+ 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee, 0xd5, 0x77, 0x8c, 0x2e,
+ 0x67, 0xc5, 0x3e, 0x9c, 0xac, 0x0e, 0xf5, 0x57, 0x1e, 0xbc,
+ 0x47, 0xe5, 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e,
+ 0x5e, 0xfc, 0x07, 0xa5, 0xec, 0x4e, 0xb5, 0x17, 0x00, 0xa3,
+ 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89,
+ 0xc7, 0x64, 0x9c, 0x3f, 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7,
+ 0x0f, 0xac, 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd,
+ 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97, 0xa8, 0x0b,
+ 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6, 0x3b, 0x98, 0x60, 0xc3,
+ 0x8d, 0x2e, 0xd6, 0x75, 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f,
+ 0xa7, 0x04, 0xaf, 0x0c, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1,
+ 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90, 0x4d, 0xee,
+ 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x03, 0x3c, 0x9f, 0x67, 0xc4,
+ 0x8a, 0x29, 0xd1, 0x72, 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63,
+ 0x9b, 0x38, 0x07, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49,
+ 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda, 0xe5, 0x46,
+ 0xbe, 0x1d, 0x53, 0xf0, 0x08, 0xab, 0x43, 0xe0, 0x18, 0xbb,
+ 0xf5, 0x56, 0xae, 0x0d, 0x32, 0x91, 0x69, 0xca, 0x84, 0x27,
+ 0xdf, 0x7c, 0xa1, 0x02, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef,
+ 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e, 0x9a, 0x39,
+ 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4, 0xeb, 0x48, 0xb0, 0x13,
+ 0x5d, 0xfe, 0x06, 0xa5, 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d,
+ 0x95, 0x36, 0x09, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47,
+ 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x01, 0xa2, 0x9d, 0x3e,
+ 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3, 0x0e, 0xad, 0x55, 0xf6,
+ 0xb8, 0x1b, 0xe3, 0x40, 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a,
+ 0x92, 0x31, 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b,
+ 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0x0a, 0xd7, 0x74,
+ 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99, 0xa6, 0x05, 0xfd, 0x5e,
+ 0x10, 0xb3, 0x4b, 0xe8, 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e,
+ 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12,
+ 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9, 0xdb, 0x7f,
+ 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80, 0x39, 0x9d, 0x6c, 0xc8,
+ 0x93, 0x37, 0xc6, 0x62, 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e,
+ 0x8f, 0x2b, 0xab, 0x0f, 0xfe, 0x5a, 0x01, 0xa5, 0x54, 0xf0,
+ 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9, 0x72, 0xd6,
+ 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29, 0x3b, 0x9f, 0x6e, 0xca,
+ 0x91, 0x35, 0xc4, 0x60, 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee,
+ 0x1f, 0xbb, 0xa9, 0x0d, 0xfc, 0x58, 0x03, 0xa7, 0x56, 0xf2,
+ 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10, 0x02, 0xa6,
+ 0x57, 0xf3, 0xa8, 0x0c, 0xfd, 0x59, 0xd9, 0x7d, 0x8c, 0x28,
+ 0x73, 0xd7, 0x26, 0x82, 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e,
+ 0x6f, 0xcb, 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf,
+ 0xad, 0x09, 0xf8, 0x5c, 0x07, 0xa3, 0x52, 0xf6, 0x76, 0xd2,
+ 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d, 0x3f, 0x9b, 0x6a, 0xce,
+ 0x95, 0x31, 0xc0, 0x64, 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3,
+ 0x22, 0x86, 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf,
+ 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14, 0x06, 0xa2,
+ 0x53, 0xf7, 0xac, 0x08, 0xf9, 0x5d, 0x96, 0x32, 0xc3, 0x67,
+ 0x3c, 0x98, 0x69, 0xcd, 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1,
+ 0x20, 0x84, 0x04, 0xa0, 0x51, 0xf5, 0xae, 0x0a, 0xfb, 0x5f,
+ 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16, 0xaf, 0x0b,
+ 0xfa, 0x5e, 0x05, 0xa1, 0x50, 0xf4, 0xe6, 0x42, 0xb3, 0x17,
+ 0x4c, 0xe8, 0x19, 0xbd, 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33,
+ 0xc2, 0x66, 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f,
+ 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4,
+ 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d, 0x82, 0x27, 0xd5, 0x70,
+ 0x2c, 0x89, 0x7b, 0xde, 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8,
+ 0x3a, 0x9f, 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45,
+ 0x58, 0xfd, 0x0f, 0xaa, 0xf6, 0x53, 0xa1, 0x04, 0x9b, 0x3e,
+ 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7, 0xda, 0x7f, 0x8d, 0x28,
+ 0x74, 0xd1, 0x23, 0x86, 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39,
+ 0xcb, 0x6e, 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f,
+ 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec, 0xf1, 0x54,
+ 0xa6, 0x03, 0x5f, 0xfa, 0x08, 0xad, 0x2b, 0x8e, 0x7c, 0xd9,
+ 0x85, 0x20, 0xd2, 0x77, 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61,
+ 0x93, 0x36, 0xa9, 0x0c, 0xfe, 0x5b, 0x07, 0xa2, 0x50, 0xf5,
+ 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4, 0x64, 0xc1,
+ 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38, 0x25, 0x80, 0x72, 0xd7,
+ 0x8b, 0x2e, 0xdc, 0x79, 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed,
+ 0x1f, 0xba, 0xa7, 0x02, 0xf0, 0x55, 0x09, 0xac, 0x5e, 0xfb,
+ 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21, 0x3c, 0x99,
+ 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60, 0xff, 0x5a, 0xa8, 0x0d,
+ 0x51, 0xf4, 0x06, 0xa3, 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5,
+ 0x47, 0xe2, 0x56, 0xf3, 0x01, 0xa4, 0xf8, 0x5d, 0xaf, 0x0a,
+ 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b, 0xd4, 0x71,
+ 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88, 0x95, 0x30, 0xc2, 0x67,
+ 0x3b, 0x9e, 0x6c, 0xc9, 0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44,
+ 0xb6, 0x13, 0x0e, 0xab, 0x59, 0xfc, 0xa0, 0x05, 0xf7, 0x52,
+ 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91, 0x8c, 0x29,
+ 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0, 0x00, 0xa6, 0x51, 0xf7,
+ 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d,
+ 0xaa, 0x0c, 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7,
+ 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe, 0x79, 0xdf,
+ 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c, 0x20, 0x86, 0x71, 0xd7,
+ 0x82, 0x24, 0xd3, 0x75, 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf,
+ 0x38, 0x9e, 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7,
+ 0xf2, 0x54, 0xa3, 0x05, 0x50, 0xf6, 0x01, 0xa7, 0xab, 0x0d,
+ 0xfa, 0x5c, 0x09, 0xaf, 0x58, 0xfe, 0x40, 0xe6, 0x11, 0xb7,
+ 0xe2, 0x44, 0xb3, 0x15, 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d,
+ 0xea, 0x4c, 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde,
+ 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87, 0x39, 0x9f,
+ 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c, 0x60, 0xc6, 0x31, 0x97,
+ 0xc2, 0x64, 0x93, 0x35, 0xf9, 0x5f, 0xa8, 0x0e, 0x5b, 0xfd,
+ 0x0a, 0xac, 0xa0, 0x06, 0xf1, 0x57, 0x02, 0xa4, 0x53, 0xf5,
+ 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e, 0x12, 0xb4,
+ 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47, 0x80, 0x26, 0xd1, 0x77,
+ 0x22, 0x84, 0x73, 0xd5, 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd,
+ 0x2a, 0x8c, 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67,
+ 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e, 0x0b, 0xad,
+ 0x5a, 0xfc, 0xa9, 0x0f, 0xf8, 0x5e, 0x52, 0xf4, 0x03, 0xa5,
+ 0xf0, 0x56, 0xa1, 0x07, 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd,
+ 0x4a, 0xec, 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5,
+ 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27, 0x2b, 0x8d,
+ 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e, 0xc0, 0x66, 0x91, 0x37,
+ 0x62, 0xc4, 0x33, 0x95, 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d,
+ 0x6a, 0xcc, 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52,
+ 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03, 0xa2, 0x05,
+ 0xf1, 0x56, 0x04, 0xa3, 0x57, 0xf0, 0xf3, 0x54, 0xa0, 0x07,
+ 0x55, 0xf2, 0x06, 0xa1, 0x59, 0xfe, 0x0a, 0xad, 0xff, 0x58,
+ 0xac, 0x0b, 0x08, 0xaf, 0x5b, 0xfc, 0xae, 0x09, 0xfd, 0x5a,
+ 0xfb, 0x5c, 0xa8, 0x0f, 0x5d, 0xfa, 0x0e, 0xa9, 0xaa, 0x0d,
+ 0xf9, 0x5e, 0x0c, 0xab, 0x5f, 0xf8, 0xb2, 0x15, 0xe1, 0x46,
+ 0x14, 0xb3, 0x47, 0xe0, 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2,
+ 0x16, 0xb1, 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42,
+ 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13, 0xeb, 0x4c,
+ 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9, 0xba, 0x1d, 0xe9, 0x4e,
+ 0x1c, 0xbb, 0x4f, 0xe8, 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48,
+ 0xbc, 0x1b, 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a,
+ 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b, 0x28, 0x8f,
+ 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a, 0xdb, 0x7c, 0x88, 0x2f,
+ 0x7d, 0xda, 0x2e, 0x89, 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b,
+ 0x7f, 0xd8, 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72,
+ 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23, 0x82, 0x25,
+ 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0, 0xd3, 0x74, 0x80, 0x27,
+ 0x75, 0xd2, 0x26, 0x81, 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca,
+ 0x3e, 0x99, 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8,
+ 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b, 0x38, 0x9f,
+ 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a, 0x92, 0x35, 0xc1, 0x66,
+ 0x34, 0x93, 0x67, 0xc0, 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2,
+ 0x36, 0x91, 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62,
+ 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33, 0x00, 0xa8,
+ 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc,
+ 0xb3, 0x1b, 0xfe, 0x56, 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60,
+ 0x85, 0x2d, 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x04,
+ 0xa4, 0x0c, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb, 0x8d, 0x25,
+ 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2, 0xf6, 0x5e, 0xbb, 0x13,
+ 0x6c, 0xc4, 0x21, 0x89, 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed,
+ 0x08, 0xa0, 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a,
+ 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x03, 0x07, 0xaf,
+ 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78, 0x2e, 0x86, 0x63, 0xcb,
+ 0xb4, 0x1c, 0xf9, 0x51, 0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3,
+ 0x26, 0x8e, 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0x0f, 0xa7,
+ 0xa3, 0x0b, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc, 0x8a, 0x22,
+ 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5, 0xaa, 0x02, 0xe7, 0x4f,
+ 0x30, 0x98, 0x7d, 0xd5, 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1,
+ 0x54, 0xfc, 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87,
+ 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x06, 0xae, 0x0e, 0xa6,
+ 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71, 0x27, 0x8f, 0x6a, 0xc2,
+ 0xbd, 0x15, 0xf0, 0x58, 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e,
+ 0x8b, 0x23, 0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0x0a,
+ 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80, 0xd6, 0x7e,
+ 0x9b, 0x33, 0x4c, 0xe4, 0x01, 0xa9, 0xad, 0x05, 0xe0, 0x48,
+ 0x37, 0x9f, 0x7a, 0xd2, 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6,
+ 0x53, 0xfb, 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24,
+ 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0x0d, 0x09, 0xa1,
+ 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76, 0x20, 0x88, 0x6d, 0xc5,
+ 0xba, 0x12, 0xf7, 0x5f, 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37,
+ 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59,
+ 0x42, 0xeb, 0x0d, 0xa4, 0xdc, 0x75, 0x93, 0x3a, 0x63, 0xca,
+ 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b, 0x84, 0x2d, 0xcb, 0x62,
+ 0x1a, 0xb3, 0x55, 0xfc, 0xa5, 0x0c, 0xea, 0x43, 0x3b, 0x92,
+ 0x74, 0xdd, 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe,
+ 0xe7, 0x4e, 0xa8, 0x01, 0x79, 0xd0, 0x36, 0x9f, 0x15, 0xbc,
+ 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d, 0x34, 0x9d, 0x7b, 0xd2,
+ 0xaa, 0x03, 0xe5, 0x4c, 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60,
+ 0x86, 0x2f, 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0x0e,
+ 0x91, 0x38, 0xde, 0x77, 0x0f, 0xa6, 0x40, 0xe9, 0xb0, 0x19,
+ 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8, 0xd3, 0x7a, 0x9c, 0x35,
+ 0x4d, 0xe4, 0x02, 0xab, 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5,
+ 0x23, 0x8a, 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52,
+ 0x0b, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73, 0x68, 0xc1,
+ 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10, 0x49, 0xe0, 0x06, 0xaf,
+ 0xd7, 0x7e, 0x98, 0x31, 0xae, 0x07, 0xe1, 0x48, 0x30, 0x99,
+ 0x7f, 0xd6, 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7,
+ 0xec, 0x45, 0xa3, 0x0a, 0x72, 0xdb, 0x3d, 0x94, 0xcd, 0x64,
+ 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5, 0x3f, 0x96, 0x70, 0xd9,
+ 0xa1, 0x08, 0xee, 0x47, 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29,
+ 0xcf, 0x66, 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x05,
+ 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24, 0xbb, 0x12,
+ 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3, 0x9a, 0x33, 0xd5, 0x7c,
+ 0x04, 0xad, 0x4b, 0xe2, 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce,
+ 0x28, 0x81, 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x09, 0xa0,
+ 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93,
+ 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48, 0x72, 0xd8, 0x3b, 0x91,
+ 0xe0, 0x4a, 0xa9, 0x03, 0x4b, 0xe1, 0x02, 0xa8, 0xd9, 0x73,
+ 0x90, 0x3a, 0xe4, 0x4e, 0xad, 0x07, 0x76, 0xdc, 0x3f, 0x95,
+ 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x06, 0xac, 0x96, 0x3c,
+ 0xdf, 0x75, 0x04, 0xae, 0x4d, 0xe7, 0xaf, 0x05, 0xe6, 0x4c,
+ 0x3d, 0x97, 0x74, 0xde, 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed,
+ 0x0e, 0xa4, 0xec, 0x46, 0xa5, 0x0f, 0x7e, 0xd4, 0x37, 0x9d,
+ 0xa7, 0x0d, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6, 0x9e, 0x34,
+ 0xd7, 0x7d, 0x0c, 0xa6, 0x45, 0xef, 0x31, 0x9b, 0x78, 0xd2,
+ 0xa3, 0x09, 0xea, 0x40, 0x08, 0xa2, 0x41, 0xeb, 0x9a, 0x30,
+ 0xd3, 0x79, 0x43, 0xe9, 0x0a, 0xa0, 0xd1, 0x7b, 0x98, 0x32,
+ 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0x0b, 0xb7, 0x1d,
+ 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6, 0x8e, 0x24, 0xc7, 0x6d,
+ 0x1c, 0xb6, 0x55, 0xff, 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd,
+ 0x1e, 0xb4, 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d,
+ 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22, 0x6a, 0xc0,
+ 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b, 0x21, 0x8b, 0x68, 0xc2,
+ 0xb3, 0x19, 0xfa, 0x50, 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20,
+ 0xc3, 0x69, 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13,
+ 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a, 0x10, 0xba,
+ 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61, 0x29, 0x83, 0x60, 0xca,
+ 0xbb, 0x11, 0xf2, 0x58, 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe,
+ 0x5d, 0xf7, 0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce,
+ 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85, 0xcd, 0x67,
+ 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc, 0x00, 0xab, 0x4b, 0xe0,
+ 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c,
+ 0xec, 0x47, 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14,
+ 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25, 0xc4, 0x6f,
+ 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2, 0xf5, 0x5e, 0xbe, 0x15,
+ 0x63, 0xc8, 0x28, 0x83, 0xa6, 0x0d, 0xed, 0x46, 0x30, 0x9b,
+ 0x7b, 0xd0, 0x97, 0x3c, 0xdc, 0x77, 0x01, 0xaa, 0x4a, 0xe1,
+ 0x95, 0x3e, 0xde, 0x75, 0x03, 0xa8, 0x48, 0xe3, 0xa4, 0x0f,
+ 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2, 0xf7, 0x5c, 0xbc, 0x17,
+ 0x61, 0xca, 0x2a, 0x81, 0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb,
+ 0x1b, 0xb0, 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27,
+ 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16, 0x33, 0x98,
+ 0x78, 0xd3, 0xa5, 0x0e, 0xee, 0x45, 0x02, 0xa9, 0x49, 0xe2,
+ 0x94, 0x3f, 0xdf, 0x74, 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0x0a,
+ 0xea, 0x41, 0x06, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70,
+ 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23, 0x64, 0xcf,
+ 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12, 0xf3, 0x58, 0xb8, 0x13,
+ 0x65, 0xce, 0x2e, 0x85, 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff,
+ 0x1f, 0xb4, 0x91, 0x3a, 0xda, 0x71, 0x07, 0xac, 0x4c, 0xe7,
+ 0xa0, 0x0b, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6, 0xa2, 0x09,
+ 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4, 0x93, 0x38, 0xd8, 0x73,
+ 0x05, 0xae, 0x4e, 0xe5, 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd,
+ 0x1d, 0xb6, 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87,
+ 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10, 0x57, 0xfc,
+ 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21, 0x04, 0xaf, 0x4f, 0xe4,
+ 0x92, 0x39, 0xd9, 0x72, 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x08,
+ 0xe8, 0x43, 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63,
+ 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a, 0x12, 0xbe,
+ 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71, 0x1b, 0xb7, 0x5e, 0xf2,
+ 0x91, 0x3d, 0xd4, 0x78, 0x24, 0x88, 0x61, 0xcd, 0xae, 0x02,
+ 0xeb, 0x47, 0x2d, 0x81, 0x68, 0xc4, 0xa7, 0x0b, 0xe2, 0x4e,
+ 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55, 0x3f, 0x93,
+ 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c, 0x48, 0xe4, 0x0d, 0xa1,
+ 0xc2, 0x6e, 0x87, 0x2b, 0x41, 0xed, 0x04, 0xa8, 0xcb, 0x67,
+ 0x8e, 0x22, 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39,
+ 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30, 0x6c, 0xc0,
+ 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0x0f, 0x65, 0xc9, 0x20, 0x8c,
+ 0xef, 0x43, 0xaa, 0x06, 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58,
+ 0xb1, 0x1d, 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14,
+ 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3, 0x99, 0x35,
+ 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa, 0x82, 0x2e, 0xc7, 0x6b,
+ 0x08, 0xa4, 0x4d, 0xe1, 0x8b, 0x27, 0xce, 0x62, 0x01, 0xad,
+ 0x44, 0xe8, 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7,
+ 0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde, 0xa6, 0x0a,
+ 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5, 0xaf, 0x03, 0xea, 0x46,
+ 0x25, 0x89, 0x60, 0xcc, 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe,
+ 0x17, 0xbb, 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2,
+ 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x05, 0xa9, 0xc3, 0x6f,
+ 0x86, 0x2a, 0x49, 0xe5, 0x0c, 0xa0, 0xfc, 0x50, 0xb9, 0x15,
+ 0x76, 0xda, 0x33, 0x9f, 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3,
+ 0x3a, 0x96, 0xee, 0x42, 0xab, 0x07, 0x64, 0xc8, 0x21, 0x8d,
+ 0xe7, 0x4b, 0xa2, 0x0e, 0x6d, 0xc1, 0x28, 0x84, 0x00, 0xad,
+ 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb,
+ 0x8f, 0x22, 0xc8, 0x65, 0x02, 0xaf, 0x45, 0xe8, 0x8c, 0x21,
+ 0xcb, 0x66, 0x03, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67,
+ 0x04, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60, 0x05, 0xa8,
+ 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61, 0x06, 0xab, 0x41, 0xec,
+ 0x88, 0x25, 0xcf, 0x62, 0x07, 0xaa, 0x40, 0xed, 0x89, 0x24,
+ 0xce, 0x63, 0x08, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c,
+ 0x09, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d, 0x0a, 0xa7,
+ 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e, 0x0b, 0xa6, 0x4c, 0xe1,
+ 0x85, 0x28, 0xc2, 0x6f, 0x0c, 0xa1, 0x4b, 0xe6, 0x82, 0x2f,
+ 0xc5, 0x68, 0x0d, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69,
+ 0x0e, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a, 0x0f, 0xa2,
+ 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b, 0x10, 0xbd, 0x57, 0xfa,
+ 0x9e, 0x33, 0xd9, 0x74, 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32,
+ 0xd8, 0x75, 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76,
+ 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77, 0x14, 0xb9,
+ 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70, 0x15, 0xb8, 0x52, 0xff,
+ 0x9b, 0x36, 0xdc, 0x71, 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35,
+ 0xdf, 0x72, 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73,
+ 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c, 0x19, 0xb4,
+ 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d, 0x1a, 0xb7, 0x5d, 0xf0,
+ 0x94, 0x39, 0xd3, 0x7e, 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38,
+ 0xd2, 0x7f, 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78,
+ 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79, 0x1e, 0xb3,
+ 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a, 0x1f, 0xb2, 0x58, 0xf5,
+ 0x91, 0x3c, 0xd6, 0x7b, 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c,
+ 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74,
+ 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f, 0x2b, 0x85,
+ 0x6a, 0xc4, 0xa9, 0x07, 0xe8, 0x46, 0x64, 0xca, 0x25, 0x8b,
+ 0xe6, 0x48, 0xa7, 0x09, 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51,
+ 0xbe, 0x10, 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b,
+ 0x4f, 0xe1, 0x0e, 0xa0, 0xcd, 0x63, 0x8c, 0x22, 0xc8, 0x66,
+ 0x89, 0x27, 0x4a, 0xe4, 0x0b, 0xa5, 0xd1, 0x7f, 0x90, 0x3e,
+ 0x53, 0xfd, 0x12, 0xbc, 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6,
+ 0x39, 0x97, 0xe3, 0x4d, 0xa2, 0x0c, 0x61, 0xcf, 0x20, 0x8e,
+ 0xac, 0x02, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1, 0xb5, 0x1b,
+ 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8, 0x9e, 0x30, 0xdf, 0x71,
+ 0x1c, 0xb2, 0x5d, 0xf3, 0x87, 0x29, 0xc6, 0x68, 0x05, 0xab,
+ 0x44, 0xea, 0x8d, 0x23, 0xcc, 0x62, 0x0f, 0xa1, 0x4e, 0xe0,
+ 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9, 0xbf, 0x11,
+ 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2, 0xa6, 0x08, 0xe7, 0x49,
+ 0x24, 0x8a, 0x65, 0xcb, 0xe9, 0x47, 0xa8, 0x06, 0x6b, 0xc5,
+ 0x2a, 0x84, 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d,
+ 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6, 0xc2, 0x6c,
+ 0x83, 0x2d, 0x40, 0xee, 0x01, 0xaf, 0x45, 0xeb, 0x04, 0xaa,
+ 0xc7, 0x69, 0x86, 0x28, 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70,
+ 0x9f, 0x31, 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a,
+ 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x03, 0x21, 0x8f,
+ 0x60, 0xce, 0xa3, 0x0d, 0xe2, 0x4c, 0x38, 0x96, 0x79, 0xd7,
+ 0xba, 0x14, 0xfb, 0x55, 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f,
+ 0xd0, 0x7e, 0x0a, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67,
+ 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe,
+ 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b, 0x22, 0x8d, 0x61, 0xce,
+ 0xa4, 0x0b, 0xe7, 0x48, 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a,
+ 0xf6, 0x59, 0x44, 0xeb, 0x07, 0xa8, 0xc2, 0x6d, 0x81, 0x2e,
+ 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f, 0x66, 0xc9,
+ 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0x0c, 0x77, 0xd8, 0x34, 0x9b,
+ 0xf1, 0x5e, 0xb2, 0x1d, 0x88, 0x27, 0xcb, 0x64, 0x0e, 0xa1,
+ 0x4d, 0xe2, 0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3,
+ 0xaa, 0x05, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0, 0xbb, 0x14,
+ 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1, 0xcc, 0x63, 0x8f, 0x20,
+ 0x4a, 0xe5, 0x09, 0xa6, 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4,
+ 0x18, 0xb7, 0xee, 0x41, 0xad, 0x02, 0x68, 0xc7, 0x2b, 0x84,
+ 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95, 0x0d, 0xa2,
+ 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67, 0x1c, 0xb3, 0x5f, 0xf0,
+ 0x9a, 0x35, 0xd9, 0x76, 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x06,
+ 0xea, 0x45, 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54,
+ 0x49, 0xe6, 0x0a, 0xa5, 0xcf, 0x60, 0x8c, 0x23, 0x58, 0xf7,
+ 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32, 0x6b, 0xc4, 0x28, 0x87,
+ 0xed, 0x42, 0xae, 0x01, 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53,
+ 0xbf, 0x10, 0x85, 0x2a, 0xc6, 0x69, 0x03, 0xac, 0x40, 0xef,
+ 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe, 0xa7, 0x08,
+ 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd, 0xb6, 0x19, 0xf5, 0x5a,
+ 0x30, 0x9f, 0x73, 0xdc, 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8,
+ 0x04, 0xab, 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba,
+ 0xe3, 0x4c, 0xa0, 0x0f, 0x65, 0xca, 0x26, 0x89, 0xf2, 0x5d,
+ 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98, 0x00, 0xb0, 0x7d, 0xcd,
+ 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3,
+ 0x6e, 0xde, 0xcf, 0x7f, 0xb2, 0x02, 0x35, 0x85, 0x48, 0xf8,
+ 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11, 0x83, 0x33,
+ 0xfe, 0x4e, 0x79, 0xc9, 0x04, 0xb4, 0x6a, 0xda, 0x17, 0xa7,
+ 0x90, 0x20, 0xed, 0x5d, 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x06,
+ 0xcb, 0x7b, 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92,
+ 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c, 0xf2, 0x42,
+ 0x8f, 0x3f, 0x08, 0xb8, 0x75, 0xc5, 0xd4, 0x64, 0xa9, 0x19,
+ 0x2e, 0x9e, 0x53, 0xe3, 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77,
+ 0xba, 0x0a, 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf,
+ 0x71, 0xc1, 0x0c, 0xbc, 0x8b, 0x3b, 0xf6, 0x46, 0x57, 0xe7,
+ 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60, 0xbe, 0x0e, 0xc3, 0x73,
+ 0x44, 0xf4, 0x39, 0x89, 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c,
+ 0xb1, 0x01, 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8,
+ 0xf9, 0x49, 0x84, 0x34, 0x03, 0xb3, 0x7e, 0xce, 0x10, 0xa0,
+ 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27, 0xb5, 0x05, 0xc8, 0x78,
+ 0x4f, 0xff, 0x32, 0x82, 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16,
+ 0xdb, 0x6b, 0x7a, 0xca, 0x07, 0xb7, 0x80, 0x30, 0xfd, 0x4d,
+ 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4, 0x2d, 0x9d,
+ 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a, 0xc4, 0x74, 0xb9, 0x09,
+ 0x3e, 0x8e, 0x43, 0xf3, 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8,
+ 0x65, 0xd5, 0x0b, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c,
+ 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99, 0x47, 0xf7,
+ 0x3a, 0x8a, 0xbd, 0x0d, 0xc0, 0x70, 0x61, 0xd1, 0x1c, 0xac,
+ 0x9b, 0x2b, 0xe6, 0x56, 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2,
+ 0x0f, 0xbf, 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30,
+ 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1, 0xdf, 0x6e,
+ 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef, 0x3e, 0x8f, 0x41, 0xf0,
+ 0xc0, 0x71, 0xbf, 0x0e, 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec,
+ 0x22, 0x93, 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0x0d, 0xc3, 0x72,
+ 0x7c, 0xcd, 0x03, 0xb2, 0x82, 0x33, 0xfd, 0x4c, 0x9d, 0x2c,
+ 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad, 0x5b, 0xea, 0x24, 0x95,
+ 0xa5, 0x14, 0xda, 0x6b, 0xba, 0x0b, 0xc5, 0x74, 0x44, 0xf5,
+ 0x3b, 0x8a, 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x05, 0xb4,
+ 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55, 0xf8, 0x49,
+ 0x87, 0x36, 0x06, 0xb7, 0x79, 0xc8, 0x19, 0xa8, 0x66, 0xd7,
+ 0xe7, 0x56, 0x98, 0x29, 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68,
+ 0xa6, 0x17, 0xc6, 0x77, 0xb9, 0x08, 0x38, 0x89, 0x47, 0xf6,
+ 0xb6, 0x07, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86, 0x57, 0xe6,
+ 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67, 0x69, 0xd8, 0x16, 0xa7,
+ 0x97, 0x26, 0xe8, 0x59, 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7,
+ 0x09, 0xb8, 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25,
+ 0xf4, 0x45, 0x8b, 0x3a, 0x0a, 0xbb, 0x75, 0xc4, 0xca, 0x7b,
+ 0xb5, 0x04, 0x34, 0x85, 0x4b, 0xfa, 0x2b, 0x9a, 0x54, 0xe5,
+ 0xd5, 0x64, 0xaa, 0x1b, 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2,
+ 0x6c, 0xdd, 0x0c, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c,
+ 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x02, 0xd3, 0x62,
+ 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3, 0x4e, 0xff, 0x31, 0x80,
+ 0xb0, 0x01, 0xcf, 0x7e, 0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0,
+ 0x2e, 0x9f, 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1,
+ 0x70, 0xc1, 0x0f, 0xbe, 0x8e, 0x3f, 0xf1, 0x40, 0x00, 0xb2,
+ 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32,
+ 0x0b, 0xb9, 0x72, 0xc0, 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf,
+ 0x64, 0xd6, 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f,
+ 0xc3, 0x71, 0xba, 0x08, 0x31, 0x83, 0x48, 0xfa, 0x3a, 0x88,
+ 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x03, 0x2c, 0x9e, 0x55, 0xe7,
+ 0xde, 0x6c, 0xa7, 0x15, 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95,
+ 0x5e, 0xec, 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2,
+ 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b, 0x74, 0xc6,
+ 0x0d, 0xbf, 0x86, 0x34, 0xff, 0x4d, 0x8d, 0x3f, 0xf4, 0x46,
+ 0x7f, 0xcd, 0x06, 0xb4, 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18,
+ 0xd3, 0x61, 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98,
+ 0xb7, 0x05, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e, 0x4e, 0xfc,
+ 0x37, 0x85, 0xbc, 0x0e, 0xc5, 0x77, 0x2b, 0x99, 0x52, 0xe0,
+ 0xd9, 0x6b, 0xa0, 0x12, 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92,
+ 0x59, 0xeb, 0xc4, 0x76, 0xbd, 0x0f, 0x36, 0x84, 0x4f, 0xfd,
+ 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x04, 0xe8, 0x5a,
+ 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1, 0x11, 0xa3, 0x68, 0xda,
+ 0xe3, 0x51, 0x9a, 0x28, 0x07, 0xb5, 0x7e, 0xcc, 0xf5, 0x47,
+ 0x8c, 0x3e, 0xfe, 0x4c, 0x87, 0x35, 0x0c, 0xbe, 0x75, 0xc7,
+ 0xb0, 0x02, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89, 0x49, 0xfb,
+ 0x30, 0x82, 0xbb, 0x09, 0xc2, 0x70, 0x5f, 0xed, 0x26, 0x94,
+ 0xad, 0x1f, 0xd4, 0x66, 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6,
+ 0x2d, 0x9f, 0x73, 0xc1, 0x0a, 0xb8, 0x81, 0x33, 0xf8, 0x4a,
+ 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x01, 0xb3, 0x9c, 0x2e,
+ 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5, 0x65, 0xd7, 0x1c, 0xae,
+ 0x97, 0x25, 0xee, 0x5c, 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45,
+ 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf,
+ 0xff, 0x4c, 0x84, 0x37, 0x09, 0xba, 0x72, 0xc1, 0x0e, 0xbd,
+ 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30, 0xe3, 0x50, 0x98, 0x2b,
+ 0x15, 0xa6, 0x6e, 0xdd, 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57,
+ 0x9f, 0x2c, 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22,
+ 0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3, 0xdb, 0x68,
+ 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5, 0x2a, 0x99, 0x51, 0xe2,
+ 0xdc, 0x6f, 0xa7, 0x14, 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61,
+ 0xa9, 0x1a, 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb,
+ 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x06, 0xc9, 0x7a,
+ 0xb2, 0x01, 0x3f, 0x8c, 0x44, 0xf7, 0xc7, 0x74, 0xbc, 0x0f,
+ 0x31, 0x82, 0x4a, 0xf9, 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73,
+ 0xbb, 0x08, 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95,
+ 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64, 0x54, 0xe7,
+ 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a, 0xa5, 0x16, 0xde, 0x6d,
+ 0x53, 0xe0, 0x28, 0x9b, 0x48, 0xfb, 0x33, 0x80, 0xbe, 0x0d,
+ 0xc5, 0x76, 0xb9, 0x0a, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87,
+ 0xb7, 0x04, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89, 0x46, 0xf5,
+ 0x3d, 0x8e, 0xb0, 0x03, 0xcb, 0x78, 0x70, 0xc3, 0x0b, 0xb8,
+ 0x86, 0x35, 0xfd, 0x4e, 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4,
+ 0x0c, 0xbf, 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x02, 0xb1,
+ 0x7e, 0xcd, 0x05, 0xb6, 0x88, 0x3b, 0xf3, 0x40, 0x93, 0x20,
+ 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad, 0x62, 0xd1, 0x19, 0xaa,
+ 0x94, 0x27, 0xef, 0x5c, 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29,
+ 0xe1, 0x52, 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3,
+ 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d,
+ 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2, 0x8f, 0x3b, 0xfa, 0x4e,
+ 0x65, 0xd1, 0x10, 0xa4, 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18,
+ 0xd9, 0x6d, 0x03, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28,
+ 0xca, 0x7e, 0xbf, 0x0b, 0x20, 0x94, 0x55, 0xe1, 0x8c, 0x38,
+ 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7, 0x45, 0xf1, 0x30, 0x84,
+ 0xaf, 0x1b, 0xda, 0x6e, 0x06, 0xb2, 0x73, 0xc7, 0xec, 0x58,
+ 0x99, 0x2d, 0xcf, 0x7b, 0xba, 0x0e, 0x25, 0x91, 0x50, 0xe4,
+ 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2, 0x40, 0xf4,
+ 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b, 0x05, 0xb1, 0x70, 0xc4,
+ 0xef, 0x5b, 0x9a, 0x2e, 0xcc, 0x78, 0xb9, 0x0d, 0x26, 0x92,
+ 0x53, 0xe7, 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1,
+ 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68, 0x0c, 0xb8,
+ 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27, 0xc5, 0x71, 0xb0, 0x04,
+ 0x2f, 0x9b, 0x5a, 0xee, 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd,
+ 0x1c, 0xa8, 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61,
+ 0x0f, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24, 0xc6, 0x72,
+ 0xb3, 0x07, 0x2c, 0x98, 0x59, 0xed, 0x80, 0x34, 0xf5, 0x41,
+ 0x6a, 0xde, 0x1f, 0xab, 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17,
+ 0xd6, 0x62, 0x0a, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21,
+ 0xc3, 0x77, 0xb6, 0x02, 0x29, 0x9d, 0x5c, 0xe8, 0x85, 0x31,
+ 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae, 0x4c, 0xf8, 0x39, 0x8d,
+ 0xa6, 0x12, 0xd3, 0x67, 0x09, 0xbd, 0x7c, 0xc8, 0xe3, 0x57,
+ 0x96, 0x22, 0xc0, 0x74, 0xb5, 0x01, 0x2a, 0x9e, 0x5f, 0xeb,
+ 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad, 0x4f, 0xfb,
+ 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64, 0x00, 0xb5, 0x77, 0xc2,
+ 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a,
+ 0x58, 0xed, 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x06, 0xb3,
+ 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x05, 0xc7, 0x72, 0x23, 0x96,
+ 0x54, 0xe1, 0xcd, 0x78, 0xba, 0x0f, 0xe2, 0x57, 0x95, 0x20,
+ 0x0c, 0xb9, 0x7b, 0xce, 0xbc, 0x09, 0xcb, 0x7e, 0x52, 0xe7,
+ 0x25, 0x90, 0x7d, 0xc8, 0x0a, 0xbf, 0x93, 0x26, 0xe4, 0x51,
+ 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a, 0x87, 0x32,
+ 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab, 0xd9, 0x6c, 0xae, 0x1b,
+ 0x37, 0x82, 0x40, 0xf5, 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43,
+ 0x81, 0x34, 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49,
+ 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88, 0xfa, 0x4f,
+ 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6, 0x3b, 0x8e, 0x4c, 0xf9,
+ 0xd5, 0x60, 0xa2, 0x17, 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7,
+ 0x15, 0xa0, 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61,
+ 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f, 0xd2, 0x67,
+ 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe, 0xaf, 0x1a, 0xd8, 0x6d,
+ 0x41, 0xf4, 0x36, 0x83, 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35,
+ 0xf7, 0x42, 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c,
+ 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd, 0xca, 0x7f,
+ 0xbd, 0x08, 0x24, 0x91, 0x53, 0xe6, 0x0b, 0xbe, 0x7c, 0xc9,
+ 0xe5, 0x50, 0x92, 0x27, 0x55, 0xe0, 0x22, 0x97, 0xbb, 0x0e,
+ 0xcc, 0x79, 0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0x0d, 0xb8,
+ 0xe9, 0x5c, 0x9e, 0x2b, 0x07, 0xb2, 0x70, 0xc5, 0x28, 0x9d,
+ 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x04, 0x76, 0xc3, 0x01, 0xb4,
+ 0x98, 0x2d, 0xef, 0x5a, 0xb7, 0x02, 0xc0, 0x75, 0x59, 0xec,
+ 0x2e, 0x9b, 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25,
+ 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc, 0xaf, 0x19,
+ 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a, 0x76, 0xc0, 0x07, 0xb1,
+ 0x94, 0x22, 0xe5, 0x53, 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17,
+ 0xd0, 0x66, 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x09, 0xbf,
+ 0xec, 0x5a, 0x9d, 0x2b, 0x0e, 0xb8, 0x7f, 0xc9, 0x35, 0x83,
+ 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10, 0x86, 0x30, 0xf7, 0x41,
+ 0x64, 0xd2, 0x15, 0xa3, 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0x0b,
+ 0xcc, 0x7a, 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0x0c,
+ 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5, 0xc5, 0x73,
+ 0xb4, 0x02, 0x27, 0x91, 0x56, 0xe0, 0x1c, 0xaa, 0x6d, 0xdb,
+ 0xfe, 0x48, 0x8f, 0x39, 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e,
+ 0xf9, 0x4f, 0xb3, 0x05, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96,
+ 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34, 0xc8, 0x7e,
+ 0xb9, 0x0f, 0x2a, 0x9c, 0x5b, 0xed, 0xbe, 0x08, 0xcf, 0x79,
+ 0x5c, 0xea, 0x2d, 0x9b, 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33,
+ 0xf4, 0x42, 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x06, 0xc1, 0x77,
+ 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae, 0xfd, 0x4b,
+ 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8, 0x24, 0x92, 0x55, 0xe3,
+ 0xc6, 0x70, 0xb7, 0x01, 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3,
+ 0x04, 0xb2, 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b,
+ 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d, 0xe1, 0x57,
+ 0x90, 0x26, 0x03, 0xb5, 0x72, 0xc4, 0xd4, 0x62, 0xa5, 0x13,
+ 0x36, 0x80, 0x47, 0xf1, 0x0d, 0xbb, 0x7c, 0xca, 0xef, 0x59,
+ 0x9e, 0x28, 0x7b, 0xcd, 0x0a, 0xbc, 0x99, 0x2f, 0xe8, 0x5e,
+ 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87, 0x00, 0xb7,
+ 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15,
+ 0x37, 0x80, 0x44, 0xf3, 0xbf, 0x08, 0xcc, 0x7b, 0x59, 0xee,
+ 0x2a, 0x9d, 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c,
+ 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41, 0xb2, 0x05,
+ 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90, 0xdc, 0x6b, 0xaf, 0x18,
+ 0x3a, 0x8d, 0x49, 0xfe, 0x0d, 0xba, 0x7e, 0xc9, 0xeb, 0x5c,
+ 0x98, 0x2f, 0xc6, 0x71, 0xb5, 0x02, 0x20, 0x97, 0x53, 0xe4,
+ 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35, 0x79, 0xce,
+ 0x0a, 0xbd, 0x9f, 0x28, 0xec, 0x5b, 0xa8, 0x1f, 0xdb, 0x6c,
+ 0x4e, 0xf9, 0x3d, 0x8a, 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4,
+ 0x30, 0x87, 0x74, 0xc3, 0x07, 0xb0, 0x92, 0x25, 0xe1, 0x56,
+ 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38, 0xcb, 0x7c,
+ 0xb8, 0x0f, 0x2d, 0x9a, 0x5e, 0xe9, 0x91, 0x26, 0xe2, 0x55,
+ 0x77, 0xc0, 0x04, 0xb3, 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11,
+ 0xd5, 0x62, 0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0x0c,
+ 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd, 0xf2, 0x45,
+ 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0, 0x23, 0x94, 0x50, 0xe7,
+ 0xc5, 0x72, 0xb6, 0x01, 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c,
+ 0xd8, 0x6f, 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x09, 0xbe,
+ 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x06, 0xc2, 0x75, 0x86, 0x31,
+ 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4, 0xe8, 0x5f, 0x9b, 0x2c,
+ 0x0e, 0xb9, 0x7d, 0xca, 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68,
+ 0xac, 0x1b, 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16,
+ 0xe5, 0x52, 0x96, 0x21, 0x03, 0xb4, 0x70, 0xc7, 0x8b, 0x3c,
+ 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9, 0x5a, 0xed, 0x29, 0x9e,
+ 0xbc, 0x0b, 0xcf, 0x78, 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62,
+ 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6,
+ 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40, 0xe6, 0x5e,
+ 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9, 0x9e, 0x26, 0xf3, 0x4b,
+ 0x44, 0xfc, 0x29, 0x91, 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55,
+ 0x80, 0x38, 0xd1, 0x69, 0xbc, 0x04, 0x0b, 0xb3, 0x66, 0xde,
+ 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77, 0x21, 0x99,
+ 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e, 0x88, 0x30, 0xe5, 0x5d,
+ 0x52, 0xea, 0x3f, 0x87, 0x6e, 0xd6, 0x03, 0xbb, 0xb4, 0x0c,
+ 0xd9, 0x61, 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8,
+ 0xbf, 0x07, 0xd2, 0x6a, 0x65, 0xdd, 0x08, 0xb0, 0x16, 0xae,
+ 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19, 0xf0, 0x48, 0x9d, 0x25,
+ 0x2a, 0x92, 0x47, 0xff, 0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b,
+ 0xee, 0x56, 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d,
+ 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4, 0x0d, 0xb5,
+ 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x02, 0xa4, 0x1c, 0xc9, 0x71,
+ 0x7e, 0xc6, 0x13, 0xab, 0xdc, 0x64, 0xb1, 0x09, 0x06, 0xbe,
+ 0x6b, 0xd3, 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a,
+ 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c, 0x3a, 0x82,
+ 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35, 0x63, 0xdb, 0x0e, 0xb6,
+ 0xb9, 0x01, 0xd4, 0x6c, 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8,
+ 0x7d, 0xc5, 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23,
+ 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a, 0xfd, 0x45,
+ 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2, 0x54, 0xec, 0x39, 0x81,
+ 0x8e, 0x36, 0xe3, 0x5b, 0xb2, 0x0a, 0xdf, 0x67, 0x68, 0xd0,
+ 0x05, 0xbd, 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14,
+ 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18,
+ 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9, 0x5f, 0xe6, 0x30, 0x89,
+ 0x81, 0x38, 0xee, 0x57, 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99,
+ 0x4f, 0xf6, 0xbe, 0x07, 0xd1, 0x68, 0x60, 0xd9, 0x0f, 0xb6,
+ 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17, 0xe1, 0x58,
+ 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9, 0x40, 0xf9, 0x2f, 0x96,
+ 0x9e, 0x27, 0xf1, 0x48, 0x61, 0xd8, 0x0e, 0xb7, 0xbf, 0x06,
+ 0xd0, 0x69, 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8,
+ 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36, 0x9f, 0x26,
+ 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97, 0xdf, 0x66, 0xb0, 0x09,
+ 0x01, 0xb8, 0x6e, 0xd7, 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19,
+ 0xcf, 0x76, 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88,
+ 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29, 0xc2, 0x7b,
+ 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca, 0x63, 0xda, 0x0c, 0xb5,
+ 0xbd, 0x04, 0xd2, 0x6b, 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa,
+ 0x2c, 0x95, 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34,
+ 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74, 0xdd, 0x64,
+ 0xb2, 0x0b, 0x03, 0xba, 0x6c, 0xd5, 0x23, 0x9a, 0x4c, 0xf5,
+ 0xfd, 0x44, 0x92, 0x2b, 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5,
+ 0x33, 0x8a, 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab,
+ 0x02, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0x0a, 0xfc, 0x45,
+ 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4, 0x5d, 0xe4, 0x32, 0x8b,
+ 0x83, 0x3a, 0xec, 0x55, 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a,
+ 0xac, 0x15, 0xbc, 0x05, 0xd3, 0x6a, 0x62, 0xdb, 0x0d, 0xb4,
+ 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a, 0xe3, 0x5a,
+ 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb, 0x00, 0xba, 0x69, 0xd3,
+ 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1,
+ 0x02, 0xb8, 0x6f, 0xd5, 0x06, 0xbc, 0xbd, 0x07, 0xd4, 0x6e,
+ 0xd6, 0x6c, 0xbf, 0x05, 0x04, 0xbe, 0x6d, 0xd7, 0xde, 0x64,
+ 0xb7, 0x0d, 0x0c, 0xb6, 0x65, 0xdf, 0x67, 0xdd, 0x0e, 0xb4,
+ 0xb5, 0x0f, 0xdc, 0x66, 0xb1, 0x0b, 0xd8, 0x62, 0x63, 0xd9,
+ 0x0a, 0xb0, 0x08, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x09,
+ 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0, 0x18, 0xa2,
+ 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19, 0xce, 0x74, 0xa7, 0x1d,
+ 0x1c, 0xa6, 0x75, 0xcf, 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f,
+ 0xcc, 0x76, 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e,
+ 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7, 0x10, 0xaa,
+ 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11, 0xa9, 0x13, 0xc0, 0x7a,
+ 0x7b, 0xc1, 0x12, 0xa8, 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37,
+ 0xe4, 0x5e, 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7,
+ 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31, 0x89, 0x33,
+ 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88, 0x81, 0x3b, 0xe8, 0x52,
+ 0x53, 0xe9, 0x3a, 0x80, 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50,
+ 0x83, 0x39, 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef,
+ 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56, 0xfe, 0x44,
+ 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff, 0x47, 0xfd, 0x2e, 0x94,
+ 0x95, 0x2f, 0xfc, 0x46, 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9,
+ 0x2a, 0x90, 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29,
+ 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21, 0x99, 0x23,
+ 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98, 0x4f, 0xf5, 0x26, 0x9c,
+ 0x9d, 0x27, 0xf4, 0x4e, 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e,
+ 0x4d, 0xf7, 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7, 0x7f, 0xc4,
+ 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79, 0xce, 0x75, 0xa5, 0x1e,
+ 0x18, 0xa3, 0x73, 0xc8, 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93,
+ 0x43, 0xf8, 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49,
+ 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87, 0x30, 0x8b,
+ 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36, 0xe1, 0x5a, 0x8a, 0x31,
+ 0x37, 0x8c, 0x5c, 0xe7, 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d,
+ 0xed, 0x56, 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98,
+ 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29, 0x1f, 0xa4,
+ 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19, 0xae, 0x15, 0xc5, 0x7e,
+ 0x78, 0xc3, 0x13, 0xa8, 0x60, 0xdb, 0x0b, 0xb0, 0xb6, 0x0d,
+ 0xdd, 0x66, 0xd1, 0x6a, 0xba, 0x01, 0x07, 0xbc, 0x6c, 0xd7,
+ 0xdf, 0x64, 0xb4, 0x0f, 0x09, 0xb2, 0x62, 0xd9, 0x6e, 0xd5,
+ 0x05, 0xbe, 0xb8, 0x03, 0xd3, 0x68, 0xa0, 0x1b, 0xcb, 0x70,
+ 0x76, 0xcd, 0x1d, 0xa6, 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c,
+ 0xac, 0x17, 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27,
+ 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96, 0x5e, 0xe5,
+ 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58, 0xef, 0x54, 0x84, 0x3f,
+ 0x39, 0x82, 0x52, 0xe9, 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53,
+ 0x83, 0x38, 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89,
+ 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47, 0xf0, 0x4b,
+ 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6, 0xc0, 0x7b, 0xab, 0x10,
+ 0x16, 0xad, 0x7d, 0xc6, 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c,
+ 0xcc, 0x77, 0xbf, 0x04, 0xd4, 0x6f, 0x69, 0xd2, 0x02, 0xb9,
+ 0x0e, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x08, 0x00, 0xbc,
+ 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50,
+ 0x43, 0xff, 0x26, 0x9a, 0x0f, 0xb3, 0x6a, 0xd6, 0xc5, 0x79,
+ 0xa0, 0x1c, 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95,
+ 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0x0d, 0x97, 0x2b,
+ 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84, 0x11, 0xad, 0x74, 0xc8,
+ 0xdb, 0x67, 0xbe, 0x02, 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee,
+ 0x37, 0x8b, 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f,
+ 0xb5, 0x09, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6, 0x33, 0x8f,
+ 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20, 0xba, 0x06, 0xdf, 0x63,
+ 0x70, 0xcc, 0x15, 0xa9, 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54,
+ 0x8d, 0x31, 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x04, 0xb8,
+ 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e, 0xa4, 0x18,
+ 0xc1, 0x7d, 0x6e, 0xd2, 0x0b, 0xb7, 0x78, 0xc4, 0x1d, 0xa1,
+ 0xb2, 0x0e, 0xd7, 0x6b, 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87,
+ 0x5e, 0xe2, 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x01, 0xd8, 0x64,
+ 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed, 0x66, 0xda,
+ 0x03, 0xbf, 0xac, 0x10, 0xc9, 0x75, 0xef, 0x53, 0x8a, 0x36,
+ 0x25, 0x99, 0x40, 0xfc, 0x69, 0xd5, 0x0c, 0xb0, 0xa3, 0x1f,
+ 0xc6, 0x7a, 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3,
+ 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57, 0xcd, 0x71,
+ 0xa8, 0x14, 0x07, 0xbb, 0x62, 0xde, 0x4b, 0xf7, 0x2e, 0x92,
+ 0x81, 0x3d, 0xe4, 0x58, 0xc2, 0x7e, 0xa7, 0x1b, 0x08, 0xb4,
+ 0x6d, 0xd1, 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49,
+ 0xd3, 0x6f, 0xb6, 0x0a, 0x19, 0xa5, 0x7c, 0xc0, 0x55, 0xe9,
+ 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46, 0xdc, 0x60, 0xb9, 0x05,
+ 0x16, 0xaa, 0x73, 0xcf, 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73,
+ 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95,
+ 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0x0b, 0x9e, 0x23,
+ 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a, 0x3e, 0x83, 0x59, 0xe4,
+ 0xf0, 0x4d, 0x97, 0x2a, 0xbf, 0x02, 0xd8, 0x65, 0x71, 0xcc,
+ 0x16, 0xab, 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35,
+ 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x09, 0xb4, 0x7c, 0xc1,
+ 0x1b, 0xa6, 0xb2, 0x0f, 0xd5, 0x68, 0xfd, 0x40, 0x9a, 0x27,
+ 0x33, 0x8e, 0x54, 0xe9, 0x63, 0xde, 0x04, 0xb9, 0xad, 0x10,
+ 0xca, 0x77, 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6,
+ 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56, 0xc3, 0x7e,
+ 0xa4, 0x19, 0x0d, 0xb0, 0x6a, 0xd7, 0x5d, 0xe0, 0x3a, 0x87,
+ 0x93, 0x2e, 0xf4, 0x49, 0xdc, 0x61, 0xbb, 0x06, 0x12, 0xaf,
+ 0x75, 0xc8, 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec,
+ 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0x0a, 0xd0, 0x6d, 0xe7, 0x5a,
+ 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3, 0x66, 0xdb, 0x01, 0xbc,
+ 0xa8, 0x15, 0xcf, 0x72, 0xc6, 0x7b, 0xa1, 0x1c, 0x08, 0xb5,
+ 0x6f, 0xd2, 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53,
+ 0xd9, 0x64, 0xbe, 0x03, 0x17, 0xaa, 0x70, 0xcd, 0x58, 0xe5,
+ 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c, 0x84, 0x39, 0xe3, 0x5e,
+ 0x4a, 0xf7, 0x2d, 0x90, 0x05, 0xb8, 0x62, 0xdf, 0xcb, 0x76,
+ 0xac, 0x11, 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f,
+ 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0x0e, 0xba, 0x07,
+ 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae, 0x3b, 0x86, 0x5c, 0xe1,
+ 0xf5, 0x48, 0x92, 0x2f, 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6,
+ 0x0c, 0xb1, 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30,
+ 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27,
+ 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84, 0x2f, 0x91, 0x4e, 0xf0,
+ 0xed, 0x53, 0x8c, 0x32, 0xb6, 0x08, 0xd7, 0x69, 0x74, 0xca,
+ 0x15, 0xab, 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43,
+ 0xc7, 0x79, 0xa6, 0x18, 0x05, 0xbb, 0x64, 0xda, 0x71, 0xcf,
+ 0x10, 0xae, 0xb3, 0x0d, 0xd2, 0x6c, 0xe8, 0x56, 0x89, 0x37,
+ 0x2a, 0x94, 0x4b, 0xf5, 0xbc, 0x02, 0xdd, 0x63, 0x7e, 0xc0,
+ 0x1f, 0xa1, 0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38,
+ 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e, 0x0a, 0xb4,
+ 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17, 0xe2, 0x5c, 0x83, 0x3d,
+ 0x20, 0x9e, 0x41, 0xff, 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x07,
+ 0xd8, 0x66, 0xcd, 0x73, 0xac, 0x12, 0x0f, 0xb1, 0x6e, 0xd0,
+ 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49, 0x65, 0xdb,
+ 0x04, 0xba, 0xa7, 0x19, 0xc6, 0x78, 0xfc, 0x42, 0x9d, 0x23,
+ 0x3e, 0x80, 0x5f, 0xe1, 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36,
+ 0xe9, 0x57, 0xd3, 0x6d, 0xb2, 0x0c, 0x11, 0xaf, 0x70, 0xce,
+ 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26, 0xa2, 0x1c,
+ 0xc3, 0x7d, 0x60, 0xde, 0x01, 0xbf, 0x14, 0xaa, 0x75, 0xcb,
+ 0xd6, 0x68, 0xb7, 0x09, 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1,
+ 0x2e, 0x90, 0xd9, 0x67, 0xb8, 0x06, 0x1b, 0xa5, 0x7a, 0xc4,
+ 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d, 0xf6, 0x48,
+ 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb, 0x6f, 0xd1, 0x0e, 0xb0,
+ 0xad, 0x13, 0xcc, 0x72, 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb,
+ 0x24, 0x9a, 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x03,
+ 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0x0b, 0xb5, 0x31, 0x8f,
+ 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c, 0x00, 0xbf, 0x63, 0xdc,
+ 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8,
+ 0x34, 0x8b, 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25,
+ 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0x0b, 0xb4, 0x7e, 0xc1,
+ 0x1d, 0xa2, 0xb8, 0x07, 0xdb, 0x64, 0xef, 0x50, 0x8c, 0x33,
+ 0x29, 0x96, 0x4a, 0xf5, 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38,
+ 0xe4, 0x5b, 0xd0, 0x6f, 0xb3, 0x0c, 0x16, 0xa9, 0x75, 0xca,
+ 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6, 0x6d, 0xd2,
+ 0x0e, 0xb1, 0xab, 0x14, 0xc8, 0x77, 0xc3, 0x7c, 0xa0, 0x1f,
+ 0x05, 0xba, 0x66, 0xd9, 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b,
+ 0xf7, 0x48, 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98,
+ 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x09, 0xbd, 0x02,
+ 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7, 0x2c, 0x93, 0x4f, 0xf0,
+ 0xea, 0x55, 0x89, 0x36, 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c,
+ 0x40, 0xff, 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0x0d, 0xd1, 0x6e,
+ 0xda, 0x65, 0xb9, 0x06, 0x1c, 0xa3, 0x7f, 0xc0, 0x4b, 0xf4,
+ 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51, 0x9b, 0x24, 0xf8, 0x47,
+ 0x5d, 0xe2, 0x3e, 0x81, 0x0a, 0xb5, 0x69, 0xd6, 0xcc, 0x73,
+ 0xaf, 0x10, 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x01, 0xbe,
+ 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f, 0x19, 0xa6,
+ 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x03, 0x88, 0x37, 0xeb, 0x54,
+ 0x4e, 0xf1, 0x2d, 0x92, 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f,
+ 0x83, 0x3c, 0xb7, 0x08, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad,
+ 0x67, 0xd8, 0x04, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d, 0xf6, 0x49,
+ 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec, 0x58, 0xe7, 0x3b, 0x84,
+ 0x9e, 0x21, 0xfd, 0x42, 0xc9, 0x76, 0xaa, 0x15, 0x0f, 0xb0,
+ 0x6c, 0xd3, 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a,
+ 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34, 0x9c, 0x5c,
+ 0x01, 0xc1, 0xbb, 0x7b, 0x26, 0xe6, 0xd2, 0x12, 0x4f, 0x8f,
+ 0xf5, 0x35, 0x68, 0xa8, 0x25, 0xe5, 0xb8, 0x78, 0x02, 0xc2,
+ 0x9f, 0x5f, 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11,
+ 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x03, 0xc3, 0xf7, 0x37,
+ 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d, 0x4a, 0x8a, 0xd7, 0x17,
+ 0x6d, 0xad, 0xf0, 0x30, 0x04, 0xc4, 0x99, 0x59, 0x23, 0xe3,
+ 0xbe, 0x7e, 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac,
+ 0x98, 0x58, 0x05, 0xc5, 0xbf, 0x7f, 0x22, 0xe2, 0x6f, 0xaf,
+ 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15, 0x21, 0xe1, 0xbc, 0x7c,
+ 0x06, 0xc6, 0x9b, 0x5b, 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14,
+ 0x49, 0x89, 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x07, 0xc7,
+ 0x94, 0x54, 0x09, 0xc9, 0xb3, 0x73, 0x2e, 0xee, 0xda, 0x1a,
+ 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0, 0x08, 0xc8, 0x95, 0x55,
+ 0x2f, 0xef, 0xb2, 0x72, 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1,
+ 0xfc, 0x3c, 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0x0b, 0xcb,
+ 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85, 0x2d, 0xed,
+ 0xb0, 0x70, 0x0a, 0xca, 0x97, 0x57, 0x63, 0xa3, 0xfe, 0x3e,
+ 0x44, 0x84, 0xd9, 0x19, 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39,
+ 0x64, 0xa4, 0x90, 0x50, 0x0d, 0xcd, 0xb7, 0x77, 0x2a, 0xea,
+ 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38, 0x0c, 0xcc,
+ 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76, 0xfb, 0x3b, 0x66, 0xa6,
+ 0xdc, 0x1c, 0x41, 0x81, 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52,
+ 0x0f, 0xcf, 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d,
+ 0x29, 0xe9, 0xb4, 0x74, 0x0e, 0xce, 0x93, 0x53, 0x00, 0xc1,
+ 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18,
+ 0x65, 0xa4, 0xfa, 0x3b, 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e,
+ 0x30, 0xf1, 0xca, 0x0b, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7,
+ 0x05, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78, 0x43, 0x82,
+ 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e, 0x89, 0x48, 0x16, 0xd7,
+ 0xaa, 0x6b, 0x35, 0xf4, 0xcf, 0x0e, 0x50, 0x91, 0xec, 0x2d,
+ 0x73, 0xb2, 0x0a, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77,
+ 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31, 0x86, 0x47,
+ 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb, 0xc0, 0x01, 0x5f, 0x9e,
+ 0xe3, 0x22, 0x7c, 0xbd, 0x0f, 0xce, 0x90, 0x51, 0x2c, 0xed,
+ 0xb3, 0x72, 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34,
+ 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe, 0xc5, 0x04,
+ 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8, 0x14, 0xd5, 0x8b, 0x4a,
+ 0x37, 0xf6, 0xa8, 0x69, 0x52, 0x93, 0xcd, 0x0c, 0x71, 0xb0,
+ 0xee, 0x2f, 0x98, 0x59, 0x07, 0xc6, 0xbb, 0x7a, 0x24, 0xe5,
+ 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3, 0x11, 0xd0,
+ 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c, 0x57, 0x96, 0xc8, 0x09,
+ 0x74, 0xb5, 0xeb, 0x2a, 0x9d, 0x5c, 0x02, 0xc3, 0xbe, 0x7f,
+ 0x21, 0xe0, 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6,
+ 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63, 0x58, 0x99,
+ 0xc7, 0x06, 0x7b, 0xba, 0xe4, 0x25, 0x92, 0x53, 0x0d, 0xcc,
+ 0xb1, 0x70, 0x2e, 0xef, 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36,
+ 0x68, 0xa9, 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66,
+ 0x5d, 0x9c, 0xc2, 0x03, 0x7e, 0xbf, 0xe1, 0x20, 0x97, 0x56,
+ 0x08, 0xc9, 0xb4, 0x75, 0x2b, 0xea, 0xd1, 0x10, 0x4e, 0x8f,
+ 0xf2, 0x33, 0x6d, 0xac, 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed,
+ 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a,
+ 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0x0a, 0xc8, 0xe2, 0x20,
+ 0x7b, 0xb9, 0xcd, 0x0f, 0x54, 0x96, 0x65, 0xa7, 0xfc, 0x3e,
+ 0x4a, 0x88, 0xd3, 0x11, 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6,
+ 0x8d, 0x4f, 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad,
+ 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3, 0xca, 0x08,
+ 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe, 0x94, 0x56, 0x0d, 0xcf,
+ 0xbb, 0x79, 0x22, 0xe0, 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b,
+ 0xc0, 0x02, 0x28, 0xea, 0xb1, 0x73, 0x07, 0xc5, 0x9e, 0x5c,
+ 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb, 0xf1, 0x33,
+ 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85, 0x13, 0xd1, 0x8a, 0x48,
+ 0x3c, 0xfe, 0xa5, 0x67, 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0,
+ 0xfb, 0x39, 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd,
+ 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3, 0x35, 0xf7,
+ 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41, 0x6b, 0xa9, 0xf2, 0x30,
+ 0x44, 0x86, 0xdd, 0x1f, 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x01,
+ 0x5a, 0x98, 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x04, 0xc6,
+ 0x50, 0x92, 0xc9, 0x0b, 0x7f, 0xbd, 0xe6, 0x24, 0x0e, 0xcc,
+ 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a, 0x43, 0x81, 0xda, 0x18,
+ 0x6c, 0xae, 0xf5, 0x37, 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0,
+ 0xab, 0x69, 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b,
+ 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5, 0x26, 0xe4,
+ 0xbf, 0x7d, 0x09, 0xcb, 0x90, 0x52, 0x78, 0xba, 0xe1, 0x23,
+ 0x57, 0x95, 0xce, 0x0c, 0x9a, 0x58, 0x03, 0xc1, 0xb5, 0x77,
+ 0x2c, 0xee, 0xc4, 0x06, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0,
+ 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95,
+ 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25, 0xac, 0x6f, 0x37, 0xf4,
+ 0x87, 0x44, 0x1c, 0xdf, 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12,
+ 0x4a, 0x89, 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36,
+ 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60, 0xe9, 0x2a,
+ 0x72, 0xb1, 0xc2, 0x01, 0x59, 0x9a, 0xbf, 0x7c, 0x24, 0xe7,
+ 0x94, 0x57, 0x0f, 0xcc, 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62,
+ 0x3a, 0xf9, 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf,
+ 0x26, 0xe5, 0xbd, 0x7e, 0x0d, 0xce, 0x96, 0x55, 0x70, 0xb3,
+ 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x03, 0xcf, 0x0c, 0x54, 0x97,
+ 0xe4, 0x27, 0x7f, 0xbc, 0x99, 0x5a, 0x02, 0xc1, 0xb2, 0x71,
+ 0x29, 0xea, 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10,
+ 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46, 0x09, 0xca,
+ 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a, 0x5f, 0x9c, 0xc4, 0x07,
+ 0x74, 0xb7, 0xef, 0x2c, 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d,
+ 0x15, 0xd6, 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80,
+ 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f, 0x1a, 0xd9,
+ 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69, 0xe0, 0x23, 0x7b, 0xb8,
+ 0xcb, 0x08, 0x50, 0x93, 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e,
+ 0x06, 0xc5, 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0,
+ 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6, 0x2f, 0xec,
+ 0xb4, 0x77, 0x04, 0xc7, 0x9f, 0x5c, 0x79, 0xba, 0xe2, 0x21,
+ 0x52, 0x91, 0xc9, 0x0a, 0xc6, 0x05, 0x5d, 0x9e, 0xed, 0x2e,
+ 0x76, 0xb5, 0x90, 0x53, 0x0b, 0xc8, 0xbb, 0x78, 0x20, 0xe3,
+ 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19, 0x3c, 0xff,
+ 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f, 0x00, 0xc4, 0x95, 0x51,
+ 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d,
+ 0xcc, 0x08, 0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba,
+ 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4, 0xa5, 0x61,
+ 0x30, 0xf4, 0x92, 0x56, 0x07, 0xc3, 0xcb, 0x0f, 0x5e, 0x9a,
+ 0xfc, 0x38, 0x69, 0xad, 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a,
+ 0xdb, 0x1f, 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71,
+ 0x57, 0x93, 0xc2, 0x06, 0x60, 0xa4, 0xf5, 0x31, 0x39, 0xfd,
+ 0xac, 0x68, 0x0e, 0xca, 0x9b, 0x5f, 0x8b, 0x4f, 0x1e, 0xda,
+ 0xbc, 0x78, 0x29, 0xed, 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16,
+ 0x47, 0x83, 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x01, 0x50, 0x94,
+ 0x9c, 0x58, 0x09, 0xcd, 0xab, 0x6f, 0x3e, 0xfa, 0x2e, 0xea,
+ 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48, 0x40, 0x84, 0xd5, 0x11,
+ 0x77, 0xb3, 0xe2, 0x26, 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d,
+ 0x0c, 0xc8, 0xc0, 0x04, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6,
+ 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14, 0x1c, 0xd8,
+ 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a, 0x0b, 0xcf, 0x9e, 0x5a,
+ 0x3c, 0xf8, 0xa9, 0x6d, 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96,
+ 0xc7, 0x03, 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1,
+ 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf, 0xf9, 0x3d,
+ 0x6c, 0xa8, 0xce, 0x0a, 0x5b, 0x9f, 0x97, 0x53, 0x02, 0xc6,
+ 0xa0, 0x64, 0x35, 0xf1, 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6,
+ 0x87, 0x43, 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d,
+ 0x5c, 0x98, 0xc9, 0x0d, 0x6b, 0xaf, 0xfe, 0x3a, 0x32, 0xf6,
+ 0xa7, 0x63, 0x05, 0xc1, 0x90, 0x54, 0x80, 0x44, 0x15, 0xd1,
+ 0xb7, 0x73, 0x22, 0xe6, 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d,
+ 0x4c, 0x88, 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61,
+ 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07, 0xcc, 0x09,
+ 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad, 0xaa, 0x6f, 0x3d, 0xf8,
+ 0x99, 0x5c, 0x0e, 0xcb, 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73,
+ 0x21, 0xe4, 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82,
+ 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28, 0x2f, 0xea,
+ 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e, 0x17, 0xd2, 0x80, 0x45,
+ 0x24, 0xe1, 0xb3, 0x76, 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87,
+ 0xd5, 0x10, 0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba,
+ 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc, 0x92, 0x57,
+ 0x05, 0xc0, 0xa1, 0x64, 0x36, 0xf3, 0xf4, 0x31, 0x63, 0xa6,
+ 0xc7, 0x02, 0x50, 0x95, 0x5e, 0x9b, 0xc9, 0x0c, 0x6d, 0xa8,
+ 0xfa, 0x3f, 0x38, 0xfd, 0xaf, 0x6a, 0x0b, 0xce, 0x9c, 0x59,
+ 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f, 0x48, 0x8d,
+ 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29, 0xe2, 0x27, 0x75, 0xb0,
+ 0xd1, 0x14, 0x46, 0x83, 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72,
+ 0x20, 0xe5, 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0x0f, 0xca,
+ 0xcd, 0x08, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac, 0x67, 0xa2,
+ 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x06, 0x01, 0xc4, 0x96, 0x53,
+ 0x32, 0xf7, 0xa5, 0x60, 0x39, 0xfc, 0xae, 0x6b, 0x0a, 0xcf,
+ 0x9d, 0x58, 0x5f, 0x9a, 0xc8, 0x0d, 0x6c, 0xa9, 0xfb, 0x3e,
+ 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x03, 0x51, 0x94, 0x93, 0x56,
+ 0x04, 0xc1, 0xa0, 0x65, 0x37, 0xf2, 0xbc, 0x79, 0x2b, 0xee,
+ 0x8f, 0x4a, 0x18, 0xdd, 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c,
+ 0x7e, 0xbb, 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11,
+ 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77, 0x00, 0xc6,
+ 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29,
+ 0x41, 0x87, 0xd0, 0x16, 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x05,
+ 0x52, 0x94, 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea,
+ 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d, 0x9b, 0x5d,
+ 0x0a, 0xcc, 0xa4, 0x62, 0x35, 0xf3, 0x19, 0xdf, 0x88, 0x4e,
+ 0x26, 0xe0, 0xb7, 0x71, 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e,
+ 0xc9, 0x0f, 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf,
+ 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x07, 0xc1, 0x2b, 0xed,
+ 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43, 0x55, 0x93, 0xc4, 0x02,
+ 0x6a, 0xac, 0xfb, 0x3d, 0x32, 0xf4, 0xa3, 0x65, 0x0d, 0xcb,
+ 0x9c, 0x5a, 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24,
+ 0xce, 0x08, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6, 0xb0, 0x76,
+ 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8, 0xb3, 0x75, 0x22, 0xe4,
+ 0x8c, 0x4a, 0x1d, 0xdb, 0xcd, 0x0b, 0x5c, 0x9a, 0xf2, 0x34,
+ 0x63, 0xa5, 0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27,
+ 0x31, 0xf7, 0xa0, 0x66, 0x0e, 0xc8, 0x9f, 0x59, 0x56, 0x90,
+ 0xc7, 0x01, 0x69, 0xaf, 0xf8, 0x3e, 0x28, 0xee, 0xb9, 0x7f,
+ 0x17, 0xd1, 0x86, 0x40, 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53,
+ 0x04, 0xc2, 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc,
+ 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0x0c, 0x1a, 0xdc,
+ 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72, 0x98, 0x5e, 0x09, 0xcf,
+ 0xa7, 0x61, 0x36, 0xf0, 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f,
+ 0x48, 0x8e, 0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9,
+ 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x06, 0x51, 0x97, 0x7d, 0xbb,
+ 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15, 0x03, 0xc5, 0x92, 0x54,
+ 0x3c, 0xfa, 0xad, 0x6b, 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc,
+ 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19,
+ 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83, 0x9a, 0x5d,
+ 0x09, 0xce, 0xa1, 0x66, 0x32, 0xf5, 0xc5, 0x02, 0x56, 0x91,
+ 0xfe, 0x39, 0x6d, 0xaa, 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f,
+ 0x1b, 0xdc, 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46,
+ 0x5f, 0x98, 0xcc, 0x0b, 0x64, 0xa3, 0xf7, 0x30, 0x97, 0x50,
+ 0x04, 0xc3, 0xac, 0x6b, 0x3f, 0xf8, 0xe1, 0x26, 0x72, 0xb5,
+ 0xda, 0x1d, 0x49, 0x8e, 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87,
+ 0xd3, 0x14, 0x0d, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62,
+ 0x52, 0x95, 0xc1, 0x06, 0x69, 0xae, 0xfa, 0x3d, 0x24, 0xe3,
+ 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b, 0xbe, 0x79, 0x2d, 0xea,
+ 0x85, 0x42, 0x16, 0xd1, 0xc8, 0x0f, 0x5b, 0x9c, 0xf3, 0x34,
+ 0x60, 0xa7, 0x33, 0xf4, 0xa0, 0x67, 0x08, 0xcf, 0x9b, 0x5c,
+ 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a, 0xdf, 0x18,
+ 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0, 0xa9, 0x6e, 0x3a, 0xfd,
+ 0x92, 0x55, 0x01, 0xc6, 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0x0a,
+ 0x5e, 0x99, 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef,
+ 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75, 0x6c, 0xab,
+ 0xff, 0x38, 0x57, 0x90, 0xc4, 0x03, 0xa4, 0x63, 0x37, 0xf0,
+ 0x9f, 0x58, 0x0c, 0xcb, 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e,
+ 0x7a, 0xbd, 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27,
+ 0x3e, 0xf9, 0xad, 0x6a, 0x05, 0xc2, 0x96, 0x51, 0x61, 0xa6,
+ 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0x0e, 0x17, 0xd0, 0x84, 0x43,
+ 0x2c, 0xeb, 0xbf, 0x78, 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71,
+ 0x25, 0xe2, 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x07, 0x53, 0x94,
+ 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6,
+ 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c, 0x1c, 0xd4, 0x91, 0x59,
+ 0x1b, 0xd3, 0x96, 0x5e, 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd,
+ 0x98, 0x50, 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a,
+ 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74, 0x24, 0xec,
+ 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66, 0x2a, 0xe2, 0xa7, 0x6f,
+ 0x2d, 0xe5, 0xa0, 0x68, 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf,
+ 0xfa, 0x32, 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c,
+ 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e, 0x62, 0xaa,
+ 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20, 0x48, 0x80, 0xc5, 0x0d,
+ 0x4f, 0x87, 0xc2, 0x0a, 0x46, 0x8e, 0xcb, 0x03, 0x41, 0x89,
+ 0xcc, 0x04, 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16,
+ 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18, 0xe0, 0x28,
+ 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2, 0xee, 0x26, 0x63, 0xab,
+ 0xe9, 0x21, 0x64, 0xac, 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33,
+ 0x76, 0xbe, 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0,
+ 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a, 0xd6, 0x1e,
+ 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94, 0xc4, 0x0c, 0x49, 0x81,
+ 0xc3, 0x0b, 0x4e, 0x86, 0xca, 0x02, 0x47, 0x8f, 0xcd, 0x05,
+ 0x40, 0x88, 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2,
+ 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc, 0x8c, 0x44,
+ 0x01, 0xc9, 0x8b, 0x43, 0x06, 0xce, 0x82, 0x4a, 0x0f, 0xc7,
+ 0x85, 0x4d, 0x08, 0xc0, 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67,
+ 0x22, 0xea, 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4,
+ 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6, 0xba, 0x72,
+ 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8, 0x00, 0xc9, 0x8f, 0x46,
+ 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc,
+ 0x8a, 0x43, 0x0c, 0xc5, 0x83, 0x4a, 0x0f, 0xc6, 0x80, 0x49,
+ 0x0a, 0xc3, 0x85, 0x4c, 0x09, 0xc0, 0x86, 0x4f, 0x18, 0xd1,
+ 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d, 0x1e, 0xd7, 0x91, 0x58,
+ 0x1d, 0xd4, 0x92, 0x5b, 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde,
+ 0x98, 0x51, 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57,
+ 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75, 0x36, 0xff,
+ 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73, 0x3c, 0xf5, 0xb3, 0x7a,
+ 0x3f, 0xf6, 0xb0, 0x79, 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0,
+ 0xb6, 0x7f, 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d,
+ 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b, 0x24, 0xed,
+ 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61, 0x22, 0xeb, 0xad, 0x64,
+ 0x21, 0xe8, 0xae, 0x67, 0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa,
+ 0xec, 0x25, 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23,
+ 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29, 0x6a, 0xa3,
+ 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f, 0x78, 0xb1, 0xf7, 0x3e,
+ 0x7b, 0xb2, 0xf4, 0x3d, 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4,
+ 0xf2, 0x3b, 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31,
+ 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37, 0x50, 0x99,
+ 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15, 0x56, 0x9f, 0xd9, 0x10,
+ 0x55, 0x9c, 0xda, 0x13, 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96,
+ 0xd0, 0x19, 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f,
+ 0x48, 0x81, 0xc7, 0x0e, 0x4b, 0x82, 0xc4, 0x0d, 0x4e, 0x87,
+ 0xc1, 0x08, 0x4d, 0x84, 0xc2, 0x0b, 0x44, 0x8d, 0xcb, 0x02,
+ 0x47, 0x8e, 0xc8, 0x01, 0x42, 0x8b, 0xcd, 0x04, 0x41, 0x88,
+ 0xce, 0x07, 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c,
+ 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52, 0x3c, 0xf6,
+ 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70, 0x22, 0xe8, 0xab, 0x61,
+ 0x2d, 0xe7, 0xa4, 0x6e, 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd,
+ 0xfe, 0x34, 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a,
+ 0x44, 0x8e, 0xcd, 0x07, 0x4b, 0x81, 0xc2, 0x08, 0x5a, 0x90,
+ 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16, 0xf0, 0x3a, 0x79, 0xb3,
+ 0xff, 0x35, 0x76, 0xbc, 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b,
+ 0x68, 0xa2, 0xcc, 0x06, 0x45, 0x8f, 0xc3, 0x09, 0x4a, 0x80,
+ 0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e, 0x88, 0x42,
+ 0x01, 0xcb, 0x87, 0x4d, 0x0e, 0xc4, 0x96, 0x5c, 0x1f, 0xd5,
+ 0x99, 0x53, 0x10, 0xda, 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71,
+ 0x32, 0xf8, 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6,
+ 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1, 0xe3, 0x29,
+ 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf, 0xc1, 0x0b, 0x48, 0x82,
+ 0xce, 0x04, 0x47, 0x8d, 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a,
+ 0x59, 0x93, 0x85, 0x4f, 0x0c, 0xc6, 0x8a, 0x40, 0x03, 0xc9,
+ 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7, 0xb9, 0x73,
+ 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5, 0xa7, 0x6d, 0x2e, 0xe4,
+ 0xa8, 0x62, 0x21, 0xeb, 0x0d, 0xc7, 0x84, 0x4e, 0x02, 0xc8,
+ 0x8b, 0x41, 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f,
+ 0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d, 0x2f, 0xe5,
+ 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63, 0x75, 0xbf, 0xfc, 0x36,
+ 0x7a, 0xb0, 0xf3, 0x39, 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae,
+ 0xed, 0x27, 0x49, 0x83, 0xc0, 0x0a, 0x46, 0x8c, 0xcf, 0x05,
+ 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b, 0x00, 0xcb,
+ 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56,
+ 0x1d, 0xd6, 0x96, 0x5d, 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec,
+ 0xac, 0x67, 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71,
+ 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13, 0x4e, 0x85,
+ 0xc5, 0x0e, 0x45, 0x8e, 0xce, 0x05, 0x74, 0xbf, 0xff, 0x34,
+ 0x7f, 0xb4, 0xf4, 0x3f, 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2,
+ 0xe2, 0x29, 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb,
+ 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed, 0x9c, 0x57,
+ 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7, 0x8a, 0x41, 0x01, 0xca,
+ 0x81, 0x4a, 0x0a, 0xc1, 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28,
+ 0x68, 0xa3, 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5,
+ 0xc4, 0x0f, 0x4f, 0x84, 0xcf, 0x04, 0x44, 0x8f, 0xd2, 0x19,
+ 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99, 0x7d, 0xb6, 0xf6, 0x3d,
+ 0x76, 0xbd, 0xfd, 0x36, 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab,
+ 0xeb, 0x20, 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a,
+ 0x47, 0x8c, 0xcc, 0x07, 0x4c, 0x87, 0xc7, 0x0c, 0x25, 0xee,
+ 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e, 0x33, 0xf8, 0xb8, 0x73,
+ 0x38, 0xf3, 0xb3, 0x78, 0x09, 0xc2, 0x82, 0x49, 0x02, 0xc9,
+ 0x89, 0x42, 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54,
+ 0xcd, 0x06, 0x46, 0x8d, 0xc6, 0x0d, 0x4d, 0x86, 0xdb, 0x10,
+ 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90, 0xe1, 0x2a, 0x6a, 0xa1,
+ 0xea, 0x21, 0x61, 0xaa, 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37,
+ 0x77, 0xbc, 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde,
+ 0x83, 0x48, 0x08, 0xc3, 0x88, 0x43, 0x03, 0xc8, 0xb9, 0x72,
+ 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2, 0xaf, 0x64, 0x24, 0xef,
+ 0xa4, 0x6f, 0x2f, 0xe4, 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb,
+ 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70,
+ 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x02, 0x72, 0xbe,
+ 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c, 0xb8, 0x74, 0x3d, 0xf1,
+ 0xaf, 0x63, 0x2a, 0xe6, 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d,
+ 0x04, 0xc8, 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba,
+ 0xca, 0x06, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94, 0x6d, 0xa1,
+ 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33, 0x43, 0x8f, 0xc6, 0x0a,
+ 0x54, 0x98, 0xd1, 0x1d, 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea,
+ 0xa3, 0x6f, 0x1f, 0xd3, 0x9a, 0x56, 0x08, 0xc4, 0x8d, 0x41,
+ 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0x0e, 0x47, 0x8b, 0xfb, 0x37,
+ 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5, 0x89, 0x45, 0x0c, 0xc0,
+ 0x9e, 0x52, 0x1b, 0xd7, 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c,
+ 0x35, 0xf9, 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x01, 0x48, 0x84,
+ 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa, 0x86, 0x4a,
+ 0x03, 0xcf, 0x91, 0x5d, 0x14, 0xd8, 0xa8, 0x64, 0x2d, 0xe1,
+ 0xbf, 0x73, 0x3a, 0xf6, 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9,
+ 0xf0, 0x3c, 0x4c, 0x80, 0xc9, 0x05, 0x5b, 0x97, 0xde, 0x12,
+ 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60, 0x10, 0xdc,
+ 0x95, 0x59, 0x07, 0xcb, 0x82, 0x4e, 0xb7, 0x7b, 0x32, 0xfe,
+ 0xa0, 0x6c, 0x25, 0xe9, 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42,
+ 0x0b, 0xc7, 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5,
+ 0xc5, 0x09, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b, 0x0f, 0xc3,
+ 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51, 0x21, 0xed, 0xa4, 0x68,
+ 0x36, 0xfa, 0xb3, 0x7f, 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88,
+ 0xc1, 0x0d, 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23,
+ 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb,
+ 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f, 0x4c, 0x81, 0xcb, 0x06,
+ 0x5f, 0x92, 0xd8, 0x15, 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4,
+ 0xfe, 0x33, 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0x0c, 0xc1,
+ 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7, 0xd4, 0x19,
+ 0x53, 0x9e, 0xc7, 0x0a, 0x40, 0x8d, 0xf2, 0x3f, 0x75, 0xb8,
+ 0xe1, 0x2c, 0x66, 0xab, 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3,
+ 0xb9, 0x74, 0x0b, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52,
+ 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38, 0x47, 0x8a,
+ 0xc0, 0x0d, 0x54, 0x99, 0xd3, 0x1e, 0xb5, 0x78, 0x32, 0xff,
+ 0xa6, 0x6b, 0x21, 0xec, 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d,
+ 0x07, 0xca, 0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0,
+ 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x01, 0x4b, 0x86, 0x5a, 0x97,
+ 0xdd, 0x10, 0x49, 0x84, 0xce, 0x03, 0x7c, 0xb1, 0xfb, 0x36,
+ 0x6f, 0xa2, 0xe8, 0x25, 0x16, 0xdb, 0x91, 0x5c, 0x05, 0xc8,
+ 0x82, 0x4f, 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69,
+ 0xc2, 0x0f, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b, 0xe4, 0x29,
+ 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd, 0x8e, 0x43, 0x09, 0xc4,
+ 0x9d, 0x50, 0x1a, 0xd7, 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76,
+ 0x3c, 0xf1, 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e,
+ 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x08, 0x3b, 0xf6,
+ 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62, 0x1d, 0xd0, 0x9a, 0x57,
+ 0x0e, 0xc3, 0x89, 0x44, 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31,
+ 0x7b, 0xb6, 0xc9, 0x04, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90,
+ 0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa, 0x85, 0x48,
+ 0x02, 0xcf, 0x96, 0x5b, 0x11, 0xdc, 0x00, 0xce, 0x81, 0x4f,
+ 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef,
+ 0xa0, 0x6e, 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c,
+ 0x42, 0x8c, 0xc3, 0x0d, 0x5d, 0x93, 0xdc, 0x12, 0xf8, 0x36,
+ 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8, 0xc6, 0x08, 0x47, 0x89,
+ 0xd9, 0x17, 0x58, 0x96, 0x84, 0x4a, 0x05, 0xcb, 0x9b, 0x55,
+ 0x1a, 0xd4, 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea,
+ 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd, 0xd3, 0x1d,
+ 0x52, 0x9c, 0xcc, 0x02, 0x4d, 0x83, 0x91, 0x5f, 0x10, 0xde,
+ 0x8e, 0x40, 0x0f, 0xc1, 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e,
+ 0x31, 0xff, 0x15, 0xdb, 0x94, 0x5a, 0x0a, 0xc4, 0x8b, 0x45,
+ 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b, 0x69, 0xa7,
+ 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39, 0x57, 0x99, 0xd6, 0x18,
+ 0x48, 0x86, 0xc9, 0x07, 0xc7, 0x09, 0x46, 0x88, 0xd8, 0x16,
+ 0x59, 0x97, 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9,
+ 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb, 0x85, 0x4b,
+ 0x04, 0xca, 0x9a, 0x54, 0x1b, 0xd5, 0x3f, 0xf1, 0xbe, 0x70,
+ 0x20, 0xee, 0xa1, 0x6f, 0x01, 0xcf, 0x80, 0x4e, 0x1e, 0xd0,
+ 0x9f, 0x51, 0x43, 0x8d, 0xc2, 0x0c, 0x5c, 0x92, 0xdd, 0x13,
+ 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d, 0x2a, 0xe4,
+ 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a, 0x14, 0xda, 0x95, 0x5b,
+ 0x0b, 0xc5, 0x8a, 0x44, 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87,
+ 0xc8, 0x06, 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38,
+ 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x03, 0x4c, 0x82, 0xec, 0x22,
+ 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc, 0xae, 0x60, 0x2f, 0xe1,
+ 0xb1, 0x7f, 0x30, 0xfe, 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41,
+ 0x0e, 0xc0, 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61, 0x6c, 0xa3,
+ 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b, 0x5a, 0x95, 0xd9, 0x16,
+ 0x41, 0x8e, 0xc2, 0x0d, 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0x0c,
+ 0x40, 0x8f, 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9,
+ 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3, 0x82, 0x4d,
+ 0x01, 0xce, 0x99, 0x56, 0x1a, 0xd5, 0xad, 0x62, 0x2e, 0xe1,
+ 0xb6, 0x79, 0x35, 0xfa, 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f,
+ 0x03, 0xcc, 0xc1, 0x0e, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96,
+ 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0, 0x75, 0xba,
+ 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22, 0x43, 0x8c, 0xc0, 0x0f,
+ 0x58, 0x97, 0xdb, 0x14, 0x19, 0xd6, 0x9a, 0x55, 0x02, 0xcd,
+ 0x81, 0x4e, 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78,
+ 0x47, 0x88, 0xc4, 0x0b, 0x5c, 0x93, 0xdf, 0x10, 0x71, 0xbe,
+ 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26, 0x2b, 0xe4, 0xa8, 0x67,
+ 0x30, 0xff, 0xb3, 0x7c, 0x1d, 0xd2, 0x9e, 0x51, 0x06, 0xc9,
+ 0x85, 0x4a, 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x07, 0xc8,
+ 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe, 0xf3, 0x3c,
+ 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4, 0xc5, 0x0a, 0x46, 0x89,
+ 0xde, 0x11, 0x5d, 0x92, 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e,
+ 0x72, 0xbd, 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x08, 0x44, 0x8b,
+ 0x86, 0x49, 0x05, 0xca, 0x9d, 0x52, 0x1e, 0xd1, 0xb0, 0x7f,
+ 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7, 0x32, 0xfd, 0xb1, 0x7e,
+ 0x29, 0xe6, 0xaa, 0x65, 0x04, 0xcb, 0x87, 0x48, 0x1f, 0xd0,
+ 0x9c, 0x53, 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x09,
+ 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f, 0x00, 0xd0,
+ 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3,
+ 0xa9, 0x79, 0x14, 0xc4, 0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36,
+ 0x5b, 0x8b, 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45,
+ 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15, 0xd1, 0x01,
+ 0x6c, 0xbc, 0xb6, 0x66, 0x0b, 0xdb, 0x9e, 0x4e, 0x23, 0xf3,
+ 0xf9, 0x29, 0x44, 0x94, 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7,
+ 0x8a, 0x5a, 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34,
+ 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa, 0xbf, 0x6f,
+ 0x02, 0xd2, 0xd8, 0x08, 0x65, 0xb5, 0x71, 0xa1, 0xcc, 0x1c,
+ 0x16, 0xc6, 0xab, 0x7b, 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96,
+ 0xfb, 0x2b, 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5,
+ 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa, 0x6e, 0xbe,
+ 0xd3, 0x03, 0x09, 0xd9, 0xb4, 0x64, 0x7c, 0xac, 0xc1, 0x11,
+ 0x1b, 0xcb, 0xa6, 0x76, 0xb2, 0x62, 0x0f, 0xdf, 0xd5, 0x05,
+ 0x68, 0xb8, 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7,
+ 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39, 0x63, 0xb3,
+ 0xde, 0x0e, 0x04, 0xd4, 0xb9, 0x69, 0xad, 0x7d, 0x10, 0xc0,
+ 0xca, 0x1a, 0x77, 0xa7, 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55,
+ 0x38, 0xe8, 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26,
+ 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48, 0x8c, 0x5c,
+ 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86, 0xc3, 0x13, 0x7e, 0xae,
+ 0xa4, 0x74, 0x19, 0xc9, 0x0d, 0xdd, 0xb0, 0x60, 0x6a, 0xba,
+ 0xd7, 0x07, 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57,
+ 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99, 0xdc, 0x0c,
+ 0x61, 0xb1, 0xbb, 0x6b, 0x06, 0xd6, 0x12, 0xc2, 0xaf, 0x7f,
+ 0x75, 0xa5, 0xc8, 0x18, 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2,
+ 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb,
+ 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c, 0x57, 0x86,
+ 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a, 0x3f, 0xee, 0x80, 0x51,
+ 0x5c, 0x8d, 0xe3, 0x32, 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b,
+ 0x25, 0xf4, 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3,
+ 0x68, 0xb9, 0xd7, 0x06, 0x0b, 0xda, 0xb4, 0x65, 0x7e, 0xaf,
+ 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73, 0xb8, 0x69, 0x07, 0xd6,
+ 0xdb, 0x0a, 0x64, 0xb5, 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d,
+ 0x33, 0xe2, 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24,
+ 0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c, 0x87, 0x56,
+ 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a, 0xd0, 0x01, 0x6f, 0xbe,
+ 0xb3, 0x62, 0x0c, 0xdd, 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4,
+ 0xca, 0x1b, 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1,
+ 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37, 0x6d, 0xbc,
+ 0xd2, 0x03, 0x0e, 0xdf, 0xb1, 0x60, 0xab, 0x7a, 0x14, 0xc5,
+ 0xc8, 0x19, 0x77, 0xa6, 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71,
+ 0x1f, 0xce, 0x05, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x08,
+ 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f, 0x94, 0x45,
+ 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99, 0x82, 0x53, 0x3d, 0xec,
+ 0xe1, 0x30, 0x5e, 0x8f, 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6,
+ 0x98, 0x49, 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e,
+ 0xd5, 0x04, 0x6a, 0xbb, 0xb6, 0x67, 0x09, 0xd8, 0xbd, 0x6c,
+ 0x02, 0xd3, 0xde, 0x0f, 0x61, 0xb0, 0x7b, 0xaa, 0xc4, 0x15,
+ 0x18, 0xc9, 0xa7, 0x76, 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e,
+ 0xf0, 0x21, 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7,
+ 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c,
+ 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda, 0xa1, 0x73, 0x18, 0xca,
+ 0xce, 0x1c, 0x77, 0xa5, 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2,
+ 0xa9, 0x7b, 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b,
+ 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85, 0xfe, 0x2c,
+ 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa, 0x20, 0xf2, 0x99, 0x4b,
+ 0x4f, 0x9d, 0xf6, 0x24, 0xbe, 0x6c, 0x07, 0xd5, 0xd1, 0x03,
+ 0x68, 0xba, 0x60, 0xb2, 0xd9, 0x0b, 0x0f, 0xdd, 0xb6, 0x64,
+ 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b, 0xc1, 0x13,
+ 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5, 0xe1, 0x33, 0x58, 0x8a,
+ 0x8e, 0x5c, 0x37, 0xe5, 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82,
+ 0xe9, 0x3b, 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44,
+ 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a, 0x61, 0xb3,
+ 0xd8, 0x0a, 0x0e, 0xdc, 0xb7, 0x65, 0xbf, 0x6d, 0x06, 0xd4,
+ 0xd0, 0x02, 0x69, 0xbb, 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d,
+ 0x16, 0xc4, 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a,
+ 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a, 0xe0, 0x32,
+ 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4, 0x9f, 0x4d, 0x26, 0xf4,
+ 0xf0, 0x22, 0x49, 0x9b, 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc,
+ 0x97, 0x45, 0xdf, 0x0d, 0x66, 0xb4, 0xb0, 0x62, 0x09, 0xdb,
+ 0x01, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x05, 0x7e, 0xac,
+ 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a, 0xa0, 0x72, 0x19, 0xcb,
+ 0xcf, 0x1d, 0x76, 0xa4, 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d,
+ 0x56, 0x84, 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a,
+ 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25, 0xff, 0x2d,
+ 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb, 0x00, 0xd3, 0xbb, 0x68,
+ 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e,
+ 0x06, 0xd5, 0xb1, 0x62, 0x0a, 0xd9, 0xda, 0x09, 0x61, 0xb2,
+ 0x67, 0xb4, 0xdc, 0x0f, 0x0c, 0xdf, 0xb7, 0x64, 0x7f, 0xac,
+ 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c, 0xa9, 0x7a, 0x12, 0xc1,
+ 0xc2, 0x11, 0x79, 0xaa, 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76,
+ 0x1e, 0xcd, 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b,
+ 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd, 0x28, 0xfb,
+ 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b, 0x4f, 0x9c, 0xf4, 0x27,
+ 0x24, 0xf7, 0x9f, 0x4c, 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21,
+ 0x49, 0x9a, 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82,
+ 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54, 0x30, 0xe3,
+ 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33, 0xe6, 0x35, 0x5d, 0x8e,
+ 0x8d, 0x5e, 0x36, 0xe5, 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59,
+ 0x31, 0xe2, 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34,
+ 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53, 0x86, 0x55,
+ 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85, 0x9e, 0x4d, 0x25, 0xf6,
+ 0xf5, 0x26, 0x4e, 0x9d, 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0,
+ 0x98, 0x4b, 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c,
+ 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa, 0x1f, 0xcc,
+ 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c, 0xc9, 0x1a, 0x72, 0xa1,
+ 0xa2, 0x71, 0x19, 0xca, 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16,
+ 0x7e, 0xad, 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b,
+ 0x60, 0xb3, 0xdb, 0x08, 0x0b, 0xd8, 0xb0, 0x63, 0xb6, 0x65,
+ 0x0d, 0xde, 0xdd, 0x0e, 0x66, 0xb5, 0xd1, 0x02, 0x6a, 0xb9,
+ 0xba, 0x69, 0x01, 0xd2, 0x07, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf,
+ 0xd7, 0x04, 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16,
+ 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8, 0xc1, 0x15,
+ 0x74, 0xa0, 0xb6, 0x62, 0x03, 0xd7, 0x2f, 0xfb, 0x9a, 0x4e,
+ 0x58, 0x8c, 0xed, 0x39, 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c,
+ 0x5d, 0x89, 0x71, 0xa5, 0xc4, 0x10, 0x06, 0xd2, 0xb3, 0x67,
+ 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48, 0xb0, 0x64,
+ 0x05, 0xd1, 0xc7, 0x13, 0x72, 0xa6, 0x23, 0xf7, 0x96, 0x42,
+ 0x54, 0x80, 0xe1, 0x35, 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e,
+ 0x0f, 0xdb, 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4,
+ 0x0c, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a, 0xbc, 0x68,
+ 0x09, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa, 0x52, 0x86, 0xe7, 0x33,
+ 0x25, 0xf1, 0x90, 0x44, 0x7d, 0xa9, 0xc8, 0x1c, 0x0a, 0xde,
+ 0xbf, 0x6b, 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85,
+ 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50, 0xa8, 0x7c,
+ 0x1d, 0xc9, 0xdf, 0x0b, 0x6a, 0xbe, 0x87, 0x53, 0x32, 0xe6,
+ 0xf0, 0x24, 0x45, 0x91, 0x69, 0xbd, 0xdc, 0x08, 0x1e, 0xca,
+ 0xab, 0x7f, 0xd9, 0x0d, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf,
+ 0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21, 0x18, 0xcc,
+ 0xad, 0x79, 0x6f, 0xbb, 0xda, 0x0e, 0xf6, 0x22, 0x43, 0x97,
+ 0x81, 0x55, 0x34, 0xe0, 0x65, 0xb1, 0xd0, 0x04, 0x12, 0xc6,
+ 0xa7, 0x73, 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d,
+ 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x07, 0x66, 0xb2, 0x4a, 0x9e,
+ 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c, 0xfa, 0x2e, 0x4f, 0x9b,
+ 0x8d, 0x59, 0x38, 0xec, 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7,
+ 0xd6, 0x02, 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d,
+ 0xd5, 0x01, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3, 0x00, 0xd5,
+ 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84,
+ 0x95, 0x40, 0x22, 0xf7, 0xd1, 0x04, 0x66, 0xb3, 0xa2, 0x77,
+ 0x15, 0xc0, 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26,
+ 0xbf, 0x6a, 0x08, 0xdd, 0xcc, 0x19, 0x7b, 0xae, 0x59, 0x8c,
+ 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48, 0x6e, 0xbb, 0xd9, 0x0c,
+ 0x1d, 0xc8, 0xaa, 0x7f, 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e,
+ 0x4c, 0x99, 0x63, 0xb6, 0xd4, 0x01, 0x10, 0xc5, 0xa7, 0x72,
+ 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94, 0xb2, 0x67,
+ 0x05, 0xd0, 0xc1, 0x14, 0x76, 0xa3, 0x54, 0x81, 0xe3, 0x36,
+ 0x27, 0xf2, 0x90, 0x45, 0xdc, 0x09, 0x6b, 0xbe, 0xaf, 0x7a,
+ 0x18, 0xcd, 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b,
+ 0x0d, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c, 0xeb, 0x3e,
+ 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa, 0xc6, 0x13, 0x71, 0xa4,
+ 0xb5, 0x60, 0x02, 0xd7, 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86,
+ 0xe4, 0x31, 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x06,
+ 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0, 0x79, 0xac,
+ 0xce, 0x1b, 0x0a, 0xdf, 0xbd, 0x68, 0x9f, 0x4a, 0x28, 0xfd,
+ 0xec, 0x39, 0x5b, 0x8e, 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0x0e,
+ 0x6c, 0xb9, 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f,
+ 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x03, 0x61, 0xb4, 0x43, 0x96,
+ 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52, 0x74, 0xa1, 0xc3, 0x16,
+ 0x07, 0xd2, 0xb0, 0x65, 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34,
+ 0x56, 0x83, 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0x0b,
+ 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed, 0xcb, 0x1e,
+ 0x7c, 0xa9, 0xb8, 0x6d, 0x0f, 0xda, 0x2d, 0xf8, 0x9a, 0x4f,
+ 0x5e, 0x8b, 0xe9, 0x3c, 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9,
+ 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6,
+ 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9, 0x1f, 0xc9,
+ 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x07, 0xdf, 0x09, 0x6e, 0xb8,
+ 0xa0, 0x76, 0x11, 0xc7, 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88,
+ 0xef, 0x39, 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26,
+ 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0x0e, 0xd8, 0xa3, 0x75,
+ 0x12, 0xc4, 0xdc, 0x0a, 0x6d, 0xbb, 0x5d, 0x8b, 0xec, 0x3a,
+ 0x22, 0xf4, 0x93, 0x45, 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb,
+ 0x8c, 0x5a, 0xbc, 0x6a, 0x0d, 0xdb, 0xc3, 0x15, 0x72, 0xa4,
+ 0x7c, 0xaa, 0xcd, 0x1b, 0x03, 0xd5, 0xb2, 0x64, 0x82, 0x54,
+ 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a, 0x9d, 0x4b, 0x2c, 0xfa,
+ 0xe2, 0x34, 0x53, 0x85, 0x63, 0xb5, 0xd2, 0x04, 0x1c, 0xca,
+ 0xad, 0x7b, 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43,
+ 0xa5, 0x73, 0x14, 0xc2, 0xda, 0x0c, 0x6b, 0xbd, 0xba, 0x6c,
+ 0x0b, 0xdd, 0xc5, 0x13, 0x74, 0xa2, 0x44, 0x92, 0xf5, 0x23,
+ 0x3b, 0xed, 0x8a, 0x5c, 0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d,
+ 0x4a, 0x9c, 0x7a, 0xac, 0xcb, 0x1d, 0x05, 0xd3, 0xb4, 0x62,
+ 0x65, 0xb3, 0xd4, 0x02, 0x1a, 0xcc, 0xab, 0x7d, 0x9b, 0x4d,
+ 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83, 0xf8, 0x2e, 0x49, 0x9f,
+ 0x87, 0x51, 0x36, 0xe0, 0x06, 0xd0, 0xb7, 0x61, 0x79, 0xaf,
+ 0xc8, 0x1e, 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x01,
+ 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff, 0x27, 0xf1,
+ 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f, 0xd9, 0x0f, 0x68, 0xbe,
+ 0xa6, 0x70, 0x17, 0xc1, 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f,
+ 0x08, 0xde, 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20,
+ 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21,
+ 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9, 0xf1, 0x26, 0x42, 0x95,
+ 0x8a, 0x5d, 0x39, 0xee, 0x07, 0xd0, 0xb4, 0x63, 0x7c, 0xab,
+ 0xcf, 0x18, 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0,
+ 0x09, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16, 0x0e, 0xd9,
+ 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11, 0xf8, 0x2f, 0x4b, 0x9c,
+ 0x83, 0x54, 0x30, 0xe7, 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f,
+ 0x2b, 0xfc, 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0x0a,
+ 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0x0d, 0xe4, 0x33,
+ 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb, 0x1c, 0xcb, 0xaf, 0x78,
+ 0x67, 0xb0, 0xd4, 0x03, 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46,
+ 0x22, 0xf5, 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2,
+ 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x04, 0xdb, 0x0c,
+ 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4, 0x2d, 0xfa, 0x9e, 0x49,
+ 0x56, 0x81, 0xe5, 0x32, 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86,
+ 0xe2, 0x35, 0xdc, 0x0b, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3,
+ 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b, 0xd2, 0x05,
+ 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd, 0xd5, 0x02, 0x66, 0xb1,
+ 0xae, 0x79, 0x1d, 0xca, 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f,
+ 0xeb, 0x3c, 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27,
+ 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x06, 0xd1, 0xc9, 0x1e,
+ 0x7a, 0xad, 0xb2, 0x65, 0x01, 0xd6, 0x3f, 0xe8, 0x8c, 0x5b,
+ 0x44, 0x93, 0xf7, 0x20, 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b,
+ 0x0f, 0xd8, 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e,
+ 0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29, 0xc0, 0x17,
+ 0x73, 0xa4, 0xbb, 0x6c, 0x08, 0xdf, 0x00, 0xd8, 0xad, 0x75,
+ 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11,
+ 0x64, 0xbc, 0x01, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33,
+ 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd, 0x02, 0xda,
+ 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30, 0x8c, 0x54, 0x21, 0xf9,
+ 0xcb, 0x13, 0x66, 0xbe, 0x03, 0xdb, 0xae, 0x76, 0x44, 0x9c,
+ 0xe9, 0x31, 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf,
+ 0x04, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36, 0x8a, 0x52,
+ 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8, 0x05, 0xdd, 0xa8, 0x70,
+ 0x42, 0x9a, 0xef, 0x37, 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14,
+ 0x61, 0xb9, 0x06, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34,
+ 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba, 0x07, 0xdf,
+ 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35, 0x89, 0x51, 0x24, 0xfc,
+ 0xce, 0x16, 0x63, 0xbb, 0x08, 0xd0, 0xa5, 0x7d, 0x4f, 0x97,
+ 0xe2, 0x3a, 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4,
+ 0x09, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b, 0x87, 0x5f,
+ 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5, 0x0a, 0xd2, 0xa7, 0x7f,
+ 0x4d, 0x95, 0xe0, 0x38, 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b,
+ 0x6e, 0xb6, 0x0b, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39,
+ 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7, 0x0c, 0xd4,
+ 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e, 0x82, 0x5a, 0x2f, 0xf7,
+ 0xc5, 0x1d, 0x68, 0xb0, 0x0d, 0xd5, 0xa0, 0x78, 0x4a, 0x92,
+ 0xe7, 0x3f, 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1,
+ 0x0e, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c, 0x80, 0x58,
+ 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2, 0x0f, 0xd7, 0xa2, 0x7a,
+ 0x48, 0x90, 0xe5, 0x3d, 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e,
+ 0x6b, 0xb3, 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35,
+ 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3, 0x11, 0xc8,
+ 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24, 0x97, 0x4e, 0x38, 0xe1,
+ 0xd4, 0x0d, 0x7b, 0xa2, 0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8,
+ 0xce, 0x17, 0xa4, 0x7d, 0x0b, 0xd2, 0xe7, 0x3e, 0x48, 0x91,
+ 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x06, 0xb5, 0x6c,
+ 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80, 0x44, 0x9d, 0xeb, 0x32,
+ 0x07, 0xde, 0xa8, 0x71, 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58,
+ 0x2e, 0xf7, 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60,
+ 0xd3, 0x0a, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6, 0x66, 0xbf,
+ 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53, 0xe0, 0x39, 0x4f, 0x96,
+ 0xa3, 0x7a, 0x0c, 0xd5, 0x77, 0xae, 0xd8, 0x01, 0x34, 0xed,
+ 0x9b, 0x42, 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4,
+ 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd, 0x0e, 0xd7,
+ 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b, 0x99, 0x40, 0x36, 0xef,
+ 0xda, 0x03, 0x75, 0xac, 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85,
+ 0xf3, 0x2a, 0xaa, 0x73, 0x05, 0xdc, 0xe9, 0x30, 0x46, 0x9f,
+ 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19, 0xbb, 0x62,
+ 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e, 0x3d, 0xe4, 0x92, 0x4b,
+ 0x7e, 0xa7, 0xd1, 0x08, 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56,
+ 0x20, 0xf9, 0x4a, 0x93, 0xe5, 0x3c, 0x09, 0xd0, 0xa6, 0x7f,
+ 0xdd, 0x04, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8, 0x5b, 0x82,
+ 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e, 0xee, 0x37, 0x41, 0x98,
+ 0xad, 0x74, 0x02, 0xdb, 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2,
+ 0x84, 0x5d, 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca,
+ 0x79, 0xa0, 0xd6, 0x0f, 0x3a, 0xe3, 0x95, 0x4c, 0x00, 0xda,
+ 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed,
+ 0xd1, 0x0b, 0x78, 0xa2, 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4,
+ 0xc7, 0x1d, 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83,
+ 0x42, 0x98, 0xeb, 0x31, 0x0d, 0xd7, 0xa4, 0x7e, 0xdc, 0x06,
+ 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0, 0x63, 0xb9, 0xca, 0x10,
+ 0x2c, 0xf6, 0x85, 0x5f, 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68,
+ 0x1b, 0xc1, 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8,
+ 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26, 0xa5, 0x7f,
+ 0x0c, 0xd6, 0xea, 0x30, 0x43, 0x99, 0x3b, 0xe1, 0x92, 0x48,
+ 0x74, 0xae, 0xdd, 0x07, 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53,
+ 0x20, 0xfa, 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64,
+ 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x01, 0xdb, 0x79, 0xa3,
+ 0xd0, 0x0a, 0x36, 0xec, 0x9f, 0x45, 0x15, 0xcf, 0xbc, 0x66,
+ 0x5a, 0x80, 0xf3, 0x29, 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e,
+ 0x6d, 0xb7, 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x08,
+ 0xaa, 0x70, 0x03, 0xd9, 0xe5, 0x3f, 0x4c, 0x96, 0x57, 0x8d,
+ 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b, 0xc9, 0x13, 0x60, 0xba,
+ 0x86, 0x5c, 0x2f, 0xf5, 0x76, 0xac, 0xdf, 0x05, 0x39, 0xe3,
+ 0x90, 0x4a, 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0x0e, 0xd4,
+ 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x04, 0x77, 0xad, 0x0f, 0xd5,
+ 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33, 0xb0, 0x6a, 0x19, 0xc3,
+ 0xff, 0x25, 0x56, 0x8c, 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb,
+ 0xc8, 0x12, 0xd3, 0x09, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef,
+ 0x4d, 0x97, 0xe4, 0x3e, 0x02, 0xd8, 0xab, 0x71, 0xf2, 0x28,
+ 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce, 0x6c, 0xb6, 0xc5, 0x1f,
+ 0x23, 0xf9, 0x8a, 0x50, 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90,
+ 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad,
+ 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0x0a, 0xa7, 0x7c,
+ 0x0c, 0xd7, 0xec, 0x37, 0x47, 0x9c, 0x62, 0xb9, 0xc9, 0x12,
+ 0x29, 0xf2, 0x82, 0x59, 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64,
+ 0x14, 0xcf, 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68,
+ 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe, 0xc4, 0x1f,
+ 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff, 0x52, 0x89, 0xf9, 0x22,
+ 0x19, 0xc2, 0xb2, 0x69, 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65,
+ 0x15, 0xce, 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58,
+ 0xa6, 0x7d, 0x0d, 0xd6, 0xed, 0x36, 0x46, 0x9d, 0x30, 0xeb,
+ 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0x0b, 0x97, 0x4c, 0x3c, 0xe7,
+ 0xdc, 0x07, 0x77, 0xac, 0x01, 0xda, 0xaa, 0x71, 0x4a, 0x91,
+ 0xe1, 0x3a, 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x05, 0x75, 0xae,
+ 0x03, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38, 0xa4, 0x7f,
+ 0x0f, 0xd4, 0xef, 0x34, 0x44, 0x9f, 0x32, 0xe9, 0x99, 0x42,
+ 0x79, 0xa2, 0xd2, 0x09, 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67,
+ 0x17, 0xcc, 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a,
+ 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd, 0x50, 0x8b,
+ 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b, 0x51, 0x8a, 0xfa, 0x21,
+ 0x1a, 0xc1, 0xb1, 0x6a, 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57,
+ 0x27, 0xfc, 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b,
+ 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd, 0x33, 0xe8,
+ 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x08, 0xa5, 0x7e, 0x0e, 0xd5,
+ 0xee, 0x35, 0x45, 0x9e, 0x02, 0xd9, 0xa9, 0x72, 0x49, 0x92,
+ 0xe2, 0x39, 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x04, 0x74, 0xaf,
+ 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72,
+ 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80, 0x41, 0x9d, 0xe4, 0x38,
+ 0x16, 0xca, 0xb3, 0x6f, 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64,
+ 0x1d, 0xc1, 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x09, 0x70, 0xac,
+ 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x02, 0xc3, 0x1f,
+ 0x66, 0xba, 0x94, 0x48, 0x31, 0xed, 0x6d, 0xb1, 0xc8, 0x14,
+ 0x3a, 0xe6, 0x9f, 0x43, 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92,
+ 0xeb, 0x37, 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99,
+ 0x58, 0x84, 0xfd, 0x21, 0x0f, 0xd3, 0xaa, 0x76, 0xf6, 0x2a,
+ 0x53, 0x8f, 0xa1, 0x7d, 0x04, 0xd8, 0x9b, 0x47, 0x3e, 0xe2,
+ 0xcc, 0x10, 0x69, 0xb5, 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe,
+ 0xc7, 0x1b, 0xda, 0x06, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4,
+ 0x74, 0xa8, 0xd1, 0x0d, 0x23, 0xff, 0x86, 0x5a, 0x32, 0xee,
+ 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c, 0x9c, 0x40, 0x39, 0xe5,
+ 0xcb, 0x17, 0x6e, 0xb2, 0x73, 0xaf, 0xd6, 0x0a, 0x24, 0xf8,
+ 0x81, 0x5d, 0xdd, 0x01, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3,
+ 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e, 0x1e, 0xc2,
+ 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30, 0xf1, 0x2d, 0x54, 0x88,
+ 0xa6, 0x7a, 0x03, 0xdf, 0x5f, 0x83, 0xfa, 0x26, 0x08, 0xd4,
+ 0xad, 0x71, 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x05,
+ 0x85, 0x59, 0x20, 0xfc, 0xd2, 0x0e, 0x77, 0xab, 0x6a, 0xb6,
+ 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44, 0xc4, 0x18, 0x61, 0xbd,
+ 0x93, 0x4f, 0x36, 0xea, 0xa9, 0x75, 0x0c, 0xd0, 0xfe, 0x22,
+ 0x5b, 0x87, 0x07, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29,
+ 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6, 0x46, 0x9a,
+ 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68, 0x00, 0xdd, 0xa7, 0x7a,
+ 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28,
+ 0x52, 0x8f, 0x51, 0x8c, 0xf6, 0x2b, 0x02, 0xdf, 0xa5, 0x78,
+ 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x03, 0xde, 0xa2, 0x7f,
+ 0x05, 0xd8, 0xf1, 0x2c, 0x56, 0x8b, 0x04, 0xd9, 0xa3, 0x7e,
+ 0x57, 0x8a, 0xf0, 0x2d, 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d,
+ 0x07, 0xda, 0x55, 0x88, 0xf2, 0x2f, 0x06, 0xdb, 0xa1, 0x7c,
+ 0x59, 0x84, 0xfe, 0x23, 0x0a, 0xd7, 0xad, 0x70, 0xff, 0x22,
+ 0x58, 0x85, 0xac, 0x71, 0x0b, 0xd6, 0x08, 0xd5, 0xaf, 0x72,
+ 0x5b, 0x86, 0xfc, 0x21, 0xae, 0x73, 0x09, 0xd4, 0xfd, 0x20,
+ 0x5a, 0x87, 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0x0f, 0xd2,
+ 0x5d, 0x80, 0xfa, 0x27, 0x0e, 0xd3, 0xa9, 0x74, 0xaa, 0x77,
+ 0x0d, 0xd0, 0xf9, 0x24, 0x5e, 0x83, 0x0c, 0xd1, 0xab, 0x76,
+ 0x5f, 0x82, 0xf8, 0x25, 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c,
+ 0x46, 0x9b, 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d,
+ 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca, 0x45, 0x98,
+ 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c, 0x10, 0xcd, 0xb7, 0x6a,
+ 0x43, 0x9e, 0xe4, 0x39, 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38,
+ 0x42, 0x9f, 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68,
+ 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce, 0xeb, 0x36,
+ 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2, 0x4d, 0x90, 0xea, 0x37,
+ 0x1e, 0xc3, 0xb9, 0x64, 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34,
+ 0x4e, 0x93, 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35,
+ 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60, 0xef, 0x32,
+ 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6, 0x18, 0xc5, 0xbf, 0x62,
+ 0x4b, 0x96, 0xec, 0x31, 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30,
+ 0x4a, 0x97, 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20,
+ 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e, 0x61, 0xbf,
+ 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41, 0xdf, 0x01, 0x7e, 0xa0,
+ 0x80, 0x5e, 0x21, 0xff, 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43,
+ 0x3c, 0xe2, 0x7c, 0xa2, 0xdd, 0x03, 0x23, 0xfd, 0x82, 0x5c,
+ 0xa3, 0x7d, 0x02, 0xdc, 0xfc, 0x22, 0x5d, 0x83, 0x1d, 0xc3,
+ 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d, 0x99, 0x47, 0x38, 0xe6,
+ 0xc6, 0x18, 0x67, 0xb9, 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6,
+ 0xd9, 0x07, 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x06, 0xd8,
+ 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66, 0x5b, 0x85,
+ 0xfa, 0x24, 0x04, 0xda, 0xa5, 0x7b, 0xe5, 0x3b, 0x44, 0x9a,
+ 0xba, 0x64, 0x1b, 0xc5, 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb,
+ 0xc4, 0x1a, 0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x05, 0x7a, 0xa4,
+ 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0x0f, 0x91, 0x4f,
+ 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1, 0x4e, 0x90, 0xef, 0x31,
+ 0x11, 0xcf, 0xb0, 0x6e, 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71,
+ 0x0e, 0xd0, 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd,
+ 0x53, 0x8d, 0xf2, 0x2c, 0x0c, 0xd2, 0xad, 0x73, 0x8c, 0x52,
+ 0x2d, 0xf3, 0xd3, 0x0d, 0x72, 0xac, 0x32, 0xec, 0x93, 0x4d,
+ 0x6d, 0xb3, 0xcc, 0x12, 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37,
+ 0x48, 0x96, 0x08, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28,
+ 0xd7, 0x09, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7, 0x69, 0xb7,
+ 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49, 0x74, 0xaa, 0xd5, 0x0b,
+ 0x2b, 0xf5, 0x8a, 0x54, 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b,
+ 0x34, 0xea, 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35,
+ 0xab, 0x75, 0x0a, 0xd4, 0xf4, 0x2a, 0x55, 0x8b, 0x00, 0xdf,
+ 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca,
+ 0xed, 0x32, 0x4e, 0x91, 0x71, 0xae, 0xd2, 0x0d, 0x2a, 0xf5,
+ 0x89, 0x56, 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0,
+ 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5, 0x54, 0x8b,
+ 0xf7, 0x28, 0x0f, 0xd0, 0xac, 0x73, 0x93, 0x4c, 0x30, 0xef,
+ 0xc8, 0x17, 0x6b, 0xb4, 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1,
+ 0xdd, 0x02, 0xd9, 0x06, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe,
+ 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48, 0xa8, 0x77,
+ 0x0b, 0xd4, 0xf3, 0x2c, 0x50, 0x8f, 0x1e, 0xc1, 0xbd, 0x62,
+ 0x45, 0x9a, 0xe6, 0x39, 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf,
+ 0xc3, 0x1c, 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x09, 0x75, 0xaa,
+ 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d, 0xfc, 0x23,
+ 0x5f, 0x80, 0xa7, 0x78, 0x04, 0xdb, 0xaf, 0x70, 0x0c, 0xd3,
+ 0xf4, 0x2b, 0x57, 0x88, 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d,
+ 0xe1, 0x3e, 0xde, 0x01, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9,
+ 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f, 0x4d, 0x92,
+ 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a, 0xfb, 0x24, 0x58, 0x87,
+ 0xa0, 0x7f, 0x03, 0xdc, 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8,
+ 0xc4, 0x1b, 0x8a, 0x55, 0x29, 0xf6, 0xd1, 0x0e, 0x72, 0xad,
+ 0x76, 0xa9, 0xd5, 0x0a, 0x2d, 0xf2, 0x8e, 0x51, 0xc0, 0x1f,
+ 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7, 0x07, 0xd8, 0xa4, 0x7b,
+ 0x5c, 0x83, 0xff, 0x20, 0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35,
+ 0x49, 0x96, 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3,
+ 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x05, 0xe5, 0x3a,
+ 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2, 0x53, 0x8c, 0xf0, 0x2f,
+ 0x08, 0xd7, 0xab, 0x74, 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47,
+ 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9,
+ 0xa6, 0x46, 0x7b, 0x9b, 0x01, 0xe1, 0xdc, 0x3c, 0xf5, 0x15,
+ 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f, 0x51, 0xb1, 0x8c, 0x6c,
+ 0xf6, 0x16, 0x2b, 0xcb, 0x02, 0xe2, 0xdf, 0x3f, 0xa5, 0x45,
+ 0x78, 0x98, 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d,
+ 0xa4, 0x44, 0x79, 0x99, 0x03, 0xe3, 0xde, 0x3e, 0xa2, 0x42,
+ 0x7f, 0x9f, 0x05, 0xe5, 0xd8, 0x38, 0xf1, 0x11, 0x2c, 0xcc,
+ 0x56, 0xb6, 0x8b, 0x6b, 0x04, 0xe4, 0xd9, 0x39, 0xa3, 0x43,
+ 0x7e, 0x9e, 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd,
+ 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69, 0xa0, 0x40,
+ 0x7d, 0x9d, 0x07, 0xe7, 0xda, 0x3a, 0x55, 0xb5, 0x88, 0x68,
+ 0xf2, 0x12, 0x2f, 0xcf, 0x06, 0xe6, 0xdb, 0x3b, 0xa1, 0x41,
+ 0x7c, 0x9c, 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3,
+ 0x0a, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90, 0xff, 0x1f,
+ 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65, 0xac, 0x4c, 0x71, 0x91,
+ 0x0b, 0xeb, 0xd6, 0x36, 0x08, 0xe8, 0xd5, 0x35, 0xaf, 0x4f,
+ 0x72, 0x92, 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1,
+ 0xae, 0x4e, 0x73, 0x93, 0x09, 0xe9, 0xd4, 0x34, 0xfd, 0x1d,
+ 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67, 0xfb, 0x1b, 0x26, 0xc6,
+ 0x5c, 0xbc, 0x81, 0x61, 0xa8, 0x48, 0x75, 0x95, 0x0f, 0xef,
+ 0xd2, 0x32, 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7,
+ 0x0e, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94, 0xaa, 0x4a,
+ 0x77, 0x97, 0x0d, 0xed, 0xd0, 0x30, 0xf9, 0x19, 0x24, 0xc4,
+ 0x5e, 0xbe, 0x83, 0x63, 0x0c, 0xec, 0xd1, 0x31, 0xab, 0x4b,
+ 0x76, 0x96, 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5,
+ 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba,
+ 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6, 0xb6, 0x57, 0x69, 0x88,
+ 0x15, 0xf4, 0xca, 0x2b, 0xed, 0x0c, 0x32, 0xd3, 0x4e, 0xaf,
+ 0x91, 0x70, 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0x0d, 0xec,
+ 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7, 0xc7, 0x26,
+ 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a, 0x9c, 0x7d, 0x43, 0xa2,
+ 0x3f, 0xde, 0xe0, 0x01, 0xe2, 0x03, 0x3d, 0xdc, 0x41, 0xa0,
+ 0x9e, 0x7f, 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24,
+ 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9, 0x0f, 0xee,
+ 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92, 0x93, 0x72, 0x4c, 0xad,
+ 0x30, 0xd1, 0xef, 0x0e, 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a,
+ 0xb4, 0x55, 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8,
+ 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x02, 0xe3, 0xd9, 0x38,
+ 0x06, 0xe7, 0x7a, 0x9b, 0xa5, 0x44, 0x82, 0x63, 0x5d, 0xbc,
+ 0x21, 0xc0, 0xfe, 0x1f, 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d,
+ 0x13, 0xf2, 0x34, 0xd5, 0xeb, 0x0a, 0x97, 0x76, 0x48, 0xa9,
+ 0xa8, 0x49, 0x77, 0x96, 0x0b, 0xea, 0xd4, 0x35, 0xf3, 0x12,
+ 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e, 0x1e, 0xff, 0xc1, 0x20,
+ 0xbd, 0x5c, 0x62, 0x83, 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x07,
+ 0x39, 0xd8, 0x3b, 0xda, 0xe4, 0x05, 0x98, 0x79, 0x47, 0xa6,
+ 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd, 0x8d, 0x6c,
+ 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10, 0xd6, 0x37, 0x09, 0xe8,
+ 0x75, 0x94, 0xaa, 0x4b, 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x08,
+ 0x36, 0xd7, 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c,
+ 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61, 0xa7, 0x46,
+ 0x78, 0x99, 0x04, 0xe5, 0xdb, 0x3a, 0x00, 0xe2, 0xd9, 0x3b,
+ 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e,
+ 0x35, 0xd7, 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12,
+ 0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51, 0x11, 0xf3,
+ 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85, 0x52, 0xb0, 0x8b, 0x69,
+ 0xfd, 0x1f, 0x24, 0xc6, 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda,
+ 0xe1, 0x03, 0xd4, 0x36, 0x0d, 0xef, 0x7b, 0x99, 0xa2, 0x40,
+ 0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6, 0x61, 0x83,
+ 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5, 0xa4, 0x46, 0x7d, 0x9f,
+ 0x0b, 0xe9, 0xd2, 0x30, 0xe7, 0x05, 0x3e, 0xdc, 0x48, 0xaa,
+ 0x91, 0x73, 0x33, 0xd1, 0xea, 0x08, 0x9c, 0x7e, 0x45, 0xa7,
+ 0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x06, 0xe4, 0xb5, 0x57,
+ 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21, 0xf6, 0x14, 0x2f, 0xcd,
+ 0x59, 0xbb, 0x80, 0x62, 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x09,
+ 0x32, 0xd0, 0x07, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93,
+ 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56, 0x81, 0x63,
+ 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15, 0x55, 0xb7, 0x8c, 0x6e,
+ 0xfa, 0x18, 0x23, 0xc1, 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b,
+ 0x60, 0x82, 0xd3, 0x31, 0x0a, 0xe8, 0x7c, 0x9e, 0xa5, 0x47,
+ 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x04, 0x66, 0x84,
+ 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2, 0x25, 0xc7, 0xfc, 0x1e,
+ 0x8a, 0x68, 0x53, 0xb1, 0xe0, 0x02, 0x39, 0xdb, 0x4f, 0xad,
+ 0x96, 0x74, 0xa3, 0x41, 0x7a, 0x98, 0x0c, 0xee, 0xd5, 0x37,
+ 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x01, 0xe3, 0x34, 0xd6,
+ 0xed, 0x0f, 0x9b, 0x79, 0x42, 0xa0, 0xf1, 0x13, 0x28, 0xca,
+ 0x5e, 0xbc, 0x87, 0x65, 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff,
+ 0xc4, 0x26, 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93,
+ 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8, 0x96, 0x75,
+ 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x05, 0xdd, 0x3e, 0x06, 0xe5,
+ 0x76, 0x95, 0xad, 0x4e, 0x31, 0xd2, 0xea, 0x09, 0x9a, 0x79,
+ 0x41, 0xa2, 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0x0a, 0xe9,
+ 0xa7, 0x44, 0x7c, 0x9f, 0x0c, 0xef, 0xd7, 0x34, 0xec, 0x0f,
+ 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f, 0x62, 0x81, 0xb9, 0x5a,
+ 0xc9, 0x2a, 0x12, 0xf1, 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61,
+ 0x59, 0xba, 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67,
+ 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c, 0x53, 0xb0,
+ 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0, 0x18, 0xfb, 0xc3, 0x20,
+ 0xb3, 0x50, 0x68, 0x8b, 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d,
+ 0xb5, 0x56, 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d,
+ 0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57, 0x8f, 0x6c,
+ 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c, 0x52, 0xb1, 0x89, 0x6a,
+ 0xf9, 0x1a, 0x22, 0xc1, 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51,
+ 0x69, 0x8a, 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66,
+ 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d, 0x63, 0x80,
+ 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0, 0x28, 0xcb, 0xf3, 0x10,
+ 0x83, 0x60, 0x58, 0xbb, 0xa6, 0x45, 0x7d, 0x9e, 0x0d, 0xee,
+ 0xd6, 0x35, 0xed, 0x0e, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e,
+ 0x30, 0xd3, 0xeb, 0x08, 0x9b, 0x78, 0x40, 0xa3, 0x7b, 0x98,
+ 0xa0, 0x43, 0xd0, 0x33, 0x0b, 0xe8, 0x97, 0x74, 0x4c, 0xaf,
+ 0x3c, 0xdf, 0xe7, 0x04, 0xdc, 0x3f, 0x07, 0xe4, 0x77, 0x94,
+ 0xac, 0x4f, 0x01, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92,
+ 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x02, 0x3a, 0xd9, 0x00, 0xe4,
+ 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42,
+ 0xc4, 0x20, 0x11, 0xf5, 0xe6, 0x02, 0x33, 0xd7, 0x51, 0xb5,
+ 0x84, 0x60, 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13,
+ 0xd1, 0x35, 0x04, 0xe0, 0x66, 0x82, 0xb3, 0x57, 0xa2, 0x46,
+ 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24, 0x37, 0xd3, 0xe2, 0x06,
+ 0x80, 0x64, 0x55, 0xb1, 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17,
+ 0x26, 0xc2, 0xbf, 0x5b, 0x6a, 0x8e, 0x08, 0xec, 0xdd, 0x39,
+ 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a, 0x59, 0xbd,
+ 0x8c, 0x68, 0xee, 0x0a, 0x3b, 0xdf, 0x2a, 0xce, 0xff, 0x1b,
+ 0x9d, 0x79, 0x48, 0xac, 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d,
+ 0x0c, 0xe8, 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b,
+ 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0x0e, 0xfb, 0x1f,
+ 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d, 0x63, 0x87, 0xb6, 0x52,
+ 0xd4, 0x30, 0x01, 0xe5, 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43,
+ 0x72, 0x96, 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x03,
+ 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70, 0xb2, 0x56,
+ 0x67, 0x83, 0x05, 0xe1, 0xd0, 0x34, 0xc1, 0x25, 0x14, 0xf0,
+ 0x76, 0x92, 0xa3, 0x47, 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x07,
+ 0x36, 0xd2, 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1,
+ 0xdc, 0x38, 0x09, 0xed, 0x6b, 0x8f, 0xbe, 0x5a, 0xaf, 0x4b,
+ 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29, 0x3a, 0xde, 0xef, 0x0b,
+ 0x8d, 0x69, 0x58, 0xbc, 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a,
+ 0x2b, 0xcf, 0x0d, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b,
+ 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8, 0xeb, 0x0f,
+ 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d, 0x98, 0x7c, 0x4d, 0xa9,
+ 0x2f, 0xcb, 0xfa, 0x1e, 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56,
+ 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa,
+ 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77, 0x8d, 0x68,
+ 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0x0c, 0xf1, 0x14, 0x26, 0xc3,
+ 0x42, 0xa7, 0x95, 0x70, 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc,
+ 0xee, 0x0b, 0x07, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86,
+ 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd, 0xff, 0x1a,
+ 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e, 0x84, 0x61, 0x53, 0xb6,
+ 0x37, 0xd2, 0xe0, 0x05, 0x09, 0xec, 0xde, 0x3b, 0xba, 0x5f,
+ 0x6d, 0x88, 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3,
+ 0x0e, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f, 0x75, 0x90,
+ 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4, 0xf8, 0x1d, 0x2f, 0xca,
+ 0x4b, 0xae, 0x9c, 0x79, 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5,
+ 0xe7, 0x02, 0xe3, 0x06, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62,
+ 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19, 0x15, 0xf0,
+ 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94, 0x6e, 0x8b, 0xb9, 0x5c,
+ 0xdd, 0x38, 0x0a, 0xef, 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44,
+ 0x76, 0x93, 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0x0d, 0xe8,
+ 0xe4, 0x01, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65, 0x9f, 0x7a,
+ 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e, 0x1c, 0xf9, 0xcb, 0x2e,
+ 0xaf, 0x4a, 0x78, 0x9d, 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31,
+ 0x03, 0xe6, 0xea, 0x0f, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b,
+ 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10, 0xed, 0x08,
+ 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c, 0x96, 0x73, 0x41, 0xa4,
+ 0x25, 0xc0, 0xf2, 0x17, 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d,
+ 0x7f, 0x9a, 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x04, 0xe1,
+ 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85,
+ 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb, 0xc6, 0x20, 0x17, 0xf1,
+ 0x79, 0x9f, 0xa8, 0x4e, 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc,
+ 0xcb, 0x2d, 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19,
+ 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a, 0x57, 0xb1,
+ 0x86, 0x60, 0xe8, 0x0e, 0x39, 0xdf, 0x34, 0xd2, 0xe5, 0x03,
+ 0x8b, 0x6d, 0x5a, 0xbc, 0x3f, 0xd9, 0xee, 0x08, 0x80, 0x66,
+ 0x51, 0xb7, 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x05, 0x32, 0xd4,
+ 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71, 0x9a, 0x7c,
+ 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12, 0xae, 0x48, 0x7f, 0x99,
+ 0x11, 0xf7, 0xc0, 0x26, 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94,
+ 0xa3, 0x45, 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x06, 0xe0,
+ 0x0b, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83, 0x7e, 0x98,
+ 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6, 0x1d, 0xfb, 0xcc, 0x2a,
+ 0xa2, 0x44, 0x73, 0x95, 0xb8, 0x5e, 0x69, 0x8f, 0x07, 0xe1,
+ 0xd6, 0x30, 0xdb, 0x3d, 0x0a, 0xec, 0x64, 0x82, 0xb5, 0x53,
+ 0xef, 0x09, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67, 0x8c, 0x6a,
+ 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x04, 0x29, 0xcf, 0xf8, 0x1e,
+ 0x96, 0x70, 0x47, 0xa1, 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13,
+ 0x24, 0xc2, 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9,
+ 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa, 0x87, 0x61,
+ 0x56, 0xb0, 0x38, 0xde, 0xe9, 0x0f, 0xe4, 0x02, 0x35, 0xd3,
+ 0x5b, 0xbd, 0x8a, 0x6c, 0xd0, 0x36, 0x01, 0xe7, 0x6f, 0x89,
+ 0xbe, 0x58, 0xb3, 0x55, 0x62, 0x84, 0x0c, 0xea, 0xdd, 0x3b,
+ 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e, 0x75, 0x93,
+ 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd, 0x00, 0xe7, 0xd3, 0x34,
+ 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37,
+ 0x03, 0xe4, 0xd6, 0x31, 0x05, 0xe2, 0x6d, 0x8a, 0xbe, 0x59,
+ 0xbd, 0x5a, 0x6e, 0x89, 0x06, 0xe1, 0xd5, 0x32, 0xb1, 0x56,
+ 0x62, 0x85, 0x0a, 0xed, 0xd9, 0x3e, 0xda, 0x3d, 0x09, 0xee,
+ 0x61, 0x86, 0xb2, 0x55, 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b,
+ 0x0f, 0xe8, 0x0c, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83,
+ 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0, 0x14, 0xf3,
+ 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b, 0xa9, 0x4e, 0x7a, 0x9d,
+ 0x12, 0xf5, 0xc1, 0x26, 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e,
+ 0xaa, 0x4d, 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41,
+ 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a, 0x18, 0xff,
+ 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97, 0x73, 0x94, 0xa0, 0x47,
+ 0xc8, 0x2f, 0x1b, 0xfc, 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2,
+ 0x96, 0x71, 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a,
+ 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7, 0x43, 0xa4,
+ 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc, 0x4f, 0xa8, 0x9c, 0x7b,
+ 0xf4, 0x13, 0x27, 0xc0, 0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78,
+ 0x4c, 0xab, 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16,
+ 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d, 0x81, 0x66,
+ 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0x0e, 0xea, 0x0d, 0x39, 0xde,
+ 0x51, 0xb6, 0x82, 0x65, 0x57, 0xb0, 0x84, 0x63, 0xec, 0x0b,
+ 0x3f, 0xd8, 0x3c, 0xdb, 0xef, 0x08, 0x87, 0x60, 0x54, 0xb3,
+ 0x30, 0xd7, 0xe3, 0x04, 0x8b, 0x6c, 0x58, 0xbf, 0x5b, 0xbc,
+ 0x88, 0x6f, 0xe0, 0x07, 0x33, 0xd4, 0xe6, 0x01, 0x35, 0xd2,
+ 0x5d, 0xba, 0x8e, 0x69, 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1,
+ 0xe5, 0x02, 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1, 0x26, 0xce,
+ 0xeb, 0x03, 0xa1, 0x49, 0x6c, 0x84, 0x35, 0xdd, 0xf8, 0x10,
+ 0xb2, 0x5a, 0x7f, 0x97, 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23,
+ 0x06, 0xee, 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd,
+ 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x05, 0x20, 0xc8, 0x79, 0x91,
+ 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb, 0x98, 0x70, 0x55, 0xbd,
+ 0x1f, 0xf7, 0xd2, 0x3a, 0x8b, 0x63, 0x46, 0xae, 0x0c, 0xe4,
+ 0xc1, 0x29, 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c,
+ 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0x0f, 0xd4, 0x3c,
+ 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76, 0xc7, 0x2f, 0x0a, 0xe2,
+ 0x40, 0xa8, 0x8d, 0x65, 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d,
+ 0xb8, 0x50, 0xe1, 0x09, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43,
+ 0x2d, 0xc5, 0xe0, 0x08, 0xaa, 0x42, 0x67, 0x8f, 0x3e, 0xd6,
+ 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c, 0x0b, 0xe3, 0xc6, 0x2e,
+ 0x8c, 0x64, 0x41, 0xa9, 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77,
+ 0x52, 0xba, 0x61, 0x89, 0xac, 0x44, 0xe6, 0x0e, 0x2b, 0xc3,
+ 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0, 0x47, 0xaf,
+ 0x8a, 0x62, 0xc0, 0x28, 0x0d, 0xe5, 0x54, 0xbc, 0x99, 0x71,
+ 0xd3, 0x3b, 0x1e, 0xf6, 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda,
+ 0xff, 0x17, 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x04,
+ 0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31, 0x80, 0x68,
+ 0x4d, 0xa5, 0x07, 0xef, 0xca, 0x22, 0xf9, 0x11, 0x34, 0xdc,
+ 0x7e, 0x96, 0xb3, 0x5b, 0xea, 0x02, 0x27, 0xcf, 0x6d, 0x85,
+ 0xa0, 0x48, 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d,
+ 0xcc, 0x24, 0x01, 0xe9, 0x4b, 0xa3, 0x86, 0x6e, 0x00, 0xe9,
+ 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d,
+ 0x98, 0x71, 0x57, 0xbe, 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c,
+ 0x7a, 0x93, 0x2d, 0xc4, 0xe2, 0x0b, 0xae, 0x47, 0x61, 0x88,
+ 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x06, 0x20, 0xc9, 0x77, 0x9e,
+ 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2, 0x5a, 0xb3, 0x95, 0x7c,
+ 0xd9, 0x30, 0x16, 0xff, 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b,
+ 0x0d, 0xe4, 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d,
+ 0xc3, 0x2a, 0x0c, 0xe5, 0x40, 0xa9, 0x8f, 0x66, 0xee, 0x07,
+ 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b, 0xf5, 0x1c, 0x3a, 0xd3,
+ 0x76, 0x9f, 0xb9, 0x50, 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde,
+ 0xf8, 0x11, 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0x0a,
+ 0x82, 0x6b, 0x4d, 0xa4, 0x01, 0xe8, 0xce, 0x27, 0x99, 0x70,
+ 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c, 0xad, 0x44, 0x62, 0x8b,
+ 0x2e, 0xc7, 0xe1, 0x08, 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc,
+ 0xfa, 0x13, 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e,
+ 0x80, 0x69, 0x4f, 0xa6, 0x03, 0xea, 0xcc, 0x25, 0xc1, 0x28,
+ 0x0e, 0xe7, 0x42, 0xab, 0x8d, 0x64, 0xda, 0x33, 0x15, 0xfc,
+ 0x59, 0xb0, 0x96, 0x7f, 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d,
+ 0xbb, 0x52, 0xec, 0x05, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49,
+ 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0, 0x6e, 0x87,
+ 0xa1, 0x48, 0xed, 0x04, 0x22, 0xcb, 0x43, 0xaa, 0x8c, 0x65,
+ 0xc0, 0x29, 0x0f, 0xe6, 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32,
+ 0x14, 0xfd, 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc,
+ 0x02, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7, 0x2f, 0xc6,
+ 0xe0, 0x09, 0xac, 0x45, 0x63, 0x8a, 0x34, 0xdd, 0xfb, 0x12,
+ 0xb7, 0x5e, 0x78, 0x91, 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65,
+ 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf,
+ 0x06, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa, 0x05, 0xef,
+ 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9, 0x0c, 0xe6, 0xc5, 0x2f,
+ 0x83, 0x69, 0x4a, 0xa0, 0x0f, 0xe5, 0xc6, 0x2c, 0x80, 0x6a,
+ 0x49, 0xa3, 0x0a, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6,
+ 0x09, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5, 0x18, 0xf2,
+ 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4, 0x1b, 0xf1, 0xd2, 0x38,
+ 0x94, 0x7e, 0x5d, 0xb7, 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b,
+ 0x58, 0xb2, 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1,
+ 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8, 0x17, 0xfd,
+ 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb, 0x12, 0xf8, 0xdb, 0x31,
+ 0x9d, 0x77, 0x54, 0xbe, 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74,
+ 0x57, 0xbd, 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c,
+ 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f, 0x36, 0xdc,
+ 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a, 0x35, 0xdf, 0xfc, 0x16,
+ 0xba, 0x50, 0x73, 0x99, 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59,
+ 0x7a, 0x90, 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93,
+ 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96, 0x39, 0xd3,
+ 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95, 0x28, 0xc2, 0xe1, 0x0b,
+ 0xa7, 0x4d, 0x6e, 0x84, 0x2b, 0xc1, 0xe2, 0x08, 0xa4, 0x4e,
+ 0x6d, 0x87, 0x2e, 0xc4, 0xe7, 0x0d, 0xa1, 0x4b, 0x68, 0x82,
+ 0x2d, 0xc7, 0xe4, 0x0e, 0xa2, 0x48, 0x6b, 0x81, 0x24, 0xce,
+ 0xed, 0x07, 0xab, 0x41, 0x62, 0x88, 0x27, 0xcd, 0xee, 0x04,
+ 0xa8, 0x42, 0x61, 0x8b, 0x22, 0xc8, 0xeb, 0x01, 0xad, 0x47,
+ 0x64, 0x8e, 0x21, 0xcb, 0xe8, 0x02, 0xae, 0x44, 0x67, 0x8d,
+ 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0,
+ 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0, 0x16, 0xfd, 0xdd, 0x36,
+ 0x9d, 0x76, 0x56, 0xbd, 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d,
+ 0x5d, 0xb6, 0x2c, 0xc7, 0xe7, 0x0c, 0xa7, 0x4c, 0x6c, 0x87,
+ 0x27, 0xcc, 0xec, 0x07, 0xac, 0x47, 0x67, 0x8c, 0x3a, 0xd1,
+ 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91, 0x31, 0xda, 0xfa, 0x11,
+ 0xba, 0x51, 0x71, 0x9a, 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38,
+ 0x18, 0xf3, 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8,
+ 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0x0e, 0xe5, 0x45, 0xae,
+ 0x8e, 0x65, 0xce, 0x25, 0x05, 0xee, 0x74, 0x9f, 0xbf, 0x54,
+ 0xff, 0x14, 0x34, 0xdf, 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f,
+ 0x3f, 0xd4, 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x02, 0x22, 0xc9,
+ 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x09, 0x29, 0xc2, 0xb0, 0x5b,
+ 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b, 0xbb, 0x50, 0x70, 0x9b,
+ 0x30, 0xdb, 0xfb, 0x10, 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6,
+ 0xe6, 0x0d, 0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x06,
+ 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37, 0x97, 0x7c,
+ 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c, 0x8a, 0x61, 0x41, 0xaa,
+ 0x01, 0xea, 0xca, 0x21, 0x81, 0x6a, 0x4a, 0xa1, 0x0a, 0xe1,
+ 0xc1, 0x2a, 0xe8, 0x03, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43,
+ 0xe3, 0x08, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48, 0xfe, 0x15,
+ 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55, 0xf5, 0x1e, 0x3e, 0xd5,
+ 0x7e, 0x95, 0xb5, 0x5e, 0xc4, 0x2f, 0x0f, 0xe4, 0x4f, 0xa4,
+ 0x84, 0x6f, 0xcf, 0x24, 0x04, 0xef, 0x44, 0xaf, 0x8f, 0x64,
+ 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79, 0xd9, 0x32,
+ 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72, 0x00, 0xec, 0xc5, 0x29,
+ 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48,
+ 0x61, 0x8d, 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8,
+ 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x07, 0xeb, 0xcc, 0x20,
+ 0x09, 0xe5, 0x5b, 0xb7, 0x9e, 0x72, 0xff, 0x13, 0x3a, 0xd6,
+ 0x68, 0x84, 0xad, 0x41, 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1,
+ 0xf8, 0x14, 0x99, 0x75, 0x5c, 0xb0, 0x0e, 0xe2, 0xcb, 0x27,
+ 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b, 0xb6, 0x5a,
+ 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x08, 0xe3, 0x0f, 0x26, 0xca,
+ 0x74, 0x98, 0xb1, 0x5d, 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab,
+ 0x82, 0x6e, 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7,
+ 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x01, 0x28, 0xc4, 0x2f, 0xc3,
+ 0xea, 0x06, 0xb8, 0x54, 0x7d, 0x91, 0x1c, 0xf0, 0xd9, 0x35,
+ 0x8b, 0x67, 0x4e, 0xa2, 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c,
+ 0x45, 0xa9, 0x24, 0xc8, 0xe1, 0x0d, 0xb3, 0x5f, 0x76, 0x9a,
+ 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0x0a, 0x23, 0xcf, 0x42, 0xae,
+ 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc, 0xdb, 0x37, 0x1e, 0xf2,
+ 0x4c, 0xa0, 0x89, 0x65, 0xe8, 0x04, 0x2d, 0xc1, 0x7f, 0x93,
+ 0xba, 0x56, 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x03,
+ 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30, 0x92, 0x7e,
+ 0x57, 0xbb, 0x05, 0xe9, 0xc0, 0x2c, 0xa1, 0x4d, 0x64, 0x88,
+ 0x36, 0xda, 0xf3, 0x1f, 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f,
+ 0xa6, 0x4a, 0xc7, 0x2b, 0x02, 0xee, 0x50, 0xbc, 0x95, 0x79,
+ 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0x0c, 0xe0, 0x6d, 0x81,
+ 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3, 0x38, 0xd4, 0xfd, 0x11,
+ 0xaf, 0x43, 0x6a, 0x86, 0x0b, 0xe7, 0xce, 0x22, 0x9c, 0x70,
+ 0x59, 0xb5, 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9,
+ 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82, 0x76, 0x9b,
+ 0xb1, 0x5c, 0xe5, 0x08, 0x22, 0xcf, 0x4d, 0xa0, 0x8a, 0x67,
+ 0xde, 0x33, 0x19, 0xf4, 0xec, 0x01, 0x2b, 0xc6, 0x7f, 0x92,
+ 0xb8, 0x55, 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e,
+ 0x9a, 0x77, 0x5d, 0xb0, 0x09, 0xe4, 0xce, 0x23, 0xa1, 0x4c,
+ 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18, 0xc5, 0x28, 0x02, 0xef,
+ 0x56, 0xbb, 0x91, 0x7c, 0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80,
+ 0xaa, 0x47, 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0x0a,
+ 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31, 0x29, 0xc4,
+ 0xee, 0x03, 0xba, 0x57, 0x7d, 0x90, 0x12, 0xff, 0xd5, 0x38,
+ 0x81, 0x6c, 0x46, 0xab, 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21,
+ 0x0b, 0xe6, 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd,
+ 0x97, 0x7a, 0x50, 0xbd, 0x04, 0xe9, 0xc3, 0x2e, 0xac, 0x41,
+ 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15, 0xe1, 0x0c, 0x26, 0xcb,
+ 0x72, 0x9f, 0xb5, 0x58, 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4,
+ 0x8e, 0x63, 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x05, 0x2f, 0xc2,
+ 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9, 0x0d, 0xe0,
+ 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4, 0x36, 0xdb, 0xf1, 0x1c,
+ 0xa5, 0x48, 0x62, 0x8f, 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c,
+ 0x06, 0xeb, 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0,
+ 0x24, 0xc9, 0xe3, 0x0e, 0xb7, 0x5a, 0x70, 0x9d, 0x1f, 0xf2,
+ 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6, 0xbe, 0x53, 0x79, 0x94,
+ 0x2d, 0xc0, 0xea, 0x07, 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb,
+ 0xd1, 0x3c, 0xc8, 0x25, 0x0f, 0xe2, 0x5b, 0xb6, 0x9c, 0x71,
+ 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a, 0x00, 0xee,
+ 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c,
+ 0xbc, 0x52, 0x7d, 0x93, 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37,
+ 0x18, 0xf6, 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5,
+ 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c, 0xaf, 0x41,
+ 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f, 0xca, 0x24, 0x0b, 0xe5,
+ 0x55, 0xbb, 0x94, 0x7a, 0xe9, 0x07, 0x28, 0xc6, 0x76, 0x98,
+ 0xb7, 0x59, 0x05, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5,
+ 0x26, 0xc8, 0xe7, 0x09, 0xb9, 0x57, 0x78, 0x96, 0x43, 0xad,
+ 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3, 0x60, 0x8e, 0xa1, 0x4f,
+ 0xff, 0x11, 0x3e, 0xd0, 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8,
+ 0xd7, 0x39, 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a,
+ 0xcf, 0x21, 0x0e, 0xe0, 0x50, 0xbe, 0x91, 0x7f, 0xec, 0x02,
+ 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c, 0x0a, 0xe4, 0xcb, 0x25,
+ 0x95, 0x7b, 0x54, 0xba, 0x29, 0xc7, 0xe8, 0x06, 0xb6, 0x58,
+ 0x77, 0x99, 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc,
+ 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf, 0x86, 0x68,
+ 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36, 0xa5, 0x4b, 0x64, 0x8a,
+ 0x3a, 0xd4, 0xfb, 0x15, 0xc0, 0x2e, 0x01, 0xef, 0x5f, 0xb1,
+ 0x9e, 0x70, 0xe3, 0x0d, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53,
+ 0x0f, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf, 0x2c, 0xc2,
+ 0xed, 0x03, 0xb3, 0x5d, 0x72, 0x9c, 0x49, 0xa7, 0x88, 0x66,
+ 0xd6, 0x38, 0x17, 0xf9, 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b,
+ 0x34, 0xda, 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33,
+ 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10, 0xc5, 0x2b,
+ 0x04, 0xea, 0x5a, 0xb4, 0x9b, 0x75, 0xe6, 0x08, 0x27, 0xc9,
+ 0x79, 0x97, 0xb8, 0x56, 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74,
+ 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c,
+ 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0x0e, 0xe1, 0x7d, 0x92,
+ 0xbe, 0x51, 0xe6, 0x09, 0x25, 0xca, 0xac, 0x43, 0x6f, 0x80,
+ 0x37, 0xd8, 0xf4, 0x1b, 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3,
+ 0xdf, 0x30, 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d,
+ 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66, 0x45, 0xaa,
+ 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2, 0x6e, 0x81, 0xad, 0x42,
+ 0xf5, 0x1a, 0x36, 0xd9, 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67,
+ 0x4b, 0xa4, 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f,
+ 0xe9, 0x06, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e, 0xc2, 0x2d,
+ 0x01, 0xee, 0x59, 0xb6, 0x9a, 0x75, 0xbf, 0x50, 0x7c, 0x93,
+ 0x24, 0xcb, 0xe7, 0x08, 0x94, 0x7b, 0x57, 0xb8, 0x0f, 0xe0,
+ 0xcc, 0x23, 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d,
+ 0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16, 0xdc, 0x33,
+ 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b, 0xf7, 0x18, 0x34, 0xdb,
+ 0x6c, 0x83, 0xaf, 0x40, 0x26, 0xc9, 0xe5, 0x0a, 0xbd, 0x52,
+ 0x7e, 0x91, 0x0d, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba,
+ 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x04, 0x28, 0xc7, 0x5b, 0xb4,
+ 0x98, 0x77, 0xc0, 0x2f, 0x03, 0xec, 0xcf, 0x20, 0x0c, 0xe3,
+ 0x54, 0xbb, 0x97, 0x78, 0xe4, 0x0b, 0x27, 0xc8, 0x7f, 0x90,
+ 0xbc, 0x53, 0x99, 0x76, 0x5a, 0xb5, 0x02, 0xed, 0xc1, 0x2e,
+ 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x05, 0x63, 0x8c,
+ 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4, 0x48, 0xa7, 0x8b, 0x64,
+ 0xd3, 0x3c, 0x10, 0xff, 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41,
+ 0x6d, 0x82, 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9,
+ 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23,
+ 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39, 0xbb, 0x4b, 0x46, 0xb6,
+ 0x5c, 0xac, 0xa1, 0x51, 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f,
+ 0x72, 0x82, 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81,
+ 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52, 0xd0, 0x20,
+ 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a, 0x03, 0xf3, 0xfe, 0x0e,
+ 0xe4, 0x14, 0x19, 0xe9, 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1,
+ 0xcc, 0x3c, 0x05, 0xf5, 0xf8, 0x08, 0xe2, 0x12, 0x1f, 0xef,
+ 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87, 0xbe, 0x4e,
+ 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54, 0xbd, 0x4d, 0x40, 0xb0,
+ 0x5a, 0xaa, 0xa7, 0x57, 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79,
+ 0x74, 0x84, 0x06, 0xf6, 0xfb, 0x0b, 0xe1, 0x11, 0x1c, 0xec,
+ 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f, 0xb1, 0x41,
+ 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b, 0x62, 0x92, 0x9f, 0x6f,
+ 0x85, 0x75, 0x78, 0x88, 0x0a, 0xfa, 0xf7, 0x07, 0xed, 0x1d,
+ 0x10, 0xe0, 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33,
+ 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30, 0x09, 0xf9,
+ 0xf4, 0x04, 0xee, 0x1e, 0x13, 0xe3, 0x61, 0x91, 0x9c, 0x6c,
+ 0x86, 0x76, 0x7b, 0x8b, 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5,
+ 0xa8, 0x58, 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d,
+ 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e, 0xdc, 0x2c,
+ 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36, 0x0f, 0xff, 0xf2, 0x02,
+ 0xe8, 0x18, 0x15, 0xe5, 0x0c, 0xfc, 0xf1, 0x01, 0xeb, 0x1b,
+ 0x16, 0xe6, 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35,
+ 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d, 0x64, 0x94,
+ 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e, 0x00, 0xf1, 0xff, 0x0e,
+ 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9,
+ 0xc7, 0x36, 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46,
+ 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d, 0x4b, 0xba,
+ 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6, 0x90, 0x61, 0x6f, 0x9e,
+ 0x73, 0x82, 0x8c, 0x7d, 0xe0, 0x11, 0x1f, 0xee, 0x03, 0xf2,
+ 0xfc, 0x0d, 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6,
+ 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b, 0x4d, 0xbc,
+ 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0, 0x3d, 0xcc, 0xc2, 0x33,
+ 0xde, 0x2f, 0x21, 0xd0, 0xe6, 0x17, 0x19, 0xe8, 0x05, 0xf4,
+ 0xfa, 0x0b, 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30,
+ 0x06, 0xf7, 0xf9, 0x08, 0xe5, 0x14, 0x1a, 0xeb, 0x76, 0x87,
+ 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b, 0xad, 0x5c, 0x52, 0xa3,
+ 0x4e, 0xbf, 0xb1, 0x40, 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23,
+ 0x2d, 0xdc, 0xea, 0x1b, 0x15, 0xe4, 0x09, 0xf8, 0xf6, 0x07,
+ 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77, 0x41, 0xb0,
+ 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac, 0x7a, 0x8b, 0x85, 0x74,
+ 0x99, 0x68, 0x66, 0x97, 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3,
+ 0xbd, 0x4c, 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c,
+ 0x0a, 0xfb, 0xf5, 0x04, 0xe9, 0x18, 0x16, 0xe7, 0xa7, 0x56,
+ 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a, 0x7c, 0x8d, 0x83, 0x72,
+ 0x9f, 0x6e, 0x60, 0x91, 0x0c, 0xfd, 0xf3, 0x02, 0xef, 0x1e,
+ 0x10, 0xe1, 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a,
+ 0xec, 0x1d, 0x13, 0xe2, 0x0f, 0xfe, 0xf0, 0x01, 0x37, 0xc6,
+ 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda, 0x47, 0xb6, 0xb8, 0x49,
+ 0xa4, 0x55, 0x5b, 0xaa, 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e,
+ 0x80, 0x71, 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4,
+ 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27, 0x9b, 0x69,
+ 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f, 0x58, 0xaa, 0xa1, 0x53,
+ 0xb7, 0x45, 0x4e, 0xbc, 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36,
+ 0x3d, 0xcf, 0xe8, 0x1a, 0x11, 0xe3, 0x07, 0xf5, 0xfe, 0x0c,
+ 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54, 0x73, 0x81,
+ 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97, 0x56, 0xa4, 0xaf, 0x5d,
+ 0xb9, 0x4b, 0x40, 0xb2, 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88,
+ 0x83, 0x71, 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29,
+ 0x0e, 0xfc, 0xf7, 0x05, 0xe1, 0x13, 0x18, 0xea, 0x7d, 0x8f,
+ 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99, 0xbe, 0x4c, 0x47, 0xb5,
+ 0x51, 0xa3, 0xa8, 0x5a, 0xe6, 0x14, 0x1f, 0xed, 0x09, 0xfb,
+ 0xf0, 0x02, 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1,
+ 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48, 0x6f, 0x9d,
+ 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b, 0x37, 0xc5, 0xce, 0x3c,
+ 0xd8, 0x2a, 0x21, 0xd3, 0xf4, 0x06, 0x0d, 0xff, 0x1b, 0xe9,
+ 0xe2, 0x10, 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63,
+ 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0, 0x1c, 0xee,
+ 0xe5, 0x17, 0xf3, 0x01, 0x0a, 0xf8, 0xdf, 0x2d, 0x26, 0xd4,
+ 0x30, 0xc2, 0xc9, 0x3b, 0xfa, 0x08, 0x03, 0xf1, 0x15, 0xe7,
+ 0xec, 0x1e, 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd,
+ 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85, 0xa2, 0x50,
+ 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46, 0xd1, 0x23, 0x28, 0xda,
+ 0x3e, 0xcc, 0xc7, 0x35, 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0x0f,
+ 0x04, 0xf6, 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae,
+ 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d, 0x00, 0xf3,
+ 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3,
+ 0x20, 0xd3, 0xdb, 0x28, 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93,
+ 0x9b, 0x68, 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3,
+ 0x0b, 0xf8, 0xf0, 0x03, 0xe0, 0x13, 0x1b, 0xe8, 0xc0, 0x33,
+ 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23, 0x80, 0x73, 0x7b, 0x88,
+ 0x6b, 0x98, 0x90, 0x63, 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53,
+ 0x5b, 0xa8, 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0x0e, 0x06, 0xf5,
+ 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e, 0x9d, 0x6e,
+ 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e, 0x56, 0xa5, 0xad, 0x5e,
+ 0xbd, 0x4e, 0x46, 0xb5, 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x05,
+ 0x0d, 0xfe, 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35,
+ 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75, 0x5d, 0xae,
+ 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe, 0x2c, 0xdf, 0xd7, 0x24,
+ 0xc7, 0x34, 0x3c, 0xcf, 0xe7, 0x14, 0x1c, 0xef, 0x0c, 0xff,
+ 0xf7, 0x04, 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44,
+ 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f, 0x27, 0xd4,
+ 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4, 0xec, 0x1f, 0x17, 0xe4,
+ 0x07, 0xf4, 0xfc, 0x0f, 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4,
+ 0xbc, 0x4f, 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84,
+ 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9, 0xf1, 0x02,
+ 0x0a, 0xf9, 0x1a, 0xe9, 0xe1, 0x12, 0xb1, 0x42, 0x4a, 0xb9,
+ 0x5a, 0xa9, 0xa1, 0x52, 0x7a, 0x89, 0x81, 0x72, 0x91, 0x62,
+ 0x6a, 0x99, 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2,
+ 0xfa, 0x09, 0x01, 0xf2, 0x11, 0xe2, 0xea, 0x19, 0xba, 0x49,
+ 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59, 0x71, 0x82, 0x8a, 0x79,
+ 0x9a, 0x69, 0x61, 0x92, 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03,
+ 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05,
+ 0xfb, 0x0f, 0x0e, 0xfa, 0x0c, 0xf8, 0xf9, 0x0d, 0x08, 0xfc,
+ 0xfd, 0x09, 0xff, 0x0b, 0x0a, 0xfe, 0xeb, 0x1f, 0x1e, 0xea,
+ 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b,
+ 0x1a, 0xee, 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6,
+ 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, 0xcb, 0x3f,
+ 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39,
+ 0xcf, 0x3b, 0x3a, 0xce, 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33,
+ 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35,
+ 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27,
+ 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, 0xdb, 0x2f, 0x2e, 0xda,
+ 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b,
+ 0x2a, 0xde, 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d,
+ 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, 0x70, 0x84,
+ 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82,
+ 0x74, 0x80, 0x81, 0x75, 0x60, 0x94, 0x95, 0x61, 0x97, 0x63,
+ 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65,
+ 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c,
+ 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, 0x40, 0xb4, 0xb5, 0x41,
+ 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0,
+ 0xb1, 0x45, 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d,
+ 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, 0xab, 0x5f,
+ 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59,
+ 0xaf, 0x5b, 0x5a, 0xae, 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53,
+ 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55,
+ 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e,
+ 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a, 0xeb, 0x1e, 0x1c, 0xe9,
+ 0x18, 0xed, 0xef, 0x1a, 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16,
+ 0x14, 0xe1, 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a,
+ 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1, 0x20, 0xd5,
+ 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1, 0xdb, 0x2e, 0x2c, 0xd9,
+ 0x28, 0xdd, 0xdf, 0x2a, 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d,
+ 0x8f, 0x7a, 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81,
+ 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91, 0x9b, 0x6e,
+ 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a, 0x40, 0xb5, 0xb7, 0x42,
+ 0xb3, 0x46, 0x44, 0xb1, 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd,
+ 0xbf, 0x4a, 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a,
+ 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1, 0x0b, 0xfe,
+ 0xfc, 0x09, 0xf8, 0x0d, 0x0f, 0xfa, 0xf0, 0x05, 0x07, 0xf2,
+ 0x03, 0xf6, 0xf4, 0x01, 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6,
+ 0xe4, 0x11, 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea,
+ 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31, 0x3b, 0xce,
+ 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca, 0x2b, 0xde, 0xdc, 0x29,
+ 0xd8, 0x2d, 0x2f, 0xda, 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6,
+ 0xd4, 0x21, 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71,
+ 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a, 0x6b, 0x9e,
+ 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a, 0x90, 0x65, 0x67, 0x92,
+ 0x63, 0x96, 0x94, 0x61, 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d,
+ 0x4f, 0xba, 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41,
+ 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51, 0x5b, 0xae,
+ 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa, 0x00, 0xf6, 0xf1, 0x07,
+ 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea,
+ 0xed, 0x1b, 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23,
+ 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0, 0xab, 0x5d,
+ 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53, 0x48, 0xbe, 0xb9, 0x4f,
+ 0xb7, 0x41, 0x46, 0xb0, 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79,
+ 0x7e, 0x88, 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b,
+ 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3, 0xa8, 0x5e,
+ 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50, 0x90, 0x66, 0x61, 0x97,
+ 0x6f, 0x99, 0x9e, 0x68, 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a,
+ 0x7d, 0x8b, 0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18,
+ 0x03, 0xf5, 0xf2, 0x04, 0xfc, 0x0a, 0x0d, 0xfb, 0x3b, 0xcd,
+ 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3, 0xd8, 0x2e, 0x29, 0xdf,
+ 0x27, 0xd1, 0xd6, 0x20, 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f,
+ 0x98, 0x6e, 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d,
+ 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5, 0xae, 0x58,
+ 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56, 0x3d, 0xcb, 0xcc, 0x3a,
+ 0xc2, 0x34, 0x33, 0xc5, 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7,
+ 0xd0, 0x26, 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e,
+ 0x05, 0xf3, 0xf4, 0x02, 0xfa, 0x0c, 0x0b, 0xfd, 0xdd, 0x2b,
+ 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25, 0x3e, 0xc8, 0xcf, 0x39,
+ 0xc1, 0x37, 0x30, 0xc6, 0x06, 0xf0, 0xf7, 0x01, 0xf9, 0x0f,
+ 0x08, 0xfe, 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d,
+ 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e, 0x95, 0x63,
+ 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d, 0xad, 0x5b, 0x5c, 0xaa,
+ 0x52, 0xa4, 0xa3, 0x55, 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47,
+ 0x40, 0xb6, 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff,
+ 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14, 0xcb, 0x3c,
+ 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34, 0x20, 0xd7, 0xd3, 0x24,
+ 0xdb, 0x2c, 0x28, 0xdf, 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87,
+ 0x83, 0x74, 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f,
+ 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf, 0xab, 0x5c,
+ 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54, 0x0b, 0xfc, 0xf8, 0x0f,
+ 0xf0, 0x07, 0x03, 0xf4, 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec,
+ 0xe8, 0x1f, 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f,
+ 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4, 0x80, 0x77,
+ 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f, 0x6b, 0x9c, 0x98, 0x6f,
+ 0x90, 0x67, 0x63, 0x94, 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47,
+ 0x43, 0xb4, 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f,
+ 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9, 0xfd, 0x0a,
+ 0x0e, 0xf9, 0x06, 0xf1, 0xf5, 0x02, 0xdd, 0x2a, 0x2e, 0xd9,
+ 0x26, 0xd1, 0xd5, 0x22, 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a,
+ 0x3e, 0xc9, 0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62,
+ 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89, 0x56, 0xa1,
+ 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9, 0xbd, 0x4a, 0x4e, 0xb9,
+ 0x46, 0xb1, 0xb5, 0x42, 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11,
+ 0x15, 0xe2, 0xf6, 0x01, 0x05, 0xf2, 0x0d, 0xfa, 0xfe, 0x09,
+ 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29, 0x3d, 0xca,
+ 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2, 0x96, 0x61, 0x65, 0x92,
+ 0x6d, 0x9a, 0x9e, 0x69, 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71,
+ 0x75, 0x82, 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2,
+ 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49, 0x00, 0xf8,
+ 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86,
+ 0x54, 0xac, 0xb9, 0x41, 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x04,
+ 0x11, 0xe9, 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a,
+ 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4, 0xe5, 0x1d,
+ 0x08, 0xf0, 0x22, 0xda, 0xcf, 0x37, 0x4d, 0xb5, 0xa0, 0x58,
+ 0x8a, 0x72, 0x67, 0x9f, 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1,
+ 0xf4, 0x0c, 0xec, 0x14, 0x01, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e,
+ 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad, 0xd7, 0x2f,
+ 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x05, 0x44, 0xbc, 0xa9, 0x51,
+ 0x83, 0x7b, 0x6e, 0x96, 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5,
+ 0xb0, 0x48, 0x09, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb,
+ 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73, 0x32, 0xca,
+ 0xdf, 0x27, 0xf5, 0x0d, 0x18, 0xe0, 0xc5, 0x3d, 0x28, 0xd0,
+ 0x02, 0xfa, 0xef, 0x17, 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69,
+ 0x7c, 0x84, 0xfe, 0x06, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c,
+ 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf, 0xb3, 0x4b,
+ 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61, 0x20, 0xd8, 0xcd, 0x35,
+ 0xe7, 0x1f, 0x0a, 0xf2, 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7,
+ 0xa2, 0x5a, 0x1b, 0xe3, 0xf6, 0x0e, 0xdc, 0x24, 0x31, 0xc9,
+ 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x03, 0xfb, 0xba, 0x42,
+ 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68, 0x12, 0xea, 0xff, 0x07,
+ 0xd5, 0x2d, 0x38, 0xc0, 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe,
+ 0xab, 0x53, 0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d,
+ 0xcc, 0x34, 0x21, 0xd9, 0x0b, 0xf3, 0xe6, 0x1e, 0x64, 0x9c,
+ 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6, 0xf7, 0x0f, 0x1a, 0xe2,
+ 0x30, 0xc8, 0xdd, 0x25, 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a,
+ 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e,
+ 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x07, 0xfe, 0xb0, 0x49,
+ 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65, 0x56, 0xaf, 0xb9, 0x40,
+ 0x95, 0x6c, 0x7a, 0x83, 0xcd, 0x34, 0x22, 0xdb, 0x0e, 0xf7,
+ 0xe1, 0x18, 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8,
+ 0xe6, 0x1f, 0x09, 0xf0, 0x25, 0xdc, 0xca, 0x33, 0xac, 0x55,
+ 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79, 0x37, 0xce, 0xd8, 0x21,
+ 0xf4, 0x0d, 0x1b, 0xe2, 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd,
+ 0xab, 0x52, 0x1c, 0xe5, 0xf3, 0x0a, 0xdf, 0x26, 0x30, 0xc9,
+ 0xfa, 0x03, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f, 0x61, 0x98,
+ 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4, 0xd1, 0x28, 0x3e, 0xc7,
+ 0x12, 0xeb, 0xfd, 0x04, 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70,
+ 0x66, 0x9f, 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90,
+ 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0x0b, 0x6e, 0x97,
+ 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb, 0xf5, 0x0c, 0x1a, 0xe3,
+ 0x36, 0xcf, 0xd9, 0x20, 0x13, 0xea, 0xfc, 0x05, 0xd0, 0x29,
+ 0x3f, 0xc6, 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d,
+ 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x02, 0x14, 0xed, 0xa3, 0x5a,
+ 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76, 0xe9, 0x10, 0x06, 0xff,
+ 0x2a, 0xd3, 0xc5, 0x3c, 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48,
+ 0x5e, 0xa7, 0xc2, 0x3b, 0x2d, 0xd4, 0x01, 0xf8, 0xee, 0x17,
+ 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c, 0xbf, 0x46,
+ 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a, 0x24, 0xdd, 0xcb, 0x32,
+ 0xe7, 0x1e, 0x08, 0xf1, 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae,
+ 0xb8, 0x41, 0x0f, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda,
+ 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79,
+ 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f, 0x1b, 0xe1, 0xf2, 0x08,
+ 0xd4, 0x2e, 0x3d, 0xc7, 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad,
+ 0xbe, 0x44, 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x03, 0x10, 0xea,
+ 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69, 0x2d, 0xd7,
+ 0xc4, 0x3e, 0xe2, 0x18, 0x0b, 0xf1, 0xae, 0x54, 0x47, 0xbd,
+ 0x61, 0x9b, 0x88, 0x72, 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59,
+ 0x4a, 0xb0, 0xef, 0x15, 0x06, 0xfc, 0x20, 0xda, 0xc9, 0x33,
+ 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab, 0xf4, 0x0e,
+ 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28, 0x5a, 0xa0, 0xb3, 0x49,
+ 0x95, 0x6f, 0x7c, 0x86, 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec,
+ 0xff, 0x05, 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d,
+ 0xc2, 0x38, 0x2b, 0xd1, 0x0d, 0xf7, 0xe4, 0x1e, 0xd8, 0x22,
+ 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x04, 0x5b, 0xa1, 0xb2, 0x48,
+ 0x94, 0x6e, 0x7d, 0x87, 0xc3, 0x39, 0x2a, 0xd0, 0x0c, 0xf6,
+ 0xe5, 0x1f, 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c,
+ 0xee, 0x14, 0x07, 0xfd, 0x21, 0xdb, 0xc8, 0x32, 0x6d, 0x97,
+ 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1, 0xf5, 0x0f, 0x1c, 0xe6,
+ 0x3a, 0xc0, 0xd3, 0x29, 0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43,
+ 0x50, 0xaa, 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68,
+ 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x02, 0x11, 0xeb, 0xaf, 0x55,
+ 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73, 0x2c, 0xd6, 0xc5, 0x3f,
+ 0xe3, 0x19, 0x0a, 0xf0, 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7,
+ 0xa4, 0x5e, 0x01, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd,
+ 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45, 0x1a, 0xe0,
+ 0xf3, 0x09, 0xd5, 0x2f, 0x3c, 0xc6, 0x00, 0xfb, 0xeb, 0x10,
+ 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb,
+ 0xab, 0x50, 0x0b, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0,
+ 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b, 0x16, 0xed,
+ 0xfd, 0x06, 0xdd, 0x26, 0x36, 0xcd, 0x9d, 0x66, 0x76, 0x8d,
+ 0x56, 0xad, 0xbd, 0x46, 0x1d, 0xe6, 0xf6, 0x0d, 0xd6, 0x2d,
+ 0x3d, 0xc6, 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d,
+ 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0x0c, 0xf7, 0xa7, 0x5c,
+ 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c, 0x27, 0xdc, 0xcc, 0x37,
+ 0xec, 0x17, 0x07, 0xfc, 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c,
+ 0x8c, 0x77, 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0x0a, 0x1a, 0xe1,
+ 0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a, 0x31, 0xca,
+ 0xda, 0x21, 0xfa, 0x01, 0x11, 0xea, 0xba, 0x41, 0x51, 0xaa,
+ 0x71, 0x8a, 0x9a, 0x61, 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68,
+ 0x78, 0x83, 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x08,
+ 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88, 0xd8, 0x23,
+ 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x03, 0x4e, 0xb5, 0xa5, 0x5e,
+ 0x85, 0x7e, 0x6e, 0x95, 0xc5, 0x3e, 0x2e, 0xd5, 0x0e, 0xf5,
+ 0xe5, 0x1e, 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e,
+ 0xce, 0x35, 0x25, 0xde, 0x05, 0xfe, 0xee, 0x15, 0x74, 0x8f,
+ 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf, 0xff, 0x04, 0x14, 0xef,
+ 0x34, 0xcf, 0xdf, 0x24, 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f,
+ 0x5f, 0xa4, 0xf4, 0x0f, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f,
+ 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9, 0xe9, 0x12,
+ 0x02, 0xf9, 0x22, 0xd9, 0xc9, 0x32, 0x69, 0x92, 0x82, 0x79,
+ 0xa2, 0x59, 0x49, 0xb2, 0xe2, 0x19, 0x09, 0xf2, 0x29, 0xd2,
+ 0xc2, 0x39, 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce,
+ 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d, 0x7b, 0x87,
+ 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5, 0xc8, 0x34, 0x2d, 0xd1,
+ 0x1f, 0xe3, 0xfa, 0x06, 0xf6, 0x0a, 0x13, 0xef, 0x21, 0xdd,
+ 0xc4, 0x38, 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b,
+ 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43, 0x3e, 0xc2,
+ 0xdb, 0x27, 0xe9, 0x15, 0x0c, 0xf0, 0xf1, 0x0d, 0x14, 0xe8,
+ 0x26, 0xda, 0xc3, 0x3f, 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69,
+ 0x70, 0x8c, 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44,
+ 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0x0b, 0xf7, 0x07, 0xfb,
+ 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9, 0xb4, 0x48, 0x51, 0xad,
+ 0x63, 0x9f, 0x86, 0x7a, 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57,
+ 0x4e, 0xb2, 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x01,
+ 0xff, 0x03, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31, 0x4c, 0xb0,
+ 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82, 0x84, 0x78, 0x61, 0x9d,
+ 0x53, 0xaf, 0xb6, 0x4a, 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c,
+ 0x05, 0xf9, 0x09, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7,
+ 0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74, 0x72, 0x8e,
+ 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc, 0xc1, 0x3d, 0x24, 0xd8,
+ 0x16, 0xea, 0xf3, 0x0f, 0x0e, 0xf2, 0xeb, 0x17, 0xd9, 0x25,
+ 0x3c, 0xc0, 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73,
+ 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb, 0xc6, 0x3a,
+ 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x08, 0xf8, 0x04, 0x1d, 0xe1,
+ 0x2f, 0xd3, 0xca, 0x36, 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60,
+ 0x79, 0x85, 0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d,
+ 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x02, 0xfe, 0x00, 0xfd,
+ 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1,
+ 0x68, 0x95, 0x8f, 0x72, 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45,
+ 0x5f, 0xa2, 0xd0, 0x2d, 0x37, 0xca, 0x03, 0xfe, 0xe4, 0x19,
+ 0xd6, 0x2b, 0x31, 0xcc, 0x05, 0xf8, 0xe2, 0x1f, 0x6d, 0x90,
+ 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4, 0xbd, 0x40, 0x5a, 0xa7,
+ 0x6e, 0x93, 0x89, 0x74, 0x06, 0xfb, 0xe1, 0x1c, 0xd5, 0x28,
+ 0x32, 0xcf, 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78,
+ 0x0a, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3, 0xda, 0x27,
+ 0x3d, 0xc0, 0x09, 0xf4, 0xee, 0x13, 0x61, 0x9c, 0x86, 0x7b,
+ 0xb2, 0x4f, 0x55, 0xa8, 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49,
+ 0x53, 0xae, 0xdc, 0x21, 0x3b, 0xc6, 0x0f, 0xf2, 0xe8, 0x15,
+ 0x0c, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5, 0xb7, 0x4a,
+ 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e, 0x7f, 0x82, 0x98, 0x65,
+ 0xac, 0x51, 0x4b, 0xb6, 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea,
+ 0xf0, 0x0d, 0x14, 0xe9, 0xf3, 0x0e, 0xc7, 0x3a, 0x20, 0xdd,
+ 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66, 0xa9, 0x54,
+ 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60, 0x12, 0xef, 0xf5, 0x08,
+ 0xc1, 0x3c, 0x26, 0xdb, 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec,
+ 0xf6, 0x0b, 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0,
+ 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x07, 0x75, 0x88,
+ 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc, 0xa5, 0x58, 0x42, 0xbf,
+ 0x76, 0x8b, 0x91, 0x6c, 0x1e, 0xe3, 0xf9, 0x04, 0xcd, 0x30,
+ 0x2a, 0xd7, 0x18, 0xe5, 0xff, 0x02, 0xcb, 0x36, 0x2c, 0xd1,
+ 0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a, 0x73, 0x8e,
+ 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba, 0xc8, 0x35, 0x2f, 0xd2,
+ 0x1b, 0xe6, 0xfc, 0x01, 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21,
+ 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63,
+ 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b, 0xf8, 0x06,
+ 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38, 0xb6, 0x48, 0x57, 0xa9,
+ 0x69, 0x97, 0x88, 0x76, 0x15, 0xeb, 0xf4, 0x0a, 0xca, 0x34,
+ 0x2b, 0xd5, 0xed, 0x13, 0x0c, 0xf2, 0x32, 0xcc, 0xd3, 0x2d,
+ 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e, 0x71, 0x8f,
+ 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1, 0xd2, 0x2c, 0x33, 0xcd,
+ 0x0d, 0xf3, 0xec, 0x12, 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0x0b,
+ 0x14, 0xea, 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49,
+ 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x07, 0x64, 0x9a,
+ 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4, 0x9c, 0x62, 0x7d, 0x83,
+ 0x43, 0xbd, 0xa2, 0x5c, 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e,
+ 0x01, 0xff, 0xe2, 0x1c, 0x03, 0xfd, 0x3d, 0xc3, 0xdc, 0x22,
+ 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81, 0xb9, 0x47,
+ 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79, 0x1a, 0xe4, 0xfb, 0x05,
+ 0xc5, 0x3b, 0x24, 0xda, 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75,
+ 0x6a, 0x94, 0xf7, 0x09, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37,
+ 0x0f, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf, 0xac, 0x52,
+ 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c, 0x93, 0x6d, 0x72, 0x8c,
+ 0x4c, 0xb2, 0xad, 0x53, 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11,
+ 0x0e, 0xf0, 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x08,
+ 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab, 0x25, 0xdb,
+ 0xc4, 0x3a, 0xfa, 0x04, 0x1b, 0xe5, 0x86, 0x78, 0x67, 0x99,
+ 0x59, 0xa7, 0xb8, 0x46, 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f,
+ 0x40, 0xbe, 0xdd, 0x23, 0x3c, 0xc2, 0x02, 0xfc, 0xe3, 0x1d,
+ 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54,
+ 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c, 0x4b, 0xb4, 0xa8, 0x57,
+ 0x90, 0x6f, 0x73, 0x8c, 0xe0, 0x1f, 0x03, 0xfc, 0x3b, 0xc4,
+ 0xd8, 0x27, 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51,
+ 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x05, 0xfa, 0xdd, 0x22,
+ 0x3e, 0xc1, 0x06, 0xf9, 0xe5, 0x1a, 0x76, 0x89, 0x95, 0x6a,
+ 0xad, 0x52, 0x4e, 0xb1, 0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15,
+ 0x09, 0xf6, 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d,
+ 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd, 0xd1, 0x2e,
+ 0x32, 0xcd, 0x0a, 0xf5, 0xe9, 0x16, 0xa7, 0x58, 0x44, 0xbb,
+ 0x7c, 0x83, 0x9f, 0x60, 0x0c, 0xf3, 0xef, 0x10, 0xd7, 0x28,
+ 0x34, 0xcb, 0xec, 0x13, 0x0f, 0xf0, 0x37, 0xc8, 0xd4, 0x2b,
+ 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80, 0x62, 0x9d,
+ 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5, 0xc9, 0x36, 0x2a, 0xd5,
+ 0x12, 0xed, 0xf1, 0x0e, 0x29, 0xd6, 0xca, 0x35, 0xf2, 0x0d,
+ 0x11, 0xee, 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45,
+ 0xf4, 0x0b, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33, 0x5f, 0xa0,
+ 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98, 0xbf, 0x40, 0x5c, 0xa3,
+ 0x64, 0x9b, 0x87, 0x78, 0x14, 0xeb, 0xf7, 0x08, 0xcf, 0x30,
+ 0x2c, 0xd3, 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94,
+ 0xf8, 0x07, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f, 0x18, 0xe7,
+ 0xfb, 0x04, 0xc3, 0x3c, 0x20, 0xdf, 0xb3, 0x4c, 0x50, 0xaf,
+ 0x68, 0x97, 0x8b, 0x74, 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1,
+ 0xfd, 0x02, 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9,
+ 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49, 0x25, 0xda,
+ 0xc6, 0x39, 0xfe, 0x01, 0x1d, 0xe2
+};
+
+static const unsigned char gf_inv_table_base[] = {
+ 0x00, 0x01, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d,
+ 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58,
+ 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83,
+ 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a,
+ 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89,
+ 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9,
+ 0xab, 0x0c, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e,
+ 0xa6, 0x04, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93,
+ 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1,
+ 0xcb, 0x63, 0x97, 0x0e, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b,
+ 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec,
+ 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79,
+ 0xdb, 0x77, 0x06, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54,
+ 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d,
+ 0x53, 0x69, 0x02, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc,
+ 0x0f, 0x5c, 0x0b, 0xdc, 0xbd, 0x94, 0xac, 0x09, 0xc7, 0xa2,
+ 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x05, 0xce, 0x3b,
+ 0x0d, 0x3c, 0x9c, 0x08, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b,
+ 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x07, 0x7b, 0x95, 0x9a,
+ 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e,
+ 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71,
+ 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78,
+ 0x99, 0x0a, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1,
+ 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a,
+ 0xe3, 0xe7, 0xb5, 0xea, 0x03, 0x8f, 0xd3, 0xc9, 0x42, 0xd4,
+ 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd
+};
+#endif // GF_LARGE_TABLES
+
+#endif //_EC_BASE_H_
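A quick way to sanity-check the inverse table: isa-l's GF(2^8) arithmetic here is generated by the polynomial 0x11d (the table itself confirms this; the entry for 0x02 is 0x8e, and 2 * 0x8e reduces to 1 under 0x11d), so every nonzero byte multiplied by its table entry must give 1. A minimal sketch follows; gf_mul_ref is a hypothetical reference multiplier, not part of the library:

#include <assert.h>

/* Reference GF(2^8) multiply, reducing by the assumed field polynomial 0x11d. */
static unsigned char gf_mul_ref(unsigned char a, unsigned char b)
{
	unsigned int p = 0, x = a, y = b;

	while (y) {
		if (y & 1)
			p ^= x;		/* conditionally add (xor) the current multiple */
		x <<= 1;
		if (x & 0x100)
			x ^= 0x11d;	/* reduce back into GF(2^8) */
		y >>= 1;
	}
	return (unsigned char)p;
}

/* Every nonzero element times its gf_inv_table_base entry must equal 1. */
static void check_inv_table(void)
{
	int a;

	for (a = 1; a < 256; a++)
		assert(gf_mul_ref((unsigned char)a, gf_inv_table_base[a]) == 1);
}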
diff --git a/src/isa-l/erasure_code/ec_base_aliases.c b/src/isa-l/erasure_code/ec_base_aliases.c
new file mode 100644
index 000000000..d046ff61a
--- /dev/null
+++ b/src/isa-l/erasure_code/ec_base_aliases.c
@@ -0,0 +1,61 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "erasure_code.h"
+
+void gf_vect_dot_prod(int len, int vlen, unsigned char *v,
+ unsigned char **src, unsigned char *dest)
+{
+ gf_vect_dot_prod_base(len, vlen, v, src, dest);
+}
+
+void gf_vect_mad(int len, int vec, int vec_i,
+ unsigned char *v, unsigned char *src, unsigned char *dest)
+{
+ gf_vect_mad_base(len, vec, vec_i, v, src, dest);
+}
+
+void ec_encode_data(int len, int srcs, int dests, unsigned char *v,
+ unsigned char **src, unsigned char **dest)
+{
+ ec_encode_data_base(len, srcs, dests, v, src, dest);
+}
+
+void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v,
+ unsigned char *data, unsigned char **dest)
+{
+ ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
+}
+
+int gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
+{
+ gf_vect_mul_base(len, a, (unsigned char *)src, (unsigned char *)dest);
+ return 0;
+}
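These aliases give builds without a multibinary dispatcher the same public entry points, routed straight to the _base implementations. For orientation, a minimal encode sketch against this API; buffer sizing follows the 32-byte-per-coefficient table layout used throughout, and error handling is omitted:

#include <stdlib.h>
#include "erasure_code.h"

/* Encode k data fragments of len bytes into p parity fragments. */
static void encode_example(int len, int k, int p,
			   unsigned char **data, unsigned char **parity)
{
	unsigned char *a = malloc((k + p) * k);		/* generator matrix */
	unsigned char *g_tbls = malloc(k * p * 32);	/* expanded mul tables */

	gf_gen_rs_matrix(a, k + p, k);			/* systematic RS matrix */
	ec_init_tables(k, p, &a[k * k], g_tbls);	/* expand the parity rows */
	ec_encode_data(len, k, p, g_tbls, data, parity);

	free(a);
	free(g_tbls);
}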
diff --git a/src/isa-l/erasure_code/ec_highlevel_func.c b/src/isa-l/erasure_code/ec_highlevel_func.c
new file mode 100644
index 000000000..a9fe6abb5
--- /dev/null
+++ b/src/isa-l/erasure_code/ec_highlevel_func.c
@@ -0,0 +1,374 @@
+/**********************************************************************
+ Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <limits.h>
+#include "erasure_code.h"
+
+#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
+void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
+ unsigned char **coding)
+{
+ if (len < 16) {
+ ec_encode_data_base(len, k, rows, g_tbls, data, coding);
+ return;
+ }
+
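+	/* g_tbls holds a 32-byte lookup table per (row, source) coefficient,
+	 * so a full pass over six output rows consumes 6 * k * 32 bytes. */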
+ while (rows >= 6) {
+ gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 5:
+ gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
+ unsigned char **coding)
+{
+ if (len < 16) {
+ ec_encode_data_base(len, k, rows, g_tbls, data, coding);
+ return;
+ }
+
+ while (rows >= 6) {
+ gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 5:
+ gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
+ unsigned char **coding)
+{
+ if (len < 32) {
+ ec_encode_data_base(len, k, rows, g_tbls, data, coding);
+ return;
+ }
+
+ while (rows >= 6) {
+ gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 5:
+ gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+#ifdef HAVE_AS_KNOWS_AVX512
+
+extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data,
+ unsigned char *dest);
+extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
+ unsigned char **data, unsigned char **coding);
+extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
+ unsigned char **data, unsigned char **coding);
+extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
+ unsigned char **data, unsigned char **coding);
+extern int gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
+ unsigned char **data, unsigned char **coding);
+extern int gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls,
+ unsigned char **data, unsigned char **coding);
+extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char *dest);
+extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+extern void gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest);
+
+void ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls,
+ unsigned char **data, unsigned char **coding)
+{
+ if (len < 64) {
+ ec_encode_data_base(len, k, rows, g_tbls, data, coding);
+ return;
+ }
+
+ while (rows >= 6) {
+ gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 5:
+ gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+void ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding)
+{
+ if (len < 64) {
+ ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
+ return;
+ }
+
+ while (rows >= 6) {
+ gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 5:
+ gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+#endif // HAVE_AS_KNOWS_AVX512
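The _update variants fold a single source fragment into the parity buffers with a multiply-accumulate (the gf_*vect_mad kernels) rather than recomputing the whole dot product, so an encoder can consume fragments as they arrive. A rough sketch, assuming g_tbls was filled by ec_init_tables as above and that the parity buffers start zeroed so the accumulation begins from a clean state:

#include <string.h>
#include "erasure_code.h"

/* Stream-encode: fold each of k data fragments into p parity buffers. */
static void encode_streaming(int len, int k, int p, unsigned char *g_tbls,
			     unsigned char **data, unsigned char **parity)
{
	int i;

	for (i = 0; i < p; i++)
		memset(parity[i], 0, len);	/* mad xors into dest */
	for (i = 0; i < k; i++)
		ec_encode_data_update(len, k, p, i, g_tbls, data[i], parity);
}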
+
+#if __WORDSIZE == 64 || _WIN64 || __x86_64__
+
+void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding)
+{
+ if (len < 16) {
+ ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
+ return;
+ }
+
+ while (rows > 6) {
+ gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 6:
+ gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 5:
+ gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding)
+{
+ if (len < 16) {
+ ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
+ return;
+ }
+ while (rows > 6) {
+ gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 6:
+ gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 5:
+ gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding)
+{
+ if (len < 32) {
+ ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
+ return;
+ }
+ while (rows > 6) {
+ gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
+ g_tbls += 6 * k * 32;
+ coding += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 6:
+ gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 5:
+ gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 4:
+ gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 3:
+ gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 2:
+ gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
+ break;
+ case 1:
+ gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
+ break;
+ case 0:
+ break;
+ }
+}
+
+#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
+#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86
+
+struct slver {
+ unsigned short snum;
+ unsigned char ver;
+ unsigned char core;
+};
+
+// Version info
+struct slver ec_init_tables_slver_00010068;
+struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 };
+
+struct slver ec_encode_data_sse_slver_00020069;
+struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 };
diff --git a/src/isa-l/erasure_code/ec_multibinary.asm b/src/isa-l/erasure_code/ec_multibinary.asm
new file mode 100644
index 000000000..a07f45d6f
--- /dev/null
+++ b/src/isa-l/erasure_code/ec_multibinary.asm
@@ -0,0 +1,95 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ [bits 32]
+%else
+ default rel
+ [bits 64]
+
+ extern ec_encode_data_update_sse
+ extern ec_encode_data_update_avx
+ extern ec_encode_data_update_avx2
+%ifdef HAVE_AS_KNOWS_AVX512
+ extern ec_encode_data_avx512
+ extern gf_vect_dot_prod_avx512
+ extern ec_encode_data_update_avx512
+ extern gf_vect_mad_avx512
+%endif
+ extern gf_vect_mul_sse
+ extern gf_vect_mul_avx
+
+ extern gf_vect_mad_sse
+ extern gf_vect_mad_avx
+ extern gf_vect_mad_avx2
+%endif
+
+extern gf_vect_mul_base
+extern ec_encode_data_base
+extern ec_encode_data_update_base
+extern gf_vect_dot_prod_base
+extern gf_vect_mad_base
+
+extern gf_vect_dot_prod_sse
+extern gf_vect_dot_prod_avx
+extern gf_vect_dot_prod_avx2
+extern ec_encode_data_sse
+extern ec_encode_data_avx
+extern ec_encode_data_avx2
+
+mbin_interface ec_encode_data
+mbin_interface gf_vect_dot_prod
+mbin_interface gf_vect_mul
+mbin_interface ec_encode_data_update
+mbin_interface gf_vect_mad
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ mbin_dispatch_init5 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2
+ mbin_dispatch_init5 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2
+ mbin_dispatch_init2 gf_vect_mul, gf_vect_mul_base
+ mbin_dispatch_init2 ec_encode_data_update, ec_encode_data_update_base
+ mbin_dispatch_init2 gf_vect_mad, gf_vect_mad_base
+%else
+
+ mbin_dispatch_init5 gf_vect_mul, gf_vect_mul_base, gf_vect_mul_sse, gf_vect_mul_avx, gf_vect_mul_avx
+ mbin_dispatch_init6 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2, ec_encode_data_avx512
+ mbin_dispatch_init6 ec_encode_data_update, ec_encode_data_update_base, ec_encode_data_update_sse, ec_encode_data_update_avx, ec_encode_data_update_avx2, ec_encode_data_update_avx512
+ mbin_dispatch_init6 gf_vect_mad, gf_vect_mad_base, gf_vect_mad_sse, gf_vect_mad_avx, gf_vect_mad_avx2, gf_vect_mad_avx512
+ mbin_dispatch_init6 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2, gf_vect_dot_prod_avx512
+%endif
+
+;;; func core, ver, snum
+slversion ec_encode_data, 00, 06, 0133
+slversion gf_vect_mul, 00, 05, 0134
+slversion ec_encode_data_update, 00, 05, 0212
+slversion gf_vect_dot_prod, 00, 05, 0138
+slversion gf_vect_mad, 00, 04, 0213
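The mbin_interface / mbin_dispatch_init macros implement lazy binding: each public symbol initially points at a resolver that probes the CPU once, rebinds the pointer to the best available kernel (base, SSE, AVX, AVX2, or AVX512 when the assembler supports it), and forwards the call. A rough C equivalent of that pattern; the cpu_has_* probes are placeholders for the CPUID checks, and the wrapper is named my_ec_encode_data here to avoid shadowing the real symbol:

/* Placeholder feature probes; the real dispatcher reads CPUID bits. */
extern int cpu_has_avx2(void);
extern int cpu_has_sse41(void);

/* Concrete kernels, as defined in ec_highlevel_func.c above. */
extern void ec_encode_data_base(int, int, int, unsigned char *,
				unsigned char **, unsigned char **);
extern void ec_encode_data_sse(int, int, int, unsigned char *,
			       unsigned char **, unsigned char **);
extern void ec_encode_data_avx2(int, int, int, unsigned char *,
				unsigned char **, unsigned char **);

typedef void (*ec_encode_fn)(int, int, int, unsigned char *,
			     unsigned char **, unsigned char **);

static void resolve(int len, int k, int rows, unsigned char *g,
		    unsigned char **d, unsigned char **c);

/* First call lands in the resolver; every later call goes straight through. */
static ec_encode_fn encode_ptr = resolve;

static void resolve(int len, int k, int rows, unsigned char *g,
		    unsigned char **d, unsigned char **c)
{
	if (cpu_has_avx2())
		encode_ptr = ec_encode_data_avx2;
	else if (cpu_has_sse41())
		encode_ptr = ec_encode_data_sse;
	else
		encode_ptr = ec_encode_data_base;
	encode_ptr(len, k, rows, g, d, c);
}

void my_ec_encode_data(int len, int k, int rows, unsigned char *g,
		       unsigned char **d, unsigned char **c)
{
	encode_ptr(len, k, rows, g, d, c);
}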
diff --git a/src/isa-l/erasure_code/erasure_code_base_perf.c b/src/isa-l/erasure_code/erasure_code_base_perf.c
new file mode 100644
index 000000000..9587788d8
--- /dev/null
+++ b/src/isa-l/erasure_code/erasure_code_base_perf.c
@@ -0,0 +1,176 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 32
+# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from a memory buffer larger than the last-level cache.
+# define TEST_SOURCES 32
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#define BAD_MATRIX -1
+
+typedef unsigned char u8;
+
+void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs)
+{
+ ec_init_tables(k, m - k, &a[k * k], g_tbls);
+ ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
+}
+
+int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
+ u8 * src_err_list, int nerrs, u8 ** temp_buffs)
+{
+ int i, j, r;
+ u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
+ u8 *recov[TEST_SOURCES];
+
+ // Construct b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r])
+ r++;
+ recov[i] = buffs[r];
+ for (j = 0; j < k; j++)
+ b[k * i + j] = a[k * r + j];
+ }
+
+ if (gf_invert_matrix(b, d, k) < 0)
+ return BAD_MATRIX;
+
+ for (i = 0; i < nerrs; i++)
+ for (j = 0; j < k; j++)
+ c[k * i + j] = d[k * src_err_list[i] + j];
+
+ // Recover data
+ ec_init_tables(k, nerrs, c, g_tbls);
+ ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, m, k, nerrs, check;
+ void *buf;
+ u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+ u8 a[MMAX * KMAX];
+ u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+ u8 src_err_list[TEST_SOURCES];
+ struct perf start;
+
+ // Pick test parameters
+ m = 14;
+ k = 10;
+ nerrs = 4;
+ const u8 err_list[] = { 2, 4, 5, 7 };
+
+ printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
+
+ if (m > MMAX || k > KMAX || nerrs > (m - k)) {
+ printf(" Input test parameter error\n");
+ return -1;
+ }
+
+ memcpy(src_err_list, err_list, nerrs);
+ memset(src_in_err, 0, TEST_SOURCES);
+ for (i = 0; i < nerrs; i++)
+ src_in_err[src_err_list[i]] = 1;
+
+ // Allocate the arrays
+ for (i = 0; i < m; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ for (i = 0; i < (m - k); i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ temp_buffs[i] = buf;
+ }
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN(m); j++)
+ buffs[i][j] = rand();
+
+ gf_gen_rs_matrix(a, m, k);
+
+ // Start encode test
+ BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs));
+ printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (m));
+
+ // Start decode test
+ BENCHMARK(&start, BENCHMARK_TIME, check =
+ ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
+ temp_buffs));
+
+ if (check == BAD_MATRIX) {
+ printf("BAD MATRIX\n");
+ return check;
+ }
+
+ for (i = 0; i < nerrs; i++) {
+ if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
+			printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ return -1;
+ }
+ }
+
+ printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
+
+ printf("done all: Pass\n");
+ return 0;
+}
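
Aside: stripped of the benchmark plumbing, the API sequence this file times is short. A minimal sketch, assuming k = 4 data and 2 parity fragments of len bytes (the layout is an assumption for illustration, not upstream code):

	#include <stdlib.h>
	#include "erasure_code.h"

	static void encode_sketch(int len)
	{
		unsigned char a[6 * 4];			/* m x k generator matrix  */
		unsigned char g_tbls[4 * 2 * 32];	/* expanded GF(2^8) tables */
		unsigned char *frag[6];
		int i;

		for (i = 0; i < 6; i++)
			frag[i] = malloc(len);		/* NULL checks omitted */

		gf_gen_rs_matrix(a, 6, 4);		/* m = 6, k = 4     */
		ec_init_tables(4, 2, &a[4 * 4], g_tbls);/* parity rows only */
		ec_encode_data_base(len, 4, 2, g_tbls, frag, &frag[4]);

		for (i = 0; i < 6; i++)
			free(frag[i]);
	}
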
diff --git a/src/isa-l/erasure_code/erasure_code_base_test.c b/src/isa-l/erasure_code/erasure_code_base_test.c
new file mode 100644
index 000000000..81e1b5778
--- /dev/null
+++ b/src/isa-l/erasure_code/erasure_code_base_test.c
@@ -0,0 +1,764 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 127
+#endif
+#ifndef RANDOMS
+# define RANDOMS 50
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#define EFENCE_TEST_MIN_SIZE 16
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
+#endif
+
+#ifndef TEST_SEED
+#define TEST_SEED 11
+#endif
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", s[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", 0xff & s[j + (i * m)]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+// Generate random errors
+static void gen_err_list(unsigned char *src_err_list,
+ unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
+{
+ int i, err;
+ int nerrs = 0, nsrcerrs = 0;
+
+ for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
+ err = 1 & rand();
+ src_in_err[i] = err;
+ if (err) {
+ src_err_list[nerrs++] = i;
+ if (i < k) {
+ nsrcerrs++;
+ }
+ }
+ }
+ if (nerrs == 0) { // should have at least one error
+ while ((err = (rand() % KMAX)) >= m) ;
+ src_err_list[nerrs++] = err;
+ src_in_err[err] = 1;
+ if (err < k)
+ nsrcerrs = 1;
+ }
+ *pnerrs = nerrs;
+ *pnsrcerrs = nsrcerrs;
+ return;
+}
+
+#define NO_INVERT_MATRIX -2
+// Generate decode matrix from encode matrix
+static int gf_gen_decode_matrix(unsigned char *encode_matrix,
+ unsigned char *decode_matrix,
+ unsigned char *invert_matrix,
+ unsigned int *decode_index,
+ unsigned char *src_err_list,
+ unsigned char *src_in_err,
+ int nerrs, int nsrcerrs, int k, int m)
+{
+ int i, j, p;
+ int r;
+ unsigned char *backup, *b, s;
+ int incr = 0;
+
+ b = malloc(MMAX * KMAX);
+ backup = malloc(MMAX * KMAX);
+
+ if (b == NULL || backup == NULL) {
+ printf("Test failure! Error with malloc\n");
+ free(b);
+ free(backup);
+ return -1;
+ }
+ // Construct matrix b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r])
+ r++;
+ for (j = 0; j < k; j++) {
+ b[k * i + j] = encode_matrix[k * r + j];
+ backup[k * i + j] = encode_matrix[k * r + j];
+ }
+ decode_index[i] = r;
+ }
+ incr = 0;
+ while (gf_invert_matrix(b, invert_matrix, k) < 0) {
+ if (nerrs == (m - k)) {
+ free(b);
+ free(backup);
+ printf("BAD MATRIX\n");
+ return NO_INVERT_MATRIX;
+ }
+ incr++;
+ memcpy(b, backup, MMAX * KMAX);
+ for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
+ if (src_err_list[i] == (decode_index[k - 1] + incr)) {
+ // skip the erased parity line
+ incr++;
+ continue;
+ }
+ }
+ if (decode_index[k - 1] + incr >= m) {
+ free(b);
+ free(backup);
+ printf("BAD MATRIX\n");
+ return NO_INVERT_MATRIX;
+ }
+ decode_index[k - 1] += incr;
+ for (j = 0; j < k; j++)
+ b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
+
+	}
+
+ for (i = 0; i < nsrcerrs; i++) {
+ for (j = 0; j < k; j++) {
+ decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
+ }
+ }
+ /* src_err_list from encode_matrix * invert of b for parity decoding */
+ for (p = nsrcerrs; p < nerrs; p++) {
+ for (i = 0; i < k; i++) {
+ s = 0;
+ for (j = 0; j < k; j++)
+ s ^= gf_mul(invert_matrix[j * k + i],
+ encode_matrix[k * src_err_list[p] + j]);
+
+ decode_matrix[k * p + i] = s;
+ }
+ }
+ free(b);
+ free(backup);
+ return 0;
+}
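+// Note (illustrative): this is standard RS decoding. The encode rows of k
+// survivors form B, inverted over GF(2^8); row i of B^-1 rebuilds lost data
+// fragment i directly, while a lost parity row e is rebuilt as
+// encode_row[e] * B^-1 -- the gf_mul() accumulation in the final loop.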
+
+int main(int argc, char *argv[])
+{
+ int re = 0;
+ int i, j, p, rtest, m, k;
+ int nerrs, nsrcerrs;
+ void *buf;
+ unsigned int decode_index[MMAX];
+ unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+ unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
+ unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
+ unsigned char *recov[TEST_SOURCES];
+
+ int rows, align, size;
+ unsigned char *efence_buffs[TEST_SOURCES];
+ unsigned int offset;
+ u8 *ubuffs[TEST_SOURCES];
+ u8 *temp_ubuffs[TEST_SOURCES];
+
+ printf("erasure_code_base_test: %dx%d ", TEST_SOURCES, TEST_LEN);
+ srand(TEST_SEED);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail\n");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail\n");
+ return -1;
+ }
+ temp_buffs[i] = buf;
+ }
+
+ // Test erasure code by encode and recovery
+
+ encode_matrix = malloc(MMAX * KMAX);
+ decode_matrix = malloc(MMAX * KMAX);
+ invert_matrix = malloc(MMAX * KMAX);
+ g_tbls = malloc(KMAX * TEST_SOURCES * 32);
+ if (encode_matrix == NULL || decode_matrix == NULL
+ || invert_matrix == NULL || g_tbls == NULL) {
+ printf("Test failure! Error with malloc\n");
+ return -1;
+ }
+ // Pick a first test
+ m = 9;
+ k = 5;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+	// Generate the encode matrix
+	// The matrix generated by gf_gen_rs_matrix
+	// is not always invertible.
+	gf_gen_rs_matrix(encode_matrix, m, k);
+
+	// Generate g_tbls from the encode matrix
+	ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+	// Perform matrix dot_prod for EC encoding
+	// using g_tbls from the encode matrix
+ ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Choose random buffers to be in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list, src_in_err,
+ nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+		printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+	// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+
+	// Pick a second test
+ m = 9;
+ k = 5;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+	// The matrix generated by gf_gen_cauchy1_matrix
+	// is always invertible.
+	gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+	// Generate g_tbls from the encode matrix
+	ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+	// Perform matrix dot_prod for EC encoding
+	// using g_tbls from the encode matrix
+ ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Choose random buffers to be in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list, src_in_err,
+ nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+		printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+	// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+
+ // Do more random tests
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+		// The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+		gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+		// Make parity vects
+		// Generate g_tbls from the encode matrix
+		ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+		// Perform matrix dot_prod for EC encoding
+		// using g_tbls from the encode matrix
+ ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+		// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(buffs, m, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ return -1;
+ }
+ }
+ putchar('.');
+ }
+
+ // Run tests at end of buffer for Electric Fence
+ k = 16;
+ align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
+ if (k > KMAX)
+ return -1;
+
+ for (rows = 1; rows <= 16; rows++) {
+ m = k + rows;
+ if (m > MMAX)
+ return -1;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
+ for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
+ efence_buffs[i] = buffs[i] + TEST_LEN - size;
+ }
+
+			// The matrix generated by gf_gen_cauchy1_matrix
+			// is always invertible.
+			gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+			// Make parity vects
+			// Generate g_tbls from the encode matrix
+			ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+			// Perform matrix dot_prod for EC encoding
+			// using g_tbls from the encode matrix
+ ec_encode_data_base(size, k, m - k, g_tbls, efence_buffs,
+ &efence_buffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+				printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+			// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = efence_buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 !=
+ memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
+ size)) {
+ printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
+ k, nerrs);
+
+ printf("size = %d\n", size);
+
+ printf("Test erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], align);
+ printf("orig :");
+ dump(efence_buffs[src_err_list[i]], align);
+ return -1;
+ }
+ }
+ }
+
+ }
+
+ // Test rand ptr alignment if available
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
+
+ offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
+ // Add random offsets
+ for (i = 0; i < m; i++) {
+ memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
+ memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
+ ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ }
+
+ for (i = 0; i < k; i++)
+ for (j = 0; j < size; j++)
+ ubuffs[i][j] = rand();
+
+		// The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+		gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+		// Make parity vects
+		// Generate g_tbls from the encode matrix
+		ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+		// Perform matrix dot_prod for EC encoding
+		// using g_tbls from the encode matrix
+ ec_encode_data_base(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+		// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = ubuffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((unsigned char *)encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((unsigned char *)invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((unsigned char *)decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(ubuffs, m, 25);
+ printf("orig :");
+ dump(ubuffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_ubuffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Confirm that padding around dests is unchanged
+ memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
+
+ for (i = 0; i < m; i++) {
+
+ offset = ubuffs[i] - buffs[i];
+
+ if (memcmp(buffs[i], temp_buffs[0], offset)) {
+ printf("Fail rand ualign encode pad start\n");
+ return -1;
+ }
+ if (memcmp
+ (buffs[i] + offset + size, temp_buffs[0],
+ PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign encode pad end\n");
+ return -1;
+ }
+ }
+
+ for (i = 0; i < nerrs; i++) {
+
+ offset = temp_ubuffs[k + i] - temp_buffs[k + i];
+ if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
+ printf("Fail rand ualign decode pad start\n");
+ return -1;
+ }
+ if (memcmp
+ (temp_buffs[k + i] + offset + size, temp_buffs[0],
+ PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign decode pad end\n");
+ return -1;
+ }
+ }
+
+ putchar('.');
+ }
+
+ // Test size alignment
+
+ align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
+
+ for (size = TEST_LEN; size > 0; size -= align) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ for (i = 0; i < k; i++)
+ for (j = 0; j < size; j++)
+ buffs[i][j] = rand();
+
+		// The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+		gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+		// Make parity vects
+		// Generate g_tbls from the encode matrix
+		ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+		// Perform matrix dot_prod for EC encoding
+		// using g_tbls from the encode matrix
+ ec_encode_data_base(size, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+		// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((unsigned char *)encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((unsigned char *)invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((unsigned char *)decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(buffs, m, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ return -1;
+ }
+ }
+ }
+
+ printf("done EC tests: Pass\n");
+ return 0;
+}
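
Aside: the Electric Fence pass above is a placement trick: each working window is lined up flush with the end of its allocation, so any access past size bytes lands outside the buffer and can be trapped by efence, ASan, or valgrind. A sketch of the placement (illustrative helper, not upstream API):

	#include <stdlib.h>

	/* Return a size-byte window ending exactly at the allocation's end,
	 * as efence_buffs[] does above; an overrun past the window now falls
	 * outside the allocation. */
	static unsigned char *fence_window(size_t total, size_t size)
	{
		unsigned char *base = malloc(total);	/* check omitted */
		return base + total - size;
	}
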
diff --git a/src/isa-l/erasure_code/erasure_code_perf.c b/src/isa-l/erasure_code/erasure_code_perf.c
new file mode 100644
index 000000000..da81387b5
--- /dev/null
+++ b/src/isa-l/erasure_code/erasure_code_perf.c
@@ -0,0 +1,177 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 32
+# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 32
+# define GT_L3_CACHE (32*1024*1024) /* some number > last level cache */
+# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#define BAD_MATRIX -1
+
+typedef unsigned char u8;
+
+void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, struct perf *start)
+{
+ ec_init_tables(k, m - k, &a[k * k], g_tbls);
+ BENCHMARK(start, BENCHMARK_TIME,
+ ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]));
+}
+
+int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err,
+ u8 * src_err_list, int nerrs, u8 ** temp_buffs, struct perf *start)
+{
+ int i, j, r;
+ u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
+ u8 *recov[TEST_SOURCES];
+
+ // Construct b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r])
+ r++;
+ recov[i] = buffs[r];
+ for (j = 0; j < k; j++)
+ b[k * i + j] = a[k * r + j];
+ }
+
+ if (gf_invert_matrix(b, d, k) < 0)
+ return BAD_MATRIX;
+
+ for (i = 0; i < nerrs; i++)
+ for (j = 0; j < k; j++)
+ c[k * i + j] = d[k * src_err_list[i] + j];
+
+ // Recover data
+ ec_init_tables(k, nerrs, c, g_tbls);
+ BENCHMARK(start, BENCHMARK_TIME,
+ ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs));
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, m, k, nerrs, check;
+ void *buf;
+ u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+ u8 a[MMAX * KMAX];
+ u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+ u8 src_err_list[TEST_SOURCES];
+ struct perf start;
+
+ // Pick test parameters
+ m = 14;
+ k = 10;
+ nerrs = 4;
+ const u8 err_list[] = { 2, 4, 5, 7 };
+
+ printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
+
+ if (m > MMAX || k > KMAX || nerrs > (m - k)) {
+ printf(" Input test parameter error\n");
+ return -1;
+ }
+
+ memcpy(src_err_list, err_list, nerrs);
+ memset(src_in_err, 0, TEST_SOURCES);
+ for (i = 0; i < nerrs; i++)
+ src_in_err[src_err_list[i]] = 1;
+
+ // Allocate the arrays
+ for (i = 0; i < m; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ for (i = 0; i < (m - k); i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ temp_buffs[i] = buf;
+ }
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN(m); j++)
+ buffs[i][j] = rand();
+
+ gf_gen_rs_matrix(a, m, k);
+
+ // Start encode test
+ ec_encode_perf(m, k, a, g_tbls, buffs, &start);
+ printf("erasure_code_encode" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (m));
+
+ // Start decode test
+ check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs,
+ temp_buffs, &start);
+
+ if (check == BAD_MATRIX) {
+ printf("BAD MATRIX\n");
+ return check;
+ }
+
+ for (i = 0; i < nerrs; i++) {
+ if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
+			printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ return -1;
+ }
+ }
+
+ printf("erasure_code_decode" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
+
+ printf("done all: Pass\n");
+ return 0;
+}
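
Aside: the byte counts handed to perf_print are the bytes the kernel actually touches per run: encode reads k fragments and writes m - k, hence TEST_LEN(m) * m; decode reads k survivors and writes nerrs recoveries, hence TEST_LEN(m) * (k + nerrs). A sketch of the rate arithmetic (helper name and units are illustrative; perf internals are not shown here):

	/* Illustrative only: frags = m for encode, k + nerrs for decode. */
	static double throughput_mb_s(long long len, int frags, double seconds)
	{
		long long bytes = len * frags;
		return bytes / seconds / 1000000.0;
	}
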
diff --git a/src/isa-l/erasure_code/erasure_code_test.c b/src/isa-l/erasure_code/erasure_code_test.c
new file mode 100644
index 000000000..a1736afd5
--- /dev/null
+++ b/src/isa-l/erasure_code/erasure_code_test.c
@@ -0,0 +1,764 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 127
+#endif
+#ifndef RANDOMS
+# define RANDOMS 200
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#define EFENCE_TEST_MIN_SIZE 16
+#define EFENCE_TEST_MAX_SIZE (EFENCE_TEST_MIN_SIZE + 0x100)
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
+#endif
+
+#ifndef TEST_SEED
+#define TEST_SEED 11
+#endif
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", s[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", 0xff & s[j + (i * m)]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+// Generate random errors
+static void gen_err_list(unsigned char *src_err_list,
+ unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
+{
+ int i, err;
+ int nerrs = 0, nsrcerrs = 0;
+
+ for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
+ err = 1 & rand();
+ src_in_err[i] = err;
+ if (err) {
+ src_err_list[nerrs++] = i;
+ if (i < k) {
+ nsrcerrs++;
+ }
+ }
+ }
+ if (nerrs == 0) { // should have at least one error
+ while ((err = (rand() % KMAX)) >= m) ;
+ src_err_list[nerrs++] = err;
+ src_in_err[err] = 1;
+ if (err < k)
+ nsrcerrs = 1;
+ }
+ *pnerrs = nerrs;
+ *pnsrcerrs = nsrcerrs;
+ return;
+}
+
+#define NO_INVERT_MATRIX -2
+// Generate decode matrix from encode matrix
+static int gf_gen_decode_matrix(unsigned char *encode_matrix,
+ unsigned char *decode_matrix,
+ unsigned char *invert_matrix,
+ unsigned int *decode_index,
+ unsigned char *src_err_list,
+ unsigned char *src_in_err,
+ int nerrs, int nsrcerrs, int k, int m)
+{
+ int i, j, p;
+ int r;
+ unsigned char *backup, *b, s;
+ int incr = 0;
+
+ b = malloc(MMAX * KMAX);
+ backup = malloc(MMAX * KMAX);
+
+ if (b == NULL || backup == NULL) {
+ printf("Test failure! Error with malloc\n");
+ free(b);
+ free(backup);
+ return -1;
+ }
+ // Construct matrix b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r])
+ r++;
+ for (j = 0; j < k; j++) {
+ b[k * i + j] = encode_matrix[k * r + j];
+ backup[k * i + j] = encode_matrix[k * r + j];
+ }
+ decode_index[i] = r;
+ }
+ incr = 0;
+ while (gf_invert_matrix(b, invert_matrix, k) < 0) {
+ if (nerrs == (m - k)) {
+ free(b);
+ free(backup);
+ printf("BAD MATRIX\n");
+ return NO_INVERT_MATRIX;
+ }
+ incr++;
+ memcpy(b, backup, MMAX * KMAX);
+ for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
+ if (src_err_list[i] == (decode_index[k - 1] + incr)) {
+ // skip the erased parity line
+ incr++;
+ continue;
+ }
+ }
+ if (decode_index[k - 1] + incr >= m) {
+ free(b);
+ free(backup);
+ printf("BAD MATRIX\n");
+ return NO_INVERT_MATRIX;
+ }
+ decode_index[k - 1] += incr;
+ for (j = 0; j < k; j++)
+ b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
+
+	}
+
+ for (i = 0; i < nsrcerrs; i++) {
+ for (j = 0; j < k; j++) {
+ decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
+ }
+ }
+ /* src_err_list from encode_matrix * invert of b for parity decoding */
+ for (p = nsrcerrs; p < nerrs; p++) {
+ for (i = 0; i < k; i++) {
+ s = 0;
+ for (j = 0; j < k; j++)
+ s ^= gf_mul(invert_matrix[j * k + i],
+ encode_matrix[k * src_err_list[p] + j]);
+
+ decode_matrix[k * p + i] = s;
+ }
+ }
+ free(b);
+ free(backup);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int re = 0;
+ int i, j, p, rtest, m, k;
+ int nerrs, nsrcerrs;
+ void *buf;
+ unsigned int decode_index[MMAX];
+ unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+ unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
+ unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
+ unsigned char *recov[TEST_SOURCES];
+
+ int rows, align, size;
+ unsigned char *efence_buffs[TEST_SOURCES];
+ unsigned int offset;
+ u8 *ubuffs[TEST_SOURCES];
+ u8 *temp_ubuffs[TEST_SOURCES];
+
+ printf("erasure_code_test: %dx%d ", TEST_SOURCES, TEST_LEN);
+ srand(TEST_SEED);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail\n");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail\n");
+ return -1;
+ }
+ temp_buffs[i] = buf;
+ }
+
+ // Test erasure code by encode and recovery
+
+ encode_matrix = malloc(MMAX * KMAX);
+ decode_matrix = malloc(MMAX * KMAX);
+ invert_matrix = malloc(MMAX * KMAX);
+ g_tbls = malloc(KMAX * TEST_SOURCES * 32);
+ if (encode_matrix == NULL || decode_matrix == NULL
+ || invert_matrix == NULL || g_tbls == NULL) {
+ printf("Test failure! Error with malloc\n");
+ return -1;
+ }
+ // Pick a first test
+ m = 9;
+ k = 5;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+	// Generate the encode matrix
+	// The matrix generated by gf_gen_rs_matrix
+	// is not always invertible.
+	gf_gen_rs_matrix(encode_matrix, m, k);
+
+	// Generate g_tbls from the encode matrix
+	ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+	// Perform matrix dot_prod for EC encoding
+	// using g_tbls from the encode matrix
+ ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Choose random buffers to be in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list, src_in_err,
+ nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+		printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+	// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+
+	// Pick a second test
+ m = 9;
+ k = 5;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+	// The matrix generated by gf_gen_cauchy1_matrix
+	// is always invertible.
+	gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+	// Generate g_tbls from the encode matrix
+	ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+	// Perform matrix dot_prod for EC encoding
+	// using g_tbls from the encode matrix
+ ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Choose random buffers to be in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list, src_in_err,
+ nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+		printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+	// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+
+ // Do more random tests
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+		// The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+		gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+		// Make parity vects
+		// Generate g_tbls from the encode matrix
+		ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+		// Perform matrix dot_prod for EC encoding
+		// using g_tbls from the encode matrix
+ ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+		// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(buffs, m, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ return -1;
+ }
+ }
+ putchar('.');
+ }
+
+ // Run tests at end of buffer for Electric Fence
+ k = 16;
+ align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
+ if (k > KMAX)
+ return -1;
+
+ for (rows = 1; rows <= 16; rows++) {
+ m = k + rows;
+ if (m > MMAX)
+ return -1;
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (size = EFENCE_TEST_MIN_SIZE; size <= EFENCE_TEST_MAX_SIZE; size += align) {
+ for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
+ efence_buffs[i] = buffs[i] + TEST_LEN - size;
+ }
+
+			// The matrix generated by gf_gen_cauchy1_matrix
+			// is always invertible.
+			gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+			// Make parity vects
+			// Generate g_tbls from the encode matrix
+			ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+			// Perform matrix dot_prod for EC encoding
+			// using g_tbls from the encode matrix
+ ec_encode_data(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+				printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+			// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = efence_buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 !=
+ memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
+ size)) {
+ printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
+ k, nerrs);
+
+ printf("size = %d\n", size);
+
+ printf("Test erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], align);
+ printf("orig :");
+ dump(efence_buffs[src_err_list[i]], align);
+ return -1;
+ }
+ }
+ }
+
+ }
+
+ // Test rand ptr alignment if available
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
+
+ offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
+ // Add random offsets
+ for (i = 0; i < m; i++) {
+ memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
+ memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
+ ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ }
+
+ for (i = 0; i < k; i++)
+ for (j = 0; j < size; j++)
+ ubuffs[i][j] = rand();
+
+		// The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+		gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+		// Make parity vects
+		// Generate g_tbls from the encode matrix
+		ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+		// Perform matrix dot_prod for EC encoding
+		// using g_tbls from the encode matrix
+ ec_encode_data(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+		// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = ubuffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((unsigned char *)encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((unsigned char *)invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((unsigned char *)decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(ubuffs, m, 25);
+ printf("orig :");
+ dump(ubuffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_ubuffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Confirm that padding around dests is unchanged
+ memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
+
+ for (i = 0; i < m; i++) {
+
+ offset = ubuffs[i] - buffs[i];
+
+ if (memcmp(buffs[i], temp_buffs[0], offset)) {
+ printf("Fail rand ualign encode pad start\n");
+ return -1;
+ }
+ if (memcmp
+ (buffs[i] + offset + size, temp_buffs[0],
+ PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign encode pad end\n");
+ return -1;
+ }
+ }
+
+ for (i = 0; i < nerrs; i++) {
+
+ offset = temp_ubuffs[k + i] - temp_buffs[k + i];
+ if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
+ printf("Fail rand ualign decode pad start\n");
+ return -1;
+ }
+ if (memcmp
+ (temp_buffs[k + i] + offset + size, temp_buffs[0],
+ PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign decode pad end\n");
+ return -1;
+ }
+ }
+
+ putchar('.');
+ }
+
+ // Test size alignment
+
+ align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
+
+ for (size = TEST_LEN; size > 0; size -= align) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ for (i = 0; i < k; i++)
+ for (j = 0; j < size; j++)
+ buffs[i][j] = rand();
+
+		// The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+		gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+		// Make parity vects
+		// Generate g_tbls from the encode matrix
+		ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+		// Perform matrix dot_prod for EC encoding
+		// using g_tbls from the encode matrix
+ ec_encode_data(size, k, m - k, g_tbls, buffs, &buffs[k]);
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("Failed to generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+		// Its order must match the order used
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((unsigned char *)encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((unsigned char *)invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((unsigned char *)decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(buffs, m, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ return -1;
+ }
+ }
+ }
+
+ printf("done EC tests: Pass\n");
+ return 0;
+}
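
Aside: the unaligned-pointer pass plants each buffer at a random offset inside a zeroed allocation, then compares the slack on both sides against a zero reference to prove the kernels never wrote outside [ptr, ptr + size). The check reduces to the following sketch (hypothetical helper; slack corresponds to PTR_ALIGN_CHK_B above):

	#include <string.h>

	/* Return 0 iff the padding around the window [base+off, base+off+size)
	 * is still zero, i.e. the kernel stayed inside its window. */
	static int pad_intact(const unsigned char *base, size_t off, size_t size,
			      size_t slack, const unsigned char *zeros)
	{
		if (memcmp(base, zeros, off))				/* before */
			return -1;
		if (memcmp(base + off + size, zeros, slack - off))	/* after  */
			return -1;
		return 0;
	}
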
diff --git a/src/isa-l/erasure_code/erasure_code_update_perf.c b/src/isa-l/erasure_code/erasure_code_update_perf.c
new file mode 100644
index 000000000..909e89414
--- /dev/null
+++ b/src/isa-l/erasure_code/erasure_code_update_perf.c
@@ -0,0 +1,281 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+#include "test.h"
+
+// By default, test multibinary version
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST ec_encode_data_update
+# define REF_FUNCTION ec_encode_data
+#endif
+
+// By default, test EC(8+4)
+#if (!defined(VECT))
+# define VECT 4
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 32
+# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 32
+# define GT_L3_CACHE (32*1024*1024) /* some number > last level cache */
+# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void encode_update_test_ref(int m, int k, u8 * g_tbls, u8 ** buffs, u8 * a)
+{
+ ec_init_tables(k, m - k, &a[k * k], g_tbls);
+ REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
+}
+
+void encode_update_test(int m, int k, u8 * g_tbls, u8 ** perf_update_buffs, u8 * a)
+{
+ int i;
+
+ // Make parity vects
+ ec_init_tables(k, m - k, &a[k * k], g_tbls);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls,
+ perf_update_buffs[i], &perf_update_buffs[k]);
+ }
+}
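+// Note (illustrative): unlike ec_encode_data(), the *_update() form folds in
+// one source fragment per call, so parity must start zeroed and the loop
+// above walks every source index i = 0..k-1 to accumulate each fragment's
+// contribution into the same parity buffers.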
+
+int decode_test(int m, int k, u8 ** update_buffs, u8 ** recov, u8 * a, u8 * src_in_err,
+ u8 * src_err_list, int nerrs, u8 * g_tbls, u8 ** perf_update_buffs)
+{
+ int i, j, r;
+ u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
+ // Construct b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r])
+ r++;
+ recov[i] = update_buffs[r];
+ for (j = 0; j < k; j++)
+ b[k * i + j] = a[k * r + j];
+ }
+
+ if (gf_invert_matrix(b, d, k) < 0) {
+ printf("BAD MATRIX\n");
+ return -1;
+ }
+
+ for (i = 0; i < nerrs; i++)
+ for (j = 0; j < k; j++)
+ c[k * i + j] = d[k * src_err_list[i] + j];
+
+ // Recover data
+ ec_init_tables(k, nerrs, c, g_tbls);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i],
+ perf_update_buffs);
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, check, m, k, nerrs;
+ void *buf;
+ u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+ u8 *update_buffs[TEST_SOURCES];
+ u8 *perf_update_buffs[TEST_SOURCES];
+ u8 a[MMAX * KMAX];
+ u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+ u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
+ struct perf start;
+
+ // Pick test parameters
+ k = 10;
+ m = k + VECT;
+ nerrs = VECT;
+ const u8 err_list[] = { 0, 2, 4, 5, 7, 8 };
+
+ printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
+
+ if (m > MMAX || k > KMAX || nerrs > (m - k)) {
+ printf(" Input test parameter error\n");
+ return -1;
+ }
+
+ memcpy(src_err_list, err_list, nerrs);
+ memset(src_in_err, 0, TEST_SOURCES);
+ for (i = 0; i < nerrs; i++)
+ src_in_err[src_err_list[i]] = 1;
+
+ // Allocate the arrays
+ for (i = 0; i < m; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ for (i = 0; i < (m - k); i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ temp_buffs[i] = buf;
+		memset(temp_buffs[i], 0, TEST_LEN(m));	// the update function requires zeroed destination buffers
+ }
+
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ update_buffs[i] = buf;
+		memset(update_buffs[i], 0, TEST_LEN(m));	// the update function requires zeroed destination buffers
+ }
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN(m))) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ perf_update_buffs[i] = buf;
+		memset(perf_update_buffs[i], 0, TEST_LEN(m));	// the update function requires zeroed destination buffers
+ }
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN(m); j++) {
+ buffs[i][j] = rand();
+ update_buffs[i][j] = buffs[i][j];
+ }
+
+ gf_gen_rs_matrix(a, m, k);
+
+ encode_update_test_ref(m, k, g_tbls, buffs, a);
+ encode_update_test(m, k, g_tbls, update_buffs, a);
+ for (i = 0; i < m - k; i++) {
+ if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) {
+ printf("\nupdate_buffs%d :", i);
+ dump(update_buffs[k + i], 25);
+ printf("buffs%d :", i);
+ dump(buffs[k + i], 25);
+ return -1;
+ }
+ }
+
+#ifdef DO_REF_PERF
+ // Start encode test
+ BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a));
+ printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (m));
+#endif
+
+ // Start encode test
+ BENCHMARK(&start, BENCHMARK_TIME,
+ encode_update_test(m, k, g_tbls, perf_update_buffs, a));
+ printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (m));
+
+ // Start encode test
+ BENCHMARK(&start, BENCHMARK_TIME,
+ // Make parity vects
+ ec_init_tables(k, m - k, &a[k * k], g_tbls);
+ FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
+ &perf_update_buffs[k]));
+ printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
+
+ // Start encode test
+ BENCHMARK(&start, BENCHMARK_TIME,
+ // Make parity vects
+ FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0],
+ &perf_update_buffs[k]));
+ printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1));
+
+ for (i = k; i < m; i++) {
+		memset(update_buffs[i], 0, TEST_LEN(m));	// the update function requires zeroed destination buffers
+ }
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i],
+ &update_buffs[k]);
+ }
+
+ decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
+ nerrs, g_tbls, temp_buffs);
+ BENCHMARK(&start, BENCHMARK_TIME, check =
+ decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list,
+ nerrs, g_tbls, perf_update_buffs));
+ if (check) {
+		printf("BAD MATRIX\n");
+ return -1;
+ }
+
+ for (i = 0; i < nerrs; i++) {
+ if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) {
+			printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ return -1;
+ }
+ }
+
+ printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs));
+
+ printf("done all: Pass\n");
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/erasure_code_update_test.c b/src/isa-l/erasure_code/erasure_code_update_test.c
new file mode 100644
index 000000000..f30a6a29b
--- /dev/null
+++ b/src/isa-l/erasure_code/erasure_code_update_test.c
@@ -0,0 +1,959 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef ALIGN_SIZE
+# define ALIGN_SIZE 16
+#endif
+
+//By default, test multibinary version
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST ec_encode_data_update
+# define REF_FUNCTION ec_encode_data
+#endif
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 127
+#endif
+#ifndef RANDOMS
+# define RANDOMS 200
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#define EFENCE_TEST_MAX_SIZE 0x100
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B ALIGN_SIZE
+# define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
+#endif
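+// Nonzero PTR_ALIGN_CHK_B/LEN_ALIGN_CHK_B widen the pointer-offset and
+// buffer-length sweeps below; with EC_ALIGNED_ADDR both are 0, so only
+// aligned cases are exercised.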
+
+#ifndef TEST_SEED
+#define TEST_SEED 11
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", s[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", 0xff & s[j + (i * m)]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+// Generate random errors: erase each of the m buffers with probability 1/2
+// (capped at m - k total), forcing at least one erasure if none was chosen
+static void gen_err_list(unsigned char *src_err_list,
+ unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
+{
+ int i, err;
+ int nerrs = 0, nsrcerrs = 0;
+
+ for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
+ err = 1 & rand();
+ src_in_err[i] = err;
+ if (err) {
+ src_err_list[nerrs++] = i;
+ if (i < k) {
+ nsrcerrs++;
+ }
+ }
+ }
+ if (nerrs == 0) { // should have at least one error
+ while ((err = (rand() % KMAX)) >= m) ;
+ src_err_list[nerrs++] = err;
+ src_in_err[err] = 1;
+ if (err < k)
+ nsrcerrs = 1;
+ }
+ *pnerrs = nerrs;
+ *pnsrcerrs = nsrcerrs;
+ return;
+}
+
+#define NO_INVERT_MATRIX -2
+// Generate decode matrix from encode matrix
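+// Builds matrix b from the k surviving rows of encode_matrix and inverts it;
+// rows of the inverse recover erased data buffers directly, while erased
+// parity rows are recomputed as (encode row) x (inverse of b).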
+static int gf_gen_decode_matrix(unsigned char *encode_matrix,
+ unsigned char *decode_matrix,
+ unsigned char *invert_matrix,
+ unsigned int *decode_index,
+ unsigned char *src_err_list,
+ unsigned char *src_in_err,
+ int nerrs, int nsrcerrs, int k, int m)
+{
+ int i, j, p;
+ int r;
+ unsigned char *backup, *b, s;
+ int incr = 0;
+
+ b = malloc(MMAX * KMAX);
+ backup = malloc(MMAX * KMAX);
+
+ if (b == NULL || backup == NULL) {
+ printf("Test failure! Error with malloc\n");
+ free(b);
+ free(backup);
+ return -1;
+ }
+ // Construct matrix b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r])
+ r++;
+ for (j = 0; j < k; j++) {
+ b[k * i + j] = encode_matrix[k * r + j];
+ backup[k * i + j] = encode_matrix[k * r + j];
+ }
+ decode_index[i] = r;
+ }
+ incr = 0;
+ while (gf_invert_matrix(b, invert_matrix, k) < 0) {
+ if (nerrs == (m - k)) {
+ free(b);
+ free(backup);
+ printf("BAD MATRIX\n");
+ return NO_INVERT_MATRIX;
+ }
+ incr++;
+ memcpy(b, backup, MMAX * KMAX);
+ for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
+ if (src_err_list[i] == (decode_index[k - 1] + incr)) {
+ // skip the erased parity line
+ incr++;
+ continue;
+ }
+ }
+ if (decode_index[k - 1] + incr >= m) {
+ free(b);
+ free(backup);
+ printf("BAD MATRIX\n");
+ return NO_INVERT_MATRIX;
+ }
+ decode_index[k - 1] += incr;
+ for (j = 0; j < k; j++)
+ b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
+
+ };
+
+ for (i = 0; i < nsrcerrs; i++) {
+ for (j = 0; j < k; j++) {
+ decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
+ }
+ }
+ /* src_err_list from encode_matrix * invert of b for parity decoding */
+ for (p = nsrcerrs; p < nerrs; p++) {
+ for (i = 0; i < k; i++) {
+ s = 0;
+ for (j = 0; j < k; j++)
+ s ^= gf_mul(invert_matrix[j * k + i],
+ encode_matrix[k * src_err_list[p] + j]);
+
+ decode_matrix[k * p + i] = s;
+ }
+ }
+ free(b);
+ free(backup);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int re = 0;
+ int i, j, p, rtest, m, k;
+ int nerrs, nsrcerrs;
+ void *buf;
+ unsigned int decode_index[MMAX];
+ unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+ unsigned char *update_buffs[TEST_SOURCES];
+ unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
+ unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
+ unsigned char *recov[TEST_SOURCES];
+
+ int rows, align, size;
+ unsigned char *efence_buffs[TEST_SOURCES];
+ unsigned char *efence_update_buffs[TEST_SOURCES];
+ unsigned int offset;
+ u8 *ubuffs[TEST_SOURCES];
+ u8 *update_ubuffs[TEST_SOURCES];
+ u8 *temp_ubuffs[TEST_SOURCES];
+
+ printf("test " xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+ srand(TEST_SEED);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ temp_buffs[i] = buf;
+		memset(temp_buffs[i], 0, TEST_LEN);	// the update function requires zeroed destination buffers
+ }
+
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ update_buffs[i] = buf;
+		memset(update_buffs[i], 0, TEST_LEN);	// the update function requires zeroed destination buffers
+ }
+ // Test erasure code by encode and recovery
+
+ encode_matrix = malloc(MMAX * KMAX);
+ decode_matrix = malloc(MMAX * KMAX);
+ invert_matrix = malloc(MMAX * KMAX);
+ g_tbls = malloc(KMAX * TEST_SOURCES * 32);
+ if (encode_matrix == NULL || decode_matrix == NULL
+ || invert_matrix == NULL || g_tbls == NULL) {
+ printf("Test failure! Error with malloc\n");
+ return -1;
+ }
+ // Pick a first test
+ m = 14;
+ k = 10;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ // Make random data
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < TEST_LEN; j++) {
+ buffs[i][j] = rand();
+ update_buffs[i][j] = buffs[i][j];
+ }
+ }
+
+ // Generate encode matrix encode_matrix
+ // The matrix generated by gf_gen_rs_matrix
+	// is not always invertible.
+ gf_gen_rs_matrix(encode_matrix, m, k);
+
+ // Generate g_tbls from encode matrix encode_matrix
+ ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+ // Perform matrix dot_prod for EC encoding
+ // using g_tbls from encode matrix encode_matrix
+ REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i],
+ &update_buffs[k]);
+ }
+ for (i = 0; i < m - k; i++) {
+ if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) {
+ printf("\nupdate_buffs%d :", i);
+ dump(update_buffs[k + i], 25);
+ printf("buffs%d :", i);
+ dump(buffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Choose random buffers to be in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list, src_in_err,
+ nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+		printf("gf_gen_decode_matrix failed\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+ // Its order must be the same as the order
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = update_buffs[decode_index[i]];
+ }
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ REF_FUNCTION(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ printf("orig :");
+ dump(update_buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+ putchar('.');
+
+	// Pick a second test
+ m = 7;
+ k = 5;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ // Zero the destination buffer for update function
+ for (i = k; i < TEST_SOURCES; i++) {
+ memset(buffs[i], 0, TEST_LEN);
+ memset(update_buffs[i], 0, TEST_LEN);
+ }
+ // Make random data
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < TEST_LEN; j++) {
+ buffs[i][j] = rand();
+ update_buffs[i][j] = buffs[i][j];
+ }
+ }
+
+ // The matrix generated by gf_gen_cauchy1_matrix
+	// is always invertible.
+ gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+ // Generate g_tbls from encode matrix encode_matrix
+ ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+ // Perform matrix dot_prod for EC encoding
+ // using g_tbls from encode matrix encode_matrix
+ REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i],
+ &update_buffs[k]);
+ }
+ for (i = 0; i < m - k; i++) {
+ if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) {
+ printf("\nupdate_buffs%d :", i);
+ dump(update_buffs[k + i], 25);
+ printf("buffs%d :", i);
+ dump(buffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Choose random buffers to be in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list, src_in_err,
+ nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+		printf("gf_gen_decode_matrix failed\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+ // Its order must be the same as the order
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = update_buffs[decode_index[i]];
+ }
+
+ // Recover data
+ for (i = 0; i < TEST_SOURCES; i++) {
+ memset(temp_buffs[i], 0, TEST_LEN);
+ }
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN, k, nerrs, i, g_tbls, recov[i], &temp_buffs[k]);
+ }
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 != memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ printf("orig :");
+ dump(update_buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+ putchar('.');
+
+ // Do more random tests
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ // Zero the destination buffer for update function
+ for (i = k; i < TEST_SOURCES; i++) {
+ memset(buffs[i], 0, TEST_LEN);
+ memset(update_buffs[i], 0, TEST_LEN);
+ }
+ // Make random data
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < TEST_LEN; j++) {
+ buffs[i][j] = rand();
+ update_buffs[i][j] = buffs[i][j];
+ }
+ }
+
+ // The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+ gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+ // Make parity vects
+ // Generate g_tbls from encode matrix a
+ ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+ // Perform matrix dot_prod for EC encoding
+ // using g_tbls from encode matrix a
+ REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i],
+ &update_buffs[k]);
+ }
+ for (i = 0; i < m - k; i++) {
+ if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) {
+ printf("\nupdate_buffs%d :", i);
+ dump(update_buffs[k + i], 25);
+ printf("buffs%d :", i);
+ dump(buffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("gf_gen_decode_matrix failed\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+ // Its order must be the same as the order
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = update_buffs[decode_index[i]];
+ }
+
+ // Recover data
+ for (i = 0; i < TEST_SOURCES; i++) {
+ memset(temp_buffs[i], 0, TEST_LEN);
+ }
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(TEST_LEN, k, nerrs, i, g_tbls, recov[i],
+ &temp_buffs[k]);
+ }
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 !=
+ memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]],
+ TEST_LEN)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(update_buffs, m, 25);
+ printf("orig :");
+ dump(update_buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ return -1;
+ }
+ }
+ putchar('.');
+ }
+
+ // Run tests at end of buffer for Electric Fence
+ k = 16;
+ align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
+ if (k > KMAX)
+ return -1;
+
+ for (rows = 1; rows <= 16; rows++) {
+ m = k + rows;
+ if (m > MMAX)
+ return -1;
+
+ for (i = k; i < TEST_SOURCES; i++) {
+ memset(buffs[i], 0, TEST_LEN);
+ memset(update_buffs[i], 0, TEST_LEN);
+ }
+ // Make random data
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < TEST_LEN; j++) {
+ buffs[i][j] = rand();
+ update_buffs[i][j] = buffs[i][j];
+ }
+ }
+
+ for (size = 0; size <= EFENCE_TEST_MAX_SIZE; size += align) {
+			for (i = 0; i < m; i++) {	// Line up size bytes from the end
+ efence_buffs[i] = buffs[i] + TEST_LEN - size;
+ efence_update_buffs[i] = update_buffs[i] + TEST_LEN - size;
+ }
+ // Zero the destination buffer for update function
+ for (i = k; i < m; i++) {
+ memset(efence_buffs[i], 0, size);
+ memset(efence_update_buffs[i], 0, size);
+ }
+
+ // The matrix generated by gf_gen_cauchy1_matrix
+			// is always invertible.
+ gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+ // Make parity vects
+ // Generate g_tbls from encode matrix a
+ ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+ // Perform matrix dot_prod for EC encoding
+ // using g_tbls from encode matrix a
+ REF_FUNCTION(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls,
+ efence_update_buffs[i],
+ &efence_update_buffs[k]);
+ }
+ for (i = 0; i < m - k; i++) {
+ if (0 !=
+ memcmp(efence_update_buffs[k + i], efence_buffs[k + i],
+ size)) {
+ printf("\nefence_update_buffs%d :", i);
+ dump(efence_update_buffs[k + i], 25);
+ printf("efence_buffs%d :", i);
+ dump(efence_buffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+				printf("gf_gen_decode_matrix failed\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+ // Its order must be the same as the order
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = efence_update_buffs[decode_index[i]];
+ }
+
+ // Recover data
+ for (i = 0; i < TEST_SOURCES; i++) {
+ memset(temp_buffs[i], 0, TEST_LEN);
+ }
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i],
+ &temp_buffs[k]);
+ }
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 !=
+ memcmp(temp_buffs[k + i],
+ efence_update_buffs[src_err_list[i]], size)) {
+ printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
+ k, nerrs);
+
+ printf("size = %d\n", size);
+
+ printf("Test erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((u8 *) encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((u8 *) decode_matrix, m, k);
+
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], align);
+ printf("orig :");
+ dump(efence_update_buffs[src_err_list[i]], align);
+ return -1;
+ }
+ }
+ }
+ putchar('.');
+
+ }
+
+ // Test rand ptr alignment if available
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
+
+ offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
+ // Add random offsets
+ for (i = 0; i < m; i++) {
+ memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
+ memset(update_buffs[i], 0, TEST_LEN); // zero pad to check write-over
+ memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
+ ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ update_ubuffs[i] =
+ update_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ }
+
+ // Zero the destination buffer for update function
+ for (i = k; i < m; i++) {
+ memset(ubuffs[i], 0, size);
+ memset(update_ubuffs[i], 0, size);
+ }
+ // Make random data
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < size; j++) {
+ ubuffs[i][j] = rand();
+ update_ubuffs[i][j] = ubuffs[i][j];
+ }
+ }
+
+ // The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+ gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+ // Make parity vects
+ // Generate g_tbls from encode matrix a
+ ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+ // Perform matrix dot_prod for EC encoding
+ // using g_tbls from encode matrix a
+ REF_FUNCTION(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, update_ubuffs[i],
+ &update_ubuffs[k]);
+ }
+ for (i = 0; i < m - k; i++) {
+ if (0 != memcmp(update_ubuffs[k + i], ubuffs[k + i], size)) {
+ printf("\nupdate_ubuffs%d :", i);
+ dump(update_ubuffs[k + i], 25);
+ printf("ubuffs%d :", i);
+ dump(ubuffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("gf_gen_decode_matrix failed\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+ // Its order must be the same as the order
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = update_ubuffs[decode_index[i]];
+ }
+
+ // Recover data
+ for (i = 0; i < m; i++) {
+ memset(temp_ubuffs[i], 0, size);
+ }
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i],
+ &temp_ubuffs[k]);
+ }
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 !=
+ memcmp(temp_ubuffs[k + i], update_ubuffs[src_err_list[i]], size)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((unsigned char *)encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((unsigned char *)invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((unsigned char *)decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(update_ubuffs, m, 25);
+ printf("orig :");
+ dump(update_ubuffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_ubuffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Confirm that padding around dests is unchanged
+ memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
+
+ for (i = 0; i < m; i++) {
+
+ offset = update_ubuffs[i] - update_buffs[i];
+
+ if (memcmp(update_buffs[i], temp_buffs[0], offset)) {
+ printf("Fail rand ualign encode pad start\n");
+ return -1;
+ }
+ if (memcmp
+ (update_buffs[i] + offset + size, temp_buffs[0],
+ PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign encode pad end\n");
+ return -1;
+ }
+ }
+
+ for (i = 0; i < nerrs; i++) {
+
+ offset = temp_ubuffs[k + i] - temp_buffs[k + i];
+ if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
+ printf("Fail rand ualign decode pad start\n");
+ return -1;
+ }
+ if (memcmp
+ (temp_buffs[k + i] + offset + size, temp_buffs[0],
+ PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign decode pad end\n");
+ return -1;
+ }
+ }
+
+ putchar('.');
+ }
+
+ // Test size alignment
+
+ align = (LEN_ALIGN_CHK_B != 0) ? 13 : ALIGN_SIZE;
+
+ for (size = TEST_LEN; size >= 0; size -= align) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ // Zero the destination buffer for update function
+ for (i = k; i < TEST_SOURCES; i++) {
+ memset(buffs[i], 0, size);
+ memset(update_buffs[i], 0, size);
+ }
+ // Make random data
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < size; j++) {
+ buffs[i][j] = rand();
+ update_buffs[i][j] = buffs[i][j];
+ }
+ }
+
+ // The matrix generated by gf_gen_cauchy1_matrix
+		// is always invertible.
+ gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+ // Make parity vects
+ // Generate g_tbls from encode matrix a
+ ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+ // Perform matrix dot_prod for EC encoding
+ // using g_tbls from encode matrix a
+ REF_FUNCTION(size, k, m - k, g_tbls, buffs, &buffs[k]);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, update_buffs[i],
+ &update_buffs[k]);
+ }
+ for (i = 0; i < m - k; i++) {
+ if (0 != memcmp(update_buffs[k + i], buffs[k + i], size)) {
+ printf("\nupdate_buffs%d (size=%d) :", i, size);
+ dump(update_buffs[k + i], 25);
+ printf("buffs%d (size=%d) :", i, size);
+ dump(buffs[k + i], 25);
+ return -1;
+ }
+ }
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+ // Generate decode matrix
+ re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, decode_index, src_err_list,
+ src_in_err, nerrs, nsrcerrs, k, m);
+ if (re != 0) {
+			printf("gf_gen_decode_matrix failed\n");
+ return -1;
+ }
+ // Pack recovery array as list of valid sources
+ // Its order must be the same as the order
+ // to generate matrix b in gf_gen_decode_matrix
+ for (i = 0; i < k; i++) {
+ recov[i] = update_buffs[decode_index[i]];
+ }
+
+ // Recover data
+ for (i = 0; i < TEST_SOURCES; i++) {
+ memset(temp_buffs[i], 0, TEST_LEN);
+ }
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ for (i = 0; i < k; i++) {
+ FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i],
+ &temp_buffs[k]);
+ }
+
+ for (i = 0; i < nerrs; i++) {
+
+ if (0 !=
+ memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], size)) {
+				printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf(" - erase list = ");
+ for (j = 0; j < nerrs; j++)
+ printf(" %d", src_err_list[j]);
+ printf(" - Index = ");
+ for (p = 0; p < k; p++)
+ printf(" %d", decode_index[p]);
+ printf("\nencode_matrix:\n");
+ dump_u8xu8((unsigned char *)encode_matrix, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((unsigned char *)invert_matrix, k, k);
+ printf("\ndecode_matrix:\n");
+ dump_u8xu8((unsigned char *)decode_matrix, m, k);
+ printf("orig data:\n");
+ dump_matrix(update_buffs, m, 25);
+ printf("orig :");
+ dump(update_buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buffs[k + i], 25);
+ return -1;
+ }
+ }
+ putchar('.');
+ }
+
+ printf("done EC tests: Pass\n");
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/gen_rs_matrix_limits.c b/src/isa-l/erasure_code/gen_rs_matrix_limits.c
new file mode 100644
index 000000000..85061484b
--- /dev/null
+++ b/src/isa-l/erasure_code/gen_rs_matrix_limits.c
@@ -0,0 +1,115 @@
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "erasure_code.h"
+
+#define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */
+#define M_MAX 0x20
+#define K_MAX 0x10
+#define ROWS M_MAX
+#define COLS K_MAX
+
+static inline int min(int a, int b)
+{
+ if (a <= b)
+ return a;
+ else
+ return b;
+}
+
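+/* Copy into out_matrix (row stride dim) the submatrix of in_matrix whose
+ * rows and columns are selected by the set bits of row_indicator and
+ * col_indicator. */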
+void gen_sub_matrix(unsigned char *out_matrix, int dim, unsigned char *in_matrix, int rows,
+ int cols, uint64_t row_indicator, uint64_t col_indicator)
+{
+ int i, j, r, s;
+
+ for (i = 0, r = 0; i < rows; i++) {
+ if (!(row_indicator & ((uint64_t) 1 << i)))
+ continue;
+
+ for (j = 0, s = 0; j < cols; j++) {
+ if (!(col_indicator & ((uint64_t) 1 << j)))
+ continue;
+ out_matrix[dim * r + s] = in_matrix[cols * i + j];
+ s++;
+ }
+ r++;
+ }
+}
+
+/* Gosper's Hack */
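+/* Advance *subset to the lexicographically next subsize-bit subset of
+ * element_count elements; returns 1 (after resetting *subset to the first
+ * subset) once all subsets have been visited. */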
+uint64_t next_subset(uint64_t * subset, uint64_t element_count, uint64_t subsize)
+{
+ uint64_t tmp1 = *subset & -*subset;
+ uint64_t tmp2 = *subset + tmp1;
+ *subset = (((*subset ^ tmp2) >> 2) / tmp1) | tmp2;
+ if (*subset & (((uint64_t) 1 << element_count))) {
+ /* Overflow on last subset */
+ *subset = ((uint64_t) 1 << subsize) - 1;
+ return 1;
+ }
+
+ return 0;
+}
+
+int are_submatrices_singular(unsigned char *vmatrix, int rows, int cols)
+{
+ unsigned char matrix[COLS * COLS];
+ unsigned char invert_matrix[COLS * COLS];
+ uint64_t row_indicator, col_indicator, subset_init, subsize;
+
+ /* Check all square subsize x subsize submatrices of the rows x cols
+	 * vmatrix for singularity */
+ for (subsize = 1; subsize <= min(rows, cols); subsize++) {
+ subset_init = (1 << subsize) - 1;
+ col_indicator = subset_init;
+ do {
+ row_indicator = subset_init;
+ do {
+ gen_sub_matrix(matrix, subsize, vmatrix, rows,
+ cols, row_indicator, col_indicator);
+ if (gf_invert_matrix(matrix, invert_matrix, subsize))
+ return 1;
+
+ } while (next_subset(&row_indicator, rows, subsize) == 0);
+ } while (next_subset(&col_indicator, cols, subsize) == 0);
+ }
+
+ return 0;
+}
+
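+/* A systematic code can recover any erasure pattern iff every square
+ * submatrix of its parity portion is nonsingular; main() reports, for each
+ * k, the largest m for which gf_gen_rs_matrix keeps this property. */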
+int main(int argc, char **argv)
+{
+ unsigned char vmatrix[(ROWS + COLS) * COLS];
+ int rows, cols;
+
+ if (K_MAX > MAX_CHECK) {
+ printf("K_MAX too large for this test\n");
+ return 0;
+ }
+ if (M_MAX > MAX_CHECK) {
+ printf("M_MAX too large for this test\n");
+ return 0;
+ }
+ if (M_MAX < K_MAX) {
+		printf("M_MAX must not be smaller than K_MAX\n");
+ return 0;
+ }
+
+ printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
+ printf("gen_rs_matrix creates erasure codes for:\n");
+
+ for (cols = 1; cols <= K_MAX; cols++) {
+ for (rows = 1; rows <= M_MAX - cols; rows++) {
+ gf_gen_rs_matrix(vmatrix, rows + cols, cols);
+
+ /* Verify the Vandermonde portion of vmatrix contains no
+ * singular submatrix */
+ if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
+ break;
+
+ }
+ printf(" k = %2d, m <= %2d \n", cols, rows + cols - 1);
+
+ }
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm
new file mode 100644
index 000000000..cfbc2eb48
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm
@@ -0,0 +1,337 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r9
+ %define tmp4 r12 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ save_reg r12, 3*16 + 0*8
+ save_reg r13, 3*16 + 1*8
+ save_reg r14, 3*16 + 2*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ mov r12, [rsp + 3*16 + 0*8]
+ mov r13, [rsp + 3*16 + 1*8]
+ mov r14, [rsp + 3*16 + 2*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0	trans		;trans and trans2 are used to access stack-resident args
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp4 trans2
+ %define tmp4_m var(0)
+ %define return eax
+ %macro SLDR 2 ;; stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*1 ;1 local variable
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*1 ;1 local variable
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define dest2 tmp4
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f xmm8
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xmm5
+ %define xgft2_hi xmm4
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%else ;32-bit code
+ %define xmask0f xmm4
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%endif
+
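+;;; Each GF(2^8) multiply uses the nibble-split method: the low and high
+;;; 4-bit halves of every source byte index 16-entry lookup tables
+;;; (xgft*_lo/xgft*_hi) via vpshufb, and the two partials xor into the sum.
+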
+align 16
+mk_global gf_2vect_dot_prod_avx, function
+
+func(gf_2vect_dot_prod_avx)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop16:
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %ifidn PS,8 ; 64-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+
+ SLDR len, len_m
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
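+	;; (len was pre-decremented by 16, so pos = len makes the final vector
+	;; end exactly at the true buffer length, overlapping the prior pass)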
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_2vect_dot_prod_avx, 02, 05, 0191
diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..a06f67a4f
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm
@@ -0,0 +1,356 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r9
+ %define tmp4 r12 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ save_reg r12, 3*16 + 0*8
+ save_reg r13, 3*16 + 1*8
+ save_reg r14, 3*16 + 2*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ mov r12, [rsp + 3*16 + 0*8]
+ mov r13, [rsp + 3*16 + 1*8]
+ mov r14, [rsp + 3*16 + 2*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0	trans		;trans and trans2 are used to access stack-resident args
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp.w edx
+ %define tmp.b dl
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp4 trans2
+ %define tmp4_m var(0)
+ %define return eax
+ %macro SLDR 2 ;stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*1 ;1 local variable
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*1 ;1 local variable
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define dest2 tmp4
+%define pos return
+
+%ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp4_m
+%endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f ymm8
+ %define xmask0fx xmm8
+ %define xgft1_lo ymm7
+ %define xgft1_hi ymm6
+ %define xgft2_lo ymm5
+ %define xgft2_hi ymm4
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+%else ;32-bit code
+ %define xmask0f ymm7
+ %define xmask0fx xmm7
+ %define xgft1_lo ymm5
+ %define xgft1_hi ymm4
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+
+%endif
+
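+;;; vpshufb on ymm registers shuffles within each 128-bit lane, so every
+;;; 16-entry nibble table is duplicated into both lanes with vperm2i128
+;;; before the lookups.
+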
+align 16
+mk_global gf_2vect_dot_prod_avx2, function
+
+func(gf_2vect_dot_prod_avx2)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 32
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop32:
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
+ %ifidn PS,8 ; 64-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add tmp, 32
+ add vec_i, PS
+ %else
+ XLDR x0, [ptr+pos] ;Get next source vector
+ %endif
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+
+ SLDR len, len_m
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+	mov	pos, len	;Overlapped offset length-32
+ jmp .loop32 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_2vect_dot_prod_avx2, 04, 05, 0196
diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..92d7e9d55
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm
@@ -0,0 +1,245 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r13, 9*16 + 1*8
+ save_reg r14, 9*16 + 2*8
+ save_reg r15, 9*16 + 3*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r13, [rsp + 9*16 + 1*8]
+ mov r14, [rsp + 9*16 + 2*8]
+ mov r15, [rsp + 9*16 + 3*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%define xmask0f zmm8
+%define xgft1_lo zmm7
+%define xgft1_loy ymm7
+%define xgft1_hi zmm6
+%define xgft2_lo zmm5
+%define xgft2_loy ymm5
+%define xgft2_hi zmm4
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xp1 zmm2
+%define xp2 zmm3
+
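+;;; Each 32-byte table pair is loaded into the ymm half of a zmm; vshufi64x2
+;;; then broadcasts the low-nibble table (lane 0) and the high-nibble table
+;;; (lane 1) across all four 128-bit lanes for per-lane vpshufb lookups.
+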
+default rel
+[bits 64]
+
+section .text
+
+align 16
+mk_global gf_2vect_dot_prod_avx512, function
+func(gf_2vect_dot_prod_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest2, [dest1+PS]
+ mov dest1, [dest1]
+
+.loop64:
+ vpxorq xp1, xp1, xp1
+ vpxorq xp2, xp2, xp2
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
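+	;; GF(2^8) multiply via two 16-entry table lookups: for each byte b,
+	;; gf_mul(c, b) = tbl_lo[b & 0x0f] ^ tbl_hi[b >> 4], and vpshufb
+	;; evaluates the lookups for all 64 bytes at once.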
+	vpandq xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpandq x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+ vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ add tmp, 32
+
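+	;; Each 32-byte table pair sits in the low half of a zmm; the
+	;; vshufi64x2 immediates broadcast one 128-bit lane to all four:
+	;; 0x00 copies the low-nibble table, 0x55 the high-nibble table.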
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+
+ vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxorq xp1, xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxorq xp2, xp2, xgft2_hi ;xp2 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
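+	;; len was biased by -64 on entry, so a partial tail remains unless
+	;; pos == len+64.  The tail is handled by re-running one full 64-byte
+	;; pass at offset len (total-64); recomputing bytes that overlap the
+	;; previous pass is harmless because the output depends only on the
+	;; sources, not on prior dest contents.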
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_2vect_dot_prod_avx512
+no_gf_2vect_dot_prod_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm
new file mode 100644
index 000000000..f7e44e7f9
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm
@@ -0,0 +1,339 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
+;;;
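+;;;
+;;; Same GF(2^8) dot product as the AVX-512 version above, processed
+;;; 16 bytes per pass.  Returns 0 on success, or 1 when len is below the
+;;; 16-byte minimum the kernel requires.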
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r9
+ %define tmp4 r12 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_reg r12, 3*16 + 0*8
+ save_reg r13, 3*16 + 1*8
+ save_reg r14, 3*16 + 2*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ mov r12, [rsp + 3*16 + 0*8]
+ mov r13, [rsp + 3*16 + 1*8]
+ mov r14, [rsp + 3*16 + 2*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans ;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp4 trans2
+ %define tmp4_m var(0)
+ %define return eax
+ %macro SLDR 2 ;; stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
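+	;; 32-bit builds lack enough general registers to keep every argument
+	;; live, so some values are spilled to stack slots; SLDR/SSTR reload
+	;; and store them around each use.  The 64-bit builds define both as
+	;; empty macros, so one code body serves both targets.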
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*1 ;1 local variable
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*1 ;1 local variable
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define dest2 tmp4
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+%ifidn PS,8 ;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f xmm8
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xmm5
+ %define xgft2_hi xmm4
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%else ;32-bit code
+ %define xmask0f xmm4
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%endif
+
+align 16
+mk_global gf_2vect_dot_prod_sse, function
+
+func(gf_2vect_dot_prod_sse)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop16:
+ pxor xp1, xp1
+ pxor xp2, xp2
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %ifidn PS,8 ;64-bit code
+ movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+	movdqa xtmpa, x0	;Keep unshifted copy of src
+	psraw x0, 4		;Shift to put high nibble into bits 3-0
+	pand x0, xmask0f	;Mask high src nibble in bits 3-0
+	pand xtmpa, xmask0f	;Mask low src nibble in bits 3-0
+
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ;32-bit code
+ movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ pxor xp2, xgft2_hi ;xp2 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+
+ SLDR len, len_m
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_2vect_dot_prod_sse, 00, 04, 0062
diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm
new file mode 100644
index 000000000..995c36bbe
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm
@@ -0,0 +1,236 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
+;;;
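+;;;
+;;; "mad" (multiply-and-add) variant: XORs gf_mul(coef_k, src[i]) from a
+;;; single source buffer into two existing dest buffers, one coefficient
+;;; per dest, instead of recomputing a whole dot product.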
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+	%define stack_size 16*9 + 3*8	; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r15, 9*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r15, [rsp + 9*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp2
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm14
+%define xgft1_lo xmm13
+%define xgft1_hi xmm12
+%define xgft2_lo xmm11
+%define xgft2_hi xmm10
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xd1 xmm6
+%define xd2 xmm7
+%define xtmpd1 xmm8
+%define xtmpd2 xmm9
+
+
+align 16
+mk_global gf_2vect_mad_avx, function
+
+func(gf_2vect_mad_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+
+ mov dest2, [dest1+PS]
+ mov dest1, [dest1]
+
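+	;; Unlike dot_prod, mad accumulates into dest, so the overlapped tail
+	;; pass would apply the update twice to bytes the main loop already
+	;; wrote.  The last 16 bytes of each dest are saved here and restored
+	;; into xd1/xd2 just before the final .loop16_overlap pass.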
+ XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest
+ XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest
+
+.loop16:
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+.loop16_overlap:
+ XLDR x0, [src+pos] ;Get next source vector
+
+	vpand xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpand x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+ vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ vmovdqa xd1, xtmpd1 ;Restore xd1
+ vmovdqa xd2, xtmpd2 ;Restore xd2
+ jmp .loop16_overlap ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_2vect_mad_avx, 02, 01, 0204
diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm
new file mode 100644
index 000000000..751677d79
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm
@@ -0,0 +1,247 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*9 + 3*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ vmovdqa [rsp+16*3],xmm9
+ vmovdqa [rsp+16*4],xmm10
+ vmovdqa [rsp+16*5],xmm11
+ vmovdqa [rsp+16*6],xmm12
+ vmovdqa [rsp+16*7],xmm13
+ vmovdqa [rsp+16*8],xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r15, 9*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ vmovdqa xmm9, [rsp+16*3]
+ vmovdqa xmm10, [rsp+16*4]
+ vmovdqa xmm11, [rsp+16*5]
+ vmovdqa xmm12, [rsp+16*6]
+ vmovdqa xmm13, [rsp+16*7]
+ vmovdqa xmm14, [rsp+16*8]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r15, [rsp + 9*16 + 1*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp2
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+
+;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm14
+%define xmask0fx xmm14
+%define xgft1_lo ymm13
+%define xgft1_hi ymm12
+%define xgft2_lo ymm11
+%define xgft2_hi ymm10
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xtmph1 ymm2
+%define xtmpl1 ymm3
+%define xtmph2 ymm4
+%define xtmpl2 ymm5
+%define xd1 ymm6
+%define xd2 ymm7
+%define xtmpd1 ymm8
+%define xtmpd2 ymm9
+
+align 16
+mk_global gf_2vect_mad_avx2, function
+
+func(gf_2vect_mad_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+
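+	;; vpshufb only indexes within its own 128-bit lane, so vperm2i128
+	;; duplicates each table half across both ymm lanes: 0x00 broadcasts
+	;; the low 16 bytes (low-nibble table), 0x11 the high 16 bytes.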
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+	mov dest2, [dest1+PS]		;dest2 = dests[1]
+ mov dest1, [dest1]
+
+	XLDR xtmpd1, [dest1+len]	;backup the last 32 bytes in dest
+	XLDR xtmpd2, [dest2+len]	;backup the last 32 bytes in dest
+
+.loop32:
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+.loop32_overlap:
+ XLDR x0, [src+pos] ;Get next source vector
+
+	vpand xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpand x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+ vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-32
+ vmovdqa xd1, xtmpd1 ;Restore xd1
+ vmovdqa xd2, xtmpd2 ;Restore xd2
+ jmp .loop32_overlap ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_2vect_mad_avx2, 04, 01, 0205
diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm
new file mode 100644
index 000000000..ce372487a
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm
@@ -0,0 +1,230 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_mad_avx512(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define stack_size 16*9 + 3*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ vmovdqa [rsp+16*3],xmm9
+ vmovdqa [rsp+16*4],xmm10
+ vmovdqa [rsp+16*5],xmm11
+ vmovdqa [rsp+16*6],xmm12
+ vmovdqa [rsp+16*7],xmm13
+ vmovdqa [rsp+16*8],xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r15, 9*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ vmovdqa xmm9, [rsp+16*3]
+ vmovdqa xmm10, [rsp+16*4]
+ vmovdqa xmm11, [rsp+16*5]
+ vmovdqa xmm12, [rsp+16*6]
+ vmovdqa xmm13, [rsp+16*7]
+ vmovdqa xmm14, [rsp+16*8]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r15, [rsp + 9*16 + 1*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define PS 8
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+%define dest2 tmp2
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+[bits 64]
+section .text
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xtmph1 zmm2
+%define xtmpl1 zmm3
+%define xtmph2 zmm4
+%define xtmpl2 zmm5
+%define xd1 zmm6
+%define xd2 zmm7
+%define xtmpd1 zmm8
+%define xtmpd2 zmm9
+%define xgft1_hi zmm10
+%define xgft1_lo zmm11
+%define xgft1_loy ymm11
+%define xgft2_hi zmm12
+%define xgft2_lo zmm13
+%define xgft2_loy ymm13
+%define xmask0f zmm14
+
+align 16
+mk_global gf_2vect_mad_avx512, function
+func(gf_2vect_mad_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+ vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+	mov dest2, [dest1+PS]		;dest2 = dests[1]
+ mov dest1, [dest1]
+ mov tmp, -1
+ kmovq k1, tmp
+
+.loop64:
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR x0, [src+pos] ;Get next source vector
+
+	vpandq xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpandq x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+ vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxorq xd1, xd1, xtmph1 ;xd1 += partial
+
+ vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxorq xd2, xd2, xtmph2 ;xd2 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
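+	;; Build a 64-bit byte mask whose top ((len+64-1) % 64) + 1 bits are
+	;; set -- exactly the bytes of the final 64-byte window that the main
+	;; loop has not written yet.  With {k1}{z} the masked-off lookups
+	;; become zero, so the XOR leaves already-updated dest bytes unchanged
+	;; on the overlap pass (k1 was all-ones during the full passes).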
+ mov pos, (1 << 63)
+ lea tmp, [len + 64 - 1]
+ and tmp, 63
+ sarx pos, pos, tmp
+ kmovq k1, pos
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_2vect_mad_avx512
+no_gf_2vect_mad_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_2vect_mad_sse.asm b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm
new file mode 100644
index 000000000..2bff82f0a
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm
@@ -0,0 +1,239 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+	%define stack_size 16*9 + 3*8	; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r15, 9*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r15, [rsp + 9*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp2
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm14
+%define xgft1_lo xmm13
+%define xgft1_hi xmm12
+%define xgft2_lo xmm11
+%define xgft2_hi xmm10
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xd1 xmm6
+%define xd2 xmm7
+%define xtmpd1 xmm8
+%define xtmpd2 xmm9
+
+
+align 16
+mk_global gf_2vect_mad_sse, function
+func(gf_2vect_mad_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+ movdqu xgft1_lo,[tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ mov dest2, [dest1+PS]
+ mov dest1, [dest1]
+
+ XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest
+ XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest
+
+.loop16:
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+.loop16_overlap:
+ XLDR x0, [src+pos] ;Get next source vector
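+	;; Non-VEX pshufb overwrites its first operand, so fresh working
+	;; copies of the table registers are made on every iteration; the
+	;; AVX variants avoid these copies with the non-destructive
+	;; three-operand form.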
+ movdqa xtmph1, xgft1_hi ;Reload const array registers
+ movdqa xtmpl1, xgft1_lo
+ movdqa xtmph2, xgft2_hi ;Reload const array registers
+ movdqa xtmpl2, xgft2_lo
+	movdqa xtmpa, x0	;Keep unshifted copy of src
+	psraw x0, 4		;Shift to put high nibble into bits 3-0
+	pand x0, xmask0f	;Mask high src nibble in bits 3-0
+	pand xtmpa, xmask0f	;Mask low src nibble in bits 3-0
+
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pxor xd1, xtmph1
+
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pxor xd2, xtmph2
+
+ XSTR [dest1+pos], xd1 ;Store result
+ XSTR [dest2+pos], xd2 ;Store result
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ movdqa xd1, xtmpd1 ;Restore xd1
+ movdqa xd2, xtmpd2 ;Restore xd2
+ jmp .loop16_overlap ;Do one more overlap pass
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+ dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_2vect_mad_sse, 00, 01, 0203
diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm
new file mode 100644
index 000000000..79c7ed4f0
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm
@@ -0,0 +1,377 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ save_reg r12, 6*16 + 0*8
+ save_reg r13, 6*16 + 1*8
+ save_reg r14, 6*16 + 2*8
+ save_reg r15, 6*16 + 3*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ mov r12, [rsp + 6*16 + 0*8]
+ mov r13, [rsp + 6*16 + 1*8]
+ mov r14, [rsp + 6*16 + 2*8]
+ mov r15, [rsp + 6*16 + 3*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; var1
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans ;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define arg5 trans2
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp3_m var(0)
+ %define tmp4 trans2
+ %define tmp4_m var(1)
+ %define return eax
+ %macro SLDR 2 ;; stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*2 ;2 local variables
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*2 ;2 local variables
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp3_m
+ %define dest3_m tmp4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f xmm11
+ %define xgft1_lo xmm10
+ %define xgft1_hi xmm9
+ %define xgft2_lo xmm8
+ %define xgft2_hi xmm7
+ %define xgft3_lo xmm6
+ %define xgft3_hi xmm5
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+ %define xp3 xmm4
+%else
+ %define xmask0f xmm7
+ %define xgft1_lo xmm6
+ %define xgft1_hi xmm5
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+ %define xgft3_lo xgft1_lo
+ %define xgft3_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+ %define xp3 xmm4
+%endif
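+;; In 32-bit builds only xmm0-xmm7 exist, so the B and C table registers
+;; alias the A registers above and each dest's tables are reloaded inside
+;; the loop just before use.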
+
+align 16
+mk_global gf_3vect_dot_prod_avx, function
+func(gf_3vect_dot_prod_avx)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest3, [dest1+2*PS]
+ SSTR dest3_m, dest3
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop16:
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %ifidn PS,8 ; 64-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+	vpand xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpand x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ %endif
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
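+	;; 32-bit only: the C tables live at byte offset vec*16, but x86
+	;; addressing scales stop at 8, so vec is briefly doubled to reach
+	;; scale-8 times 2*vec and then restored.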
+ %ifidn PS,4 ; 32-bit code
+ sal vec, 1
+ vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ sar vec, 1
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ SLDR dest3, dest3_m
+ XSTR [dest3+pos], xp3
+
+ SLDR len, len_m
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_3vect_dot_prod_avx, 02, 05, 0192
diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..606c3a133
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm
@@ -0,0 +1,397 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ save_reg r12, 6*16 + 0*8
+ save_reg r13, 6*16 + 1*8
+ save_reg r14, 6*16 + 2*8
+ save_reg r15, 6*16 + 3*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ mov r12, [rsp + 6*16 + 0*8]
+ mov r13, [rsp + 6*16 + 1*8]
+ mov r14, [rsp + 6*16 + 2*8]
+ mov r15, [rsp + 6*16 + 3*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; var1
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans ;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define arg5 trans2
+ %define tmp edx
+ %define tmp.w edx
+ %define tmp.b dl
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp3_m var(0)
+ %define tmp4 trans2
+ %define tmp4_m var(1)
+ %define return eax
+ %macro SLDR 2 ;stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*2 ;2 local variables
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*2 ;2 local variables
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define pos return
+
+%ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp3_m
+ %define dest3_m tmp4_m
+%endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f ymm11
+ %define xmask0fx xmm11
+ %define xgft1_lo ymm10
+ %define xgft1_hi ymm9
+ %define xgft2_lo ymm8
+ %define xgft2_hi ymm7
+ %define xgft3_lo ymm6
+ %define xgft3_hi ymm5
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+ %define xp3 ymm4
+%else
+ %define xmask0f ymm7
+ %define xmask0fx xmm7
+ %define xgft1_lo ymm6
+ %define xgft1_hi ymm5
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+ %define xgft3_lo xgft1_lo
+ %define xgft3_hi xgft1_hi
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+ %define xp3 ymm4
+
+%endif
+
+align 16
+mk_global gf_3vect_dot_prod_avx2, function
+func(gf_3vect_dot_prod_avx2)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 32
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest3, [dest1+2*PS]
+ SSTR dest3_m, dest3
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop32:
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
+ %ifidn PS,8 ; 64-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+
+ vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
+
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+	vpand xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpand x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+ %endif
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ sal vec, 1
+ vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
+ sar vec, 1
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ SLDR dest3, dest3_m
+ XSTR [dest3+pos], xp3
+
+ SLDR len, len_m
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+	mov pos, len	;Overlapped offset length-32
+ jmp .loop32 ;Do one more overlap pass
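+	;; Note: the dot product fully overwrites dest1..dest3 (xp1..xp3 are
+	;; rebuilt from zero each pass), so re-running the final partial
+	;; vector at the overlapped offset simply recomputes bytes that were
+	;; already correct; no byte mask is needed, unlike the *_mad_*
+	;; variants below, which read-modify-write their destinations.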
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_3vect_dot_prod_avx2, 04, 05, 0197
diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..81e96f292
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
@@ -0,0 +1,270 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests);
+;;;
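+;;; Arguments (a sketch of the calling convention, consistent with the
+;;; other dot-product kernels in this directory):
+;;;   len    - byte length of each buffer (the kernel fails for len < 64)
+;;;   vec    - number of source buffers
+;;;   g_tbls - GF(2^8) lookup tables, 32 bytes per (source, dest) pair,
+;;;            e.g. as produced by ec_init_tables()
+;;;   buffs  - array of vec source pointers
+;;;   dests  - array of 3 destination pointers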
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r13, 9*16 + 1*8
+ save_reg r14, 9*16 + 2*8
+ save_reg r15, 9*16 + 3*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r13, [rsp + 9*16 + 1*8]
+ mov r14, [rsp + 9*16 + 2*8]
+ mov r15, [rsp + 9*16 + 3*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
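+;; The XLDR/XSTR selection above is a build-time choice: the default is
+;; unaligned moves; defining EC_ALIGNED_ADDR (e.g. on the assembler
+;; command line) switches to non-temporal moves, and additionally
+;; defining NO_NT_LDST falls back to plain aligned moves. Both macros
+;; are assumed to come from the build system, not this file.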
+
+%define xmask0f zmm11
+%define xgft1_lo zmm10
+%define xgft1_loy ymm10
+%define xgft1_hi zmm9
+%define xgft2_lo zmm8
+%define xgft2_loy ymm8
+%define xgft2_hi zmm7
+%define xgft3_lo zmm6
+%define xgft3_loy ymm6
+%define xgft3_hi zmm5
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xp1 zmm2
+%define xp2 zmm3
+%define xp3 zmm4
+
+default rel
+[bits 64]
+
+section .text
+
+align 16
+mk_global gf_3vect_dot_prod_avx512, function
+func(gf_3vect_dot_prod_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest2, [dest1+PS]
+ mov dest3, [dest1+2*PS]
+ mov dest1, [dest1]
+
+.loop64:
+ vpxorq xp1, xp1, xp1
+ vpxorq xp2, xp2, xp2
+ vpxorq xp3, xp3, xp3
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ add tmp, 32
+
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
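+
+	;; Each 32-byte table pair loaded above holds the low-nibble table in
+	;; its first 16 bytes and the high-nibble table in its second 16.
+	;; vshufi64x2 with imm8 0x00 broadcasts 128-bit lane 0 (low table) to
+	;; all four zmm lanes; 0x55 (binary 01 01 01 01) broadcasts lane 1
+	;; (high table), readying both tables for a full-width vpshufb.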
+
+ vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxorq xp1, xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxorq xp2, xp2, xgft2_hi ;xp2 += partial
+
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+
+ vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxorq xp3, xp3, xgft3_hi ;xp3 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [dest3+pos], xp3
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_3vect_dot_prod_avx512
+no_gf_3vect_dot_prod_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm
new file mode 100644
index 000000000..d52c72b74
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm
@@ -0,0 +1,378 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_reg r12, 6*16 + 0*8
+ save_reg r13, 6*16 + 1*8
+ save_reg r14, 6*16 + 2*8
+ save_reg r15, 6*16 + 3*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ mov r12, [rsp + 6*16 + 0*8]
+ mov r13, [rsp + 6*16 + 1*8]
+ mov r14, [rsp + 6*16 + 2*8]
+ mov r15, [rsp + 6*16 + 3*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; var1
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+	%define arg0	trans		;trans and trans2 stand in for arguments/variables kept on the stack
+	%define arg0_m	arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define arg5 trans2
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp3_m var(0)
+ %define tmp4 trans2
+ %define tmp4_m var(1)
+ %define return eax
+ %macro SLDR 2 ;; stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
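+
+	;; On elf32 there are too few registers, so several "registers" above
+	;; are really stack slots; SLDR/SSTR reload and spill them. A rough
+	;; example of the expansion here: `SLDR len, len_m` becomes
+	;; `mov ecx, [ebp + 8]`, while on the 64-bit targets both macros
+	;; expand to nothing because the value stays in a real register.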
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*2 ;2 local variables
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*2 ;2 local variables
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp3_m
+ %define dest3_m tmp4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f xmm11
+ %define xgft1_lo xmm2
+ %define xgft1_hi xmm3
+ %define xgft2_lo xmm4
+ %define xgft2_hi xmm7
+ %define xgft3_lo xmm6
+ %define xgft3_hi xmm5
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm10
+ %define xp2 xmm9
+ %define xp3 xmm8
+%else
+ %define xmask0f xmm7
+ %define xgft1_lo xmm6
+ %define xgft1_hi xmm5
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+ %define xgft3_lo xgft1_lo
+ %define xgft3_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+ %define xp3 xmm4
+%endif
+
+align 16
+mk_global gf_3vect_dot_prod_sse, function
+func(gf_3vect_dot_prod_sse)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest3, [dest1+2*PS]
+ SSTR dest3_m, dest3
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop16:
+ pxor xp1, xp1
+ pxor xp2, xp2
+ pxor xp3, xp3
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %ifidn PS,8 ;64-bit code
+ movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
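+
+	;; There is no byte-granular shift in SSE, so psraw shifts 16-bit
+	;; words; bits dragged across byte boundaries are discarded by the
+	;; 0x0f mask. E.g. word 0xABCD -> psraw 4 -> 0xFABC -> pand 0x0f0f
+	;; -> 0x0A0C, the high nibbles of 0xAB and 0xCD. Each pshufb below
+	;; then uses a nibble as a 16-entry table index, so the per-byte
+	;; product is tbl_lo[b & 0x0f] ^ tbl_hi[b >> 4] (names illustrative).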
+
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ;32-bit code
+ movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ %endif
+ pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ pxor xp2, xgft2_hi ;xp2 += partial
+
+ %ifidn PS,4 ;32-bit code
+ sal vec, 1
+ movdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ movdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ sar vec, 1
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ pxor xp3, xgft3_hi ;xp3 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ SLDR dest3, dest3_m
+ XSTR [dest3+pos], xp3
+
+ SLDR len, len_m
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_3vect_dot_prod_sse, 00, 06, 0063
diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm
new file mode 100644
index 000000000..13963f695
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm
@@ -0,0 +1,288 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ vmovdqa [rsp+16*3],xmm9
+ vmovdqa [rsp+16*4],xmm10
+ vmovdqa [rsp+16*5],xmm11
+ vmovdqa [rsp+16*6],xmm12
+ vmovdqa [rsp+16*7],xmm13
+ vmovdqa [rsp+16*8],xmm14
+ vmovdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ vmovdqa xmm9, [rsp+16*3]
+ vmovdqa xmm10, [rsp+16*4]
+ vmovdqa xmm11, [rsp+16*5]
+ vmovdqa xmm12, [rsp+16*6]
+ vmovdqa xmm13, [rsp+16*7]
+ vmovdqa xmm14, [rsp+16*8]
+ vmovdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft1_lo xmm14
+%define xgft1_hi xmm13
+%define xgft2_lo xmm12
+%define xgft2_hi xmm11
+%define xgft3_lo xmm10
+%define xgft3_hi xmm9
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xd1 xmm8
+%define xd2 xtmpl1
+%define xd3 xtmph1
+
+align 16
+mk_global gf_3vect_mad_avx, function
+func(gf_3vect_mad_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xgft3_hi, [tmp+2*vec+16]; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector
+
+ ; dest2
+ vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+ vpxor xd3, xd3, xtmph3 ;xd3 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp, len ;Overlapped offset length-16
+ XLDR x0, [src+tmp] ;Get next source vector
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest vector
+
+ sub len, pos
+
+ movdqa xtmph3, [constip16] ;Load const of i + 16
+ vpinsrb xtmpl3, xtmpl3, len.w, 15
+ vpshufb xtmpl3, xtmpl3, xmask0f ;Broadcast len to all bytes
+ vpcmpgtb xtmpl3, xtmpl3, xtmph3
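+
+	;; constip16 holds the signed bytes -1,-2,...,-16 (lane i = -(i+1)),
+	;; and (len - pos) is the negative tail remainder, broadcast to every
+	;; byte above. vpcmpgtb therefore sets lane i to 0xFF exactly when
+	;; i >= pos - len, i.e. only in the not-yet-processed tail bytes;
+	;; vpand-ing the partials with this mask below leaves the already
+	;; written bytes of the overlapped vector unchanged.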
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpand xgft1_hi, xgft1_hi, xtmpl3
+ vpxor xd1, xd1, xgft1_hi
+
+ ; dest2
+ vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpand xgft2_hi, xgft2_hi, xtmpl3
+ vpxor xd2, xd2, xgft2_hi
+
+ ; dest3
+ vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpand xgft3_hi, xgft3_hi, xtmpl3
+ vpxor xd3, xd3, xgft3_hi
+
+ XSTR [dest1+tmp], xd1
+ XSTR [dest2+tmp], xd2
+ XSTR [dest3+tmp], xd3
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_3vect_mad_avx, 02, 01, 0207
diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm
new file mode 100644
index 000000000..797d95463
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm
@@ -0,0 +1,317 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
+;;;
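+;;; Unlike the dot-product kernels, the *_mad_* kernels update existing
+;;; parity in place from a single source (a rough sketch; gf_mul() is
+;;; illustrative, not a symbol in this file):
+;;;
+;;;   for (i = 0; i < len; i++) {
+;;;       dest1[i] ^= gf_mul(a, src[i]);   // a, b, c are the constants
+;;;       dest2[i] ^= gf_mul(b, src[i]);   // for source index vec_i in
+;;;       dest3[i] ^= gf_mul(c, src[i]);   // mul_array
+;;;   }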
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ vmovdqa [rsp+16*3],xmm9
+ vmovdqa [rsp+16*4],xmm10
+ vmovdqa [rsp+16*5],xmm11
+ vmovdqa [rsp+16*6],xmm12
+ vmovdqa [rsp+16*7],xmm13
+ vmovdqa [rsp+16*8],xmm14
+ vmovdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ vmovdqa xmm9, [rsp+16*3]
+ vmovdqa xmm10, [rsp+16*4]
+ vmovdqa xmm11, [rsp+16*5]
+ vmovdqa xmm12, [rsp+16*6]
+ vmovdqa xmm13, [rsp+16*7]
+ vmovdqa xmm14, [rsp+16*8]
+ vmovdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+ %endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm15
+%define xmask0fx xmm15
+%define xgft1_lo ymm14
+%define xgft1_hi ymm13
+%define xgft2_lo ymm12
+%define xgft3_lo ymm11
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xtmph1 ymm2
+%define xtmpl1 ymm3
+%define xtmph2 ymm4
+%define xtmpl2 ymm5
+%define xtmpl2x xmm5
+%define xtmph3 ymm6
+%define xtmpl3 ymm7
+%define xtmpl3x xmm7
+%define xd1 ymm8
+%define xd2 ymm9
+%define xd3 ymm10
+
+align 16
+mk_global gf_3vect_mad_avx2, function
+func(gf_3vect_mad_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
+
+ vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop32:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+ vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xtmpl2, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+
+ vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xtmpl3, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmpl3 ;GF add high and low partials
+ vpxor xd3, xtmph3 ;xd3 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan32:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp.b, 0x1f
+ vpinsrb xtmpl2x, xtmpl2x, tmp.w, 0
+ vpbroadcastb xtmpl2, xtmpl2x ;Construct mask 0x1f1f1f...
+
+ mov tmp, len ;Overlapped offset length-32
+
+ XLDR x0, [src+tmp] ;Get next source vector
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+ XLDR xd3, [dest3+tmp] ;Get next dest vector
+
+ sub len, pos
+
+ vmovdqa xtmph3, [constip32] ;Load const of i + 32
+ vpinsrb xtmpl3x, xtmpl3x, len.w, 15
+ vinserti128 xtmpl3, xtmpl3, xtmpl3x, 1 ;swapped to xtmpl3x | xtmpl3x
+ vpshufb xtmpl3, xtmpl3, xtmpl2 ;Broadcast len to all bytes. xtmpl2=0x1f1f1f...
+ vpcmpgtb xtmpl3, xtmpl3, xtmph3
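+
+	;; vpshufb only indexes within each 128-bit lane, so the len byte is
+	;; first copied into both lanes with vinserti128, then the 0x1f1f..
+	;; shuffle mask (selecting byte 15 of each lane) smears it across all
+	;; 32 bytes. As in the SSE/AVX variants, vpcmpgtb against the signed
+	;; constants -1..-32 yields 0xFF only in the unprocessed tail lanes.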
+
+ vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+
+ vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpand xtmph1, xtmph1, xtmpl3
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials
+ vpand xtmph2, xtmph2, xtmpl3
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xgft3_lo ;GF add high and low partials
+ vpand xtmph3, xtmph3, xtmpl3
+ vpxor xd3, xd3, xtmph3 ;xd3 += partial
+
+ XSTR [dest1+tmp], xd1
+ XSTR [dest2+tmp], xd2
+ XSTR [dest3+tmp], xd3
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 32
+constip32:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+ dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7
+
+;;; func core, ver, snum
+slversion gf_3vect_mad_avx2, 04, 01, 0208
diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm
new file mode 100644
index 000000000..bc6190067
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm
@@ -0,0 +1,247 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_mad_avx512(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ vmovdqa [rsp+16*3],xmm9
+ vmovdqa [rsp+16*4],xmm10
+ vmovdqa [rsp+16*5],xmm11
+ vmovdqa [rsp+16*6],xmm12
+ vmovdqa [rsp+16*7],xmm13
+ vmovdqa [rsp+16*8],xmm14
+ vmovdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ vmovdqa xmm9, [rsp+16*3]
+ vmovdqa xmm10, [rsp+16*4]
+ vmovdqa xmm11, [rsp+16*5]
+ vmovdqa xmm12, [rsp+16*6]
+ vmovdqa xmm13, [rsp+16*7]
+ vmovdqa xmm14, [rsp+16*8]
+ vmovdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define PS 8
+%define len arg0
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+[bits 64]
+section .text
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xtmph1 zmm2
+%define xtmpl1 zmm3
+%define xtmph2 zmm4
+%define xtmpl2 zmm5
+%define xtmph3 zmm6
+%define xtmpl3 zmm7
+%define xgft1_hi zmm8
+%define xgft1_lo zmm9
+%define xgft1_loy ymm9
+%define xgft2_hi zmm10
+%define xgft2_lo zmm11
+%define xgft2_loy ymm11
+%define xgft3_hi zmm12
+%define xgft3_lo zmm13
+%define xgft3_loy ymm13
+%define xd1 zmm14
+%define xd2 zmm15
+%define xd3 zmm16
+%define xmask0f zmm17
+
+align 16
+mk_global gf_3vect_mad_avx512, function
+func(gf_3vect_mad_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+ vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu xgft3_loy, [tmp+2*vec] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS] ; reuse vec_i
+ mov dest1, [dest1]
+ mov tmp, -1
+ kmovq k1, tmp
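+
+	;; k1 starts as all ones, so full 64-byte iterations run effectively
+	;; unmasked; the tail path below rewrites k1 so that only the final,
+	;; not-yet-processed lanes contribute on the overlap pass.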
+
+.loop64:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxorq xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxorq xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3 {k1}{z}, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3 {k1}{z}, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+	vpxorq	xd3, xd3, xtmph3	;xd3 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, (1 << 63)
+ lea tmp, [len + 64 - 1]
+ and tmp, 63
+ sarx pos, pos, tmp
+ kmovq k1, pos
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
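+
+	;; Tail-mask construction above, step by step: tmp = (orig_len-1) & 63
+	;; (len was pre-decremented by 64), and an arithmetic right shift
+	;; replicates the sign bit, so `sarx` leaves the top tmp+1 bits of
+	;; pos set -- one bit per leftover byte. Loaded into k1, the {z}
+	;; masking in .loop64 zeroes the partials for the already-processed
+	;; low lanes, so `xd ^= partial` leaves those bytes as loaded.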
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_3vect_mad_avx512
+no_gf_3vect_mad_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_3vect_mad_sse.asm b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm
new file mode 100644
index 000000000..c0fd0b964
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm
@@ -0,0 +1,298 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft1_lo xmm14
+%define xgft1_hi xmm13
+%define xgft2_lo xmm12
+%define xgft2_hi xmm11
+%define xgft3_lo xmm10
+%define xgft3_hi xmm9
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xd1 xmm8
+%define xd2 xtmpl1
+%define xd3 xtmph1
+
+align 16
+mk_global gf_3vect_mad_sse, function
+func(gf_3vect_mad_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5
+ lea tmp, [mul_array + vec_i]
+
+ movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ movdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ movdqu xgft3_hi, [tmp+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+ movdqa xtmph1, xgft1_hi ;Reload const array registers
+ movdqa xtmpl1, xgft1_lo
+ movdqa xtmph2, xgft2_hi ;Reload const array registers
+ movdqa xtmpl2, xgft2_lo
+ movdqa xtmph3, xgft3_hi ;Reload const array registers
+ movdqa xtmpl3, xgft3_lo
+
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ; dest1
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pxor xd1, xtmph1
+
+ XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector
+
+ ; dest2
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pxor xd2, xtmph2
+
+ ; dest3
+ pshufb xtmph3, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph3, xtmpl3 ;GF add high and low partials
+ pxor xd3, xtmph3
+
+ XSTR [dest1+pos], xd1 ;Store result
+ XSTR [dest2+pos], xd2 ;Store result
+ XSTR [dest3+pos], xd3 ;Store result
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp, len ;Overlapped offset length-16
+
+ XLDR x0, [src+tmp] ;Get next source vector
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest vector
+
+ sub len, pos
+
+ movdqa xtmph3, [constip16] ;Load const of i + 16
+ pinsrb xtmpl3, len.w, 15
+ pshufb xtmpl3, xmask0f ;Broadcast len to all bytes
+ pcmpgtb xtmpl3, xtmph3
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ; dest1
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pand xgft1_hi, xtmpl3
+ pxor xd1, xgft1_hi
+
+ ; dest2
+ pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ pand xgft2_hi, xtmpl3
+ pxor xd2, xgft2_hi
+
+ ; dest3
+ pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ pand xgft3_hi, xtmpl3
+ pxor xd3, xgft3_hi
+
+ XSTR [dest1+tmp], xd1 ;Store result
+ XSTR [dest2+tmp], xd2 ;Store result
+ XSTR [dest3+tmp], xd3 ;Store result
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+ dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_3vect_mad_sse, 00, 01, 0206
diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm
new file mode 100644
index 000000000..bad869267
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm
@@ -0,0 +1,441 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r13, 9*16 + 1*8
+ save_reg r14, 9*16 + 2*8
+ save_reg r15, 9*16 + 3*8
+ save_reg rdi, 9*16 + 4*8
+ save_reg rsi, 9*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r13, [rsp + 9*16 + 1*8]
+ mov r14, [rsp + 9*16 + 2*8]
+ mov r15, [rsp + 9*16 + 3*8]
+ mov rdi, [rsp + 9*16 + 4*8]
+ mov rsi, [rsp + 9*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; var1
+;;; var2
+;;; var3
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+	%define arg0	trans		;trans and trans2 stand in for arguments/variables kept on the stack
+	%define arg0_m	arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define arg5 trans2
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp3_m var(0)
+ %define tmp4 trans2
+ %define tmp4_m var(1)
+ %define tmp5 trans2
+ %define tmp5_m var(2)
+ %define tmp6 trans2
+ %define tmp6_m var(3)
+ %define return eax
+ %macro SLDR 2 ;stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*4 ;4 local variables
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*4 ;4 local variables
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define dest4 tmp5
+%define vskip3 tmp6
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp3_m
+ %define dest3_m tmp4_m
+ %define dest4_m tmp5_m
+ %define vskip3_m tmp6_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f xmm14
+ %define xgft1_lo xmm13
+ %define xgft1_hi xmm12
+ %define xgft2_lo xmm11
+ %define xgft2_hi xmm10
+ %define xgft3_lo xmm9
+ %define xgft3_hi xmm8
+ %define xgft4_lo xmm7
+ %define xgft4_hi xmm6
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+ %define xp3 xmm4
+ %define xp4 xmm5
+%else
+ %define xmm_trans xmm7 ;reuse xmask0f and xgft1_lo
+ %define xmask0f xmm_trans
+ %define xgft1_lo xmm_trans
+ %define xgft1_hi xmm6
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+ %define xgft3_lo xgft1_lo
+ %define xgft3_hi xgft1_hi
+ %define xgft4_lo xgft1_lo
+ %define xgft4_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+ %define xp3 xmm4
+ %define xp4 xmm5
+%endif
+align 16
+mk_global gf_4vect_dot_prod_avx, function
+func(gf_4vect_dot_prod_avx)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov vskip3, vec
+ imul vskip3, 96
+ SSTR vskip3_m, vskip3
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest3, [dest1+2*PS]
+ SSTR dest3_m, dest3
+ mov dest4, [dest1+3*PS]
+ SSTR dest4_m, dest4
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop16:
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ vpxor xp4, xp4
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ %ifidn PS,8 ;64-bit code
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add tmp, 32
+ add vec_i, PS
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ %else ;32-bit code
+ XLDR x0, [ptr+pos] ;Get next source vector
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %endif
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ;32-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ %endif
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ %ifidn PS,4 ;32-bit code
+ sal vec, 1
+ vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ sar vec, 1
+ %endif
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+ %ifidn PS,4 ;32-bit code
+ SLDR vskip3, vskip3_m
+ vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpxor xp4, xgft4_hi ;xp4 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ SLDR dest3, dest3_m
+ XSTR [dest3+pos], xp3
+ SLDR dest4, dest4_m
+ XSTR [dest4+pos], xp4
+
+ SLDR len, len_m
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_4vect_dot_prod_avx, 02, 05, 0193
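The tail handling in .loop16 above deserves a note: when the length is not a multiple of 16, the kernel re-runs the loop body once at offset len-16, recomputing (and overwriting) up to 15 bytes that were already written. A sketch of that control flow, mirroring the sub/jl and lea/cmp sequence (process_block is a stand-in for the loop body, not a real function):

    /* Overlapped-tail pattern used by the dot_prod kernels (sketch). */
    static int run_blocks(int len, void (*process_block)(int pos))
    {
            int pos;
            len -= 16;                       /* offset of last full block */
            if (len < 0)
                    return 1;                /* .return_fail: len < 16 */
            for (pos = 0; pos <= len; pos += 16)
                    process_block(pos);
            if (pos != len + 16)             /* ragged tail remains */
                    process_block(len);      /* one overlapping extra pass */
            return 0;                        /* .return_pass */
    }

The overlap is only safe because dot_prod rewrites every output byte from scratch on each pass; the gf_4vect_mad_* kernels later in this patch accumulate with XOR instead, so they must mask the already-processed bytes in their tail pass.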
diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..e422e28c9
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm
@@ -0,0 +1,460 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
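The AVX2 variant widens the block to 32 bytes, but vpshufb still indexes within each 128-bit lane, so the 32-byte table image (lo half | hi half) loaded into one ymm register has to be rearranged with vperm2i128 before it can serve as a lookup table. The kernel swaps the data lanes instead (immediates 0x30 and 0x12) so each table needs only one extra permute; the sketch below takes the simpler table-duplication route to show the semantics (AVX2 intrinsics, one multiply step only, not the kernel's exact register dance):

    #include <immintrin.h>
    #include <stdint.h>

    /* One split-table GF(2^8) multiply of 32 bytes, AVX2 (sketch).
     * tbl points at a 32-byte lo|hi table as laid out in g_tbls. */
    static __m256i gf_mul32(__m256i x, const uint8_t *tbl)
    {
            __m256i mask = _mm256_set1_epi8(0x0f);
            __m256i t   = _mm256_loadu_si256((const __m256i *)tbl); /* lo|hi */
            __m256i tlo = _mm256_permute2x128_si256(t, t, 0x00);    /* lo|lo */
            __m256i thi = _mm256_permute2x128_si256(t, t, 0x11);    /* hi|hi */
            __m256i lo  = _mm256_and_si256(x, mask);
            __m256i hi  = _mm256_and_si256(_mm256_srli_epi16(x, 4), mask);
            return _mm256_xor_si256(_mm256_shuffle_epi8(tlo, lo),
                                    _mm256_shuffle_epi8(thi, hi));
    }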
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r13, 9*16 + 1*8
+ save_reg r14, 9*16 + 2*8
+ save_reg r15, 9*16 + 3*8
+ save_reg rdi, 9*16 + 4*8
+ save_reg rsi, 9*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r13, [rsp + 9*16 + 1*8]
+ mov r14, [rsp + 9*16 + 2*8]
+ mov r15, [rsp + 9*16 + 3*8]
+ mov rdi, [rsp + 9*16 + 4*8]
+ mov rsi, [rsp + 9*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; var1
+;;; var2
+;;; var3
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans ;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define arg5 trans2
+ %define tmp edx
+ %define tmp.w edx
+ %define tmp.b dl
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp3_m var(0)
+ %define tmp4 trans2
+ %define tmp4_m var(1)
+ %define tmp5 trans2
+ %define tmp5_m var(2)
+ %define tmp6 trans2
+ %define tmp6_m var(3)
+ %define return eax
+ %macro SLDR 2 ;stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*4 ;4 local variables
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*4 ;4 local variables
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define dest4 tmp5
+%define vskip3 tmp6
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp3_m
+ %define dest3_m tmp4_m
+ %define dest4_m tmp5_m
+ %define vskip3_m tmp6_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f ymm14
+ %define xmask0fx xmm14
+ %define xgft1_lo ymm13
+ %define xgft1_hi ymm12
+ %define xgft2_lo ymm11
+ %define xgft2_hi ymm10
+ %define xgft3_lo ymm9
+ %define xgft3_hi ymm8
+ %define xgft4_lo ymm7
+ %define xgft4_hi ymm6
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+ %define xp3 ymm4
+ %define xp4 ymm5
+%else
+ %define ymm_trans ymm7 ;reuse xmask0f and xgft1_hi
+ %define xmask0f ymm_trans
+ %define xmask0fx xmm7
+ %define xgft1_lo ymm6
+ %define xgft1_hi ymm_trans
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+ %define xgft3_lo xgft1_lo
+ %define xgft3_hi xgft1_hi
+ %define xgft4_lo xgft1_lo
+ %define xgft4_hi xgft1_hi
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+ %define xp3 ymm4
+ %define xp4 ymm5
+%endif
+align 16
+mk_global gf_4vect_dot_prod_avx2, function
+func(gf_4vect_dot_prod_avx2)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 32
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+ mov vskip3, vec
+ imul vskip3, 96
+ SSTR vskip3_m, vskip3
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest3, [dest1+2*PS]
+ SSTR dest3_m, dest3
+ mov dest4, [dest1+3*PS]
+ SSTR dest4_m, dest4
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop32:
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ vpxor xp4, xp4
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ add vec_i, PS
+ %ifidn PS,8 ;64-bit code
+ vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ ; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+ add tmp, 32
+ %else ;32-bit code
+ mov cl, 0x0f ;use ecx as a temp variable
+ vpinsrb xmask0fx, xmask0fx, ecx, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ %endif
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ %endif
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ sal vec, 1
+ vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ sar vec, 1
+ %endif
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+ %ifidn PS,4 ; 32-bit code
+ SLDR vskip3, vskip3_m
+ vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+					; " Dx{00}, Dx{10}, ..., Dx{f0}
+ vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+ add tmp, 32
+ %endif
+ vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpxor xp4, xgft4_hi ;xp4 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ SLDR dest3, dest3_m
+ XSTR [dest3+pos], xp3
+ SLDR dest4, dest4_m
+ XSTR [dest4+pos], xp4
+
+ SLDR len, len_m
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-32
+ jmp .loop32 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_4vect_dot_prod_avx2, 04, 05, 0198
diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..9d329736a
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm
@@ -0,0 +1,301 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
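The AVX-512 path avoids the AVX2 lane dance: each 32-byte table is loaded once into the ymm alias (xgft*_loy), and vshufi64x2 then replicates its low or high 128-bit half across all four lanes of the zmm (immediate 0x00 broadcasts lane 0, 0x55 broadcasts lane 1). A hedged intrinsics sketch of that expansion, assuming AVX512F/BW:

    #include <immintrin.h>
    #include <stdint.h>

    /* Expand one 32-byte lo|hi table into two 64-byte lookup tables (sketch). */
    static void expand_tbl(const uint8_t *tbl, __m512i *tlo, __m512i *thi)
    {
            /* upper 256 bits are undefined after the cast, but only
             * lanes 0 and 1 (the loaded ymm) are ever selected below */
            __m512i t = _mm512_castsi256_si512(
                            _mm256_loadu_si256((const __m256i *)tbl));
            *tlo = _mm512_shuffle_i64x2(t, t, 0x00); /* lane 0 everywhere */
            *thi = _mm512_shuffle_i64x2(t, t, 0x55); /* lane 1 everywhere */
    }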
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ save_reg r12, 9*16 + 0*8
+ save_reg r13, 9*16 + 1*8
+ save_reg r14, 9*16 + 2*8
+ save_reg r15, 9*16 + 3*8
+ save_reg rdi, 9*16 + 4*8
+ save_reg rsi, 9*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r13, [rsp + 9*16 + 1*8]
+ mov r14, [rsp + 9*16 + 2*8]
+ mov r15, [rsp + 9*16 + 3*8]
+ mov rdi, [rsp + 9*16 + 4*8]
+ mov rsi, [rsp + 9*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define dest4 tmp5
+%define vskip3 tmp6
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%define xmask0f zmm14
+%define xgft1_lo zmm13
+%define xgft1_loy ymm13
+%define xgft1_hi zmm12
+%define xgft2_lo zmm11
+%define xgft2_loy ymm11
+%define xgft2_hi zmm10
+%define xgft3_lo zmm9
+%define xgft3_loy ymm9
+%define xgft3_hi zmm8
+%define xgft4_lo zmm7
+%define xgft4_loy ymm7
+%define xgft4_hi zmm6
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xp1 zmm2
+%define xp2 zmm3
+%define xp3 zmm4
+%define xp4 zmm5
+
+default rel
+[bits 64]
+
+section .text
+
+align 16
+mk_global gf_4vect_dot_prod_avx512, function
+func(gf_4vect_dot_prod_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest2, [dest1+PS]
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS]
+ mov dest1, [dest1]
+
+.loop64:
+ vpxorq xp1, xp1, xp1
+ vpxorq xp2, xp2, xp2
+ vpxorq xp3, xp3, xp3
+ vpxorq xp4, xp4, xp4
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ vmovdqu8 xgft4_loy, [tmp+vskip3] ;Load array Dx{00}..{0f}, Dx{00}..{f0}
+ add tmp, 32
+
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+
+ vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxorq xp1, xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxorq xp2, xp2, xgft2_hi ;xp2 += partial
+
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+ vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+ vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
+
+ vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxorq xp3, xp3, xgft3_hi ;xp3 += partial
+
+ vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpxorq xp4, xp4, xgft4_hi ;xp4 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [dest3+pos], xp3
+ XSTR [dest4+pos], xp4
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_4vect_dot_prod_avx512
+no_gf_4vect_dot_prod_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
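Applications do not normally call these kernels directly: the isa-l front end expands an encode matrix into the 32-byte tables, and its multibinary dispatcher picks the widest variant the CPU supports (hence the no_gf_4vect_dot_prod_avx512 stub above when the assembler lacks AVX-512). A hedged usage sketch with the public isa-l API; the include path and the k+p <= 32 sizing are assumptions:

    #include <isa-l/erasure_code.h>
    #include <stdint.h>

    /* Encode p parity buffers from k data buffers of length len (sketch,
     * assumes k + p <= 32; no error handling). */
    static void encode(int k, int p, int len, uint8_t **data, uint8_t **parity)
    {
            uint8_t a[32 * 32];              /* (k+p) x k encode matrix */
            uint8_t g_tbls[32 * 32 * 32];    /* k * p split tables, oversized */

            gf_gen_rs_matrix(a, k + p, k);
            ec_init_tables(k, p, &a[k * k], g_tbls);
            ec_encode_data(len, k, p, g_tbls, data, parity);
    }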
diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm
new file mode 100644
index 000000000..25b5cfffa
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm
@@ -0,0 +1,443 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
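Before the baseline SSE body, a word on where g_tbls comes from: every 32-byte table is derived from a single GF(2^8) coefficient c, with bytes 0-15 holding c*{0x00..0x0f} and bytes 16-31 holding c*{0x00,0x10,..,0xf0}, so c*b falls out of two lookups and one XOR by distributivity over the nibbles. A sketch of the construction, assuming the 0x11d reduction polynomial conventionally used for these erasure codes (gf_mul is an illustrative scalar multiply, not isa-l API):

    #include <stdint.h>

    /* Scalar GF(2^8) multiply, reduction polynomial 0x11d (assumed). */
    static uint8_t gf_mul(uint8_t a, uint8_t b)
    {
            uint8_t r = 0;
            while (b) {
                    if (b & 1)
                            r ^= a;
                    a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0));
                    b >>= 1;
            }
            return r;
    }

    /* Build one 32-byte split table for coefficient c (sketch). */
    static void make_tbl(uint8_t c, uint8_t tbl[32])
    {
            for (int i = 0; i < 16; i++) {
                    tbl[i]      = gf_mul(c, (uint8_t)i);        /* low nibble */
                    tbl[16 + i] = gf_mul(c, (uint8_t)(i << 4)); /* high nibble */
            }
    }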
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_reg r12, 9*16 + 0*8
+ save_reg r13, 9*16 + 1*8
+ save_reg r14, 9*16 + 2*8
+ save_reg r15, 9*16 + 3*8
+ save_reg rdi, 9*16 + 4*8
+ save_reg rsi, 9*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ mov r12, [rsp + 9*16 + 0*8]
+ mov r13, [rsp + 9*16 + 1*8]
+ mov r14, [rsp + 9*16 + 2*8]
+ mov r15, [rsp + 9*16 + 3*8]
+ mov rdi, [rsp + 9*16 + 4*8]
+ mov rsi, [rsp + 9*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; var1
+;;; var2
+;;; var3
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans ;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define arg5 trans2
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp3_m var(0)
+ %define tmp4 trans2
+ %define tmp4_m var(1)
+ %define tmp5 trans2
+ %define tmp5_m var(2)
+ %define tmp6 trans2
+ %define tmp6_m var(3)
+ %define return eax
+ %macro SLDR 2 ;stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ sub esp, PS*4 ;4 local variables
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ add esp, PS*4 ;4 local variables
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define dest4 tmp5
+%define vskip3 tmp6
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp3_m
+ %define dest3_m tmp4_m
+ %define dest4_m tmp5_m
+ %define vskip3_m tmp6_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+
+section .text
+
+%ifidn PS,8 ;64-bit code
+ %define xmask0f xmm14
+ %define xgft1_lo xmm2
+ %define xgft1_hi xmm3
+ %define xgft2_lo xmm11
+ %define xgft2_hi xmm4
+ %define xgft3_lo xmm9
+ %define xgft3_hi xmm5
+ %define xgft4_lo xmm7
+ %define xgft4_hi xmm6
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm8
+ %define xp2 xmm10
+ %define xp3 xmm12
+ %define xp4 xmm13
+%else
+ %define xmm_trans xmm7 ;reuse xmask0f and xgft1_lo
+ %define xmask0f xmm_trans
+ %define xgft1_lo xmm_trans
+ %define xgft1_hi xmm6
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+ %define xgft3_lo xgft1_lo
+ %define xgft3_hi xgft1_hi
+ %define xgft4_lo xgft1_lo
+ %define xgft4_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+ %define xp3 xmm4
+ %define xp4 xmm5
+%endif
+align 16
+mk_global gf_4vect_dot_prod_sse, function
+func(gf_4vect_dot_prod_sse)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov vskip3, vec
+ imul vskip3, 96
+ SSTR vskip3_m, vskip3
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ SLDR dest1, dest1_m
+ mov dest2, [dest1+PS]
+ SSTR dest2_m, dest2
+ mov dest3, [dest1+2*PS]
+ SSTR dest3_m, dest3
+ mov dest4, [dest1+3*PS]
+ SSTR dest4_m, dest4
+ mov dest1, [dest1]
+ SSTR dest1_m, dest1
+
+.loop16:
+ pxor xp1, xp1
+ pxor xp2, xp2
+ pxor xp3, xp3
+ pxor xp4, xp4
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ SLDR src, src_m
+ mov ptr, [src+vec_i]
+
+ %ifidn PS,8 ;64-bit code
+ movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add tmp, 32
+ add vec_i, PS
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+ %else ;32-bit code
+ XLDR x0, [ptr+pos] ;Get next source vector
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %endif
+
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pxor xp1, xgft1_hi ;xp1 += partial
+
+ %ifidn PS,4 ;32-bit code
+ movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ %endif
+ pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ pxor xp2, xgft2_hi ;xp2 += partial
+
+ %ifidn PS,4 ;32-bit code
+ sal vec, 1
+ movdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ movdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ sar vec, 1
+ %endif
+ pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ pxor xp3, xgft3_hi ;xp3 += partial
+
+ %ifidn PS,4 ;32-bit code
+ SLDR vskip3, vskip3_m
+ movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+ add tmp, 32
+ add vec_i, PS
+ %endif
+ pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ pxor xp4, xgft4_hi ;xp4 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest1, dest1_m
+ SLDR dest2, dest2_m
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ SLDR dest3, dest3_m
+ XSTR [dest3+pos], xp3
+ SLDR dest4, dest4_m
+ XSTR [dest4+pos], xp4
+
+ SLDR len, len_m
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_4vect_dot_prod_sse, 00, 06, 0064
diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm
new file mode 100644
index 000000000..3a00623c0
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm
@@ -0,0 +1,336 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
+;;;
+
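The mad ("multiply-add") kernels differ from dot_prod in two ways: they read a single source buffer (the vec_i-th of vec), and they XOR its scaled contribution into four existing outputs rather than overwriting them. A scalar sketch of the contract, with the same illustrative table layout as before:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar sketch of gf_4vect_mad_*(): dest[p] ^= c[p] * src. */
    static void gf_4vect_mad_ref(size_t len, int vec, int vec_i,
                                 const uint8_t *g_tbls, const uint8_t *src,
                                 uint8_t **dest)
    {
            for (int p = 0; p < 4; p++) {
                    const uint8_t *t = g_tbls + 32 * (vec * p + vec_i);
                    for (size_t i = 0; i < len; i++) {
                            uint8_t b = src[i];
                            dest[p][i] ^= t[b & 0x0f] ^ t[16 + (b >> 4)];
                    }
            }
    }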
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r15, 10*16 + 2*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r15, [rsp + 10*16 + 2*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 tmp2
+%define dest4 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft3_hi xmm14
+%define xgft4_hi xmm13
+%define xgft4_lo xmm12
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xtmph4 xmm8
+%define xtmpl4 xmm9
+%define xd1 xmm10
+%define xd2 xmm11
+%define xd3 xtmph1
+%define xd4 xtmpl1
+
+align 16
+mk_global gf_4vect_mad_avx, function
+func(gf_4vect_mad_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+ mov tmp, vec
+
+ sal vec_i, 5 ;Multiply by 32
+ lea tmp3, [mul_array + vec_i]
+
+ sal tmp, 6 ;Multiply by 64
+ vmovdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ sal vec, 5 ;Multiply by 32
+ add tmp, vec
+ vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
+ vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+ vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1
+
+ XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector
+ XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector
+
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2
+
+ ; dest3
+ vpshufb xtmph3, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+ vpxor xd3, xd3, xtmph3
+
+ ; dest4
+ vpshufb xtmph4, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl4, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph4, xtmph4, xtmpl4 ;GF add high and low partials
+ vpxor xd4, xd4, xtmph4
+
+ XSTR [dest1+pos], xd1 ;Store result
+ XSTR [dest2+pos], xd2 ;Store result
+ XSTR [dest3+pos], xd3 ;Store result
+ XSTR [dest4+pos], xd4 ;Store result
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+
+ mov tmp, len ;Overlapped offset length-16
+
+ XLDR x0, [src+tmp] ;Get next source vector
+
+ vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+ XLDR xtmph4, [dest3+tmp] ;Get next dest vector
+
+ sub len, pos
+
+ vmovdqa xtmpl4, [constip16] ;Load const of i + 16
+ vpinsrb xtmph3, xtmph3, len.w, 15
+ vpshufb xtmph3, xtmph3, xmask0f ;Broadcast len to all bytes
+ vpcmpgtb xtmph3, xtmph3, xtmpl4
+
+ XLDR xtmpl4, [dest4+tmp] ;Get next dest vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpand xtmph1, xtmph1, xtmph3
+ vpxor xd1, xd1, xtmph1
+
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpand xtmph2, xtmph2, xtmph3
+ vpxor xd2, xd2, xtmph2
+
+ ; dest3
+ vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_hi, xtmpl3 ;GF add high and low partials
+ vpand xgft3_hi, xgft3_hi, xtmph3
+ vpxor xtmph4, xtmph4, xgft3_hi
+
+ ; dest4
+ vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpand xgft4_hi, xgft4_hi, xtmph3
+ vpxor xtmpl4, xtmpl4, xgft4_hi
+
+ XSTR [dest1+tmp], xd1 ;Store result
+ XSTR [dest2+tmp], xd2 ;Store result
+ XSTR [dest3+tmp], xtmph4 ;Store result
+ XSTR [dest4+tmp], xtmpl4 ;Store result
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_4vect_mad_avx, 02, 01, 020a
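Because mad accumulates with XOR, the plain overlapped tail used by dot_prod would double-apply up to 15 bytes. The .lessthan16 block above therefore gates the final pass with a byte mask: constip16 holds the signed bytes -1..-16, len is reduced to the small negative distance between the tail offset and the first unprocessed position, broadcast to all 16 bytes, and vpcmpgtb leaves 0xff exactly where work remains; each partial product is ANDed with that mask before the XOR into dest. A scalar sketch of the mask:

    #include <stdint.h>

    /* Mask built by the mad tail pass (sketch): keep[i] is 0xff exactly
     * for bytes of the overlapped block the main loop did not process.
     * tail = total_len - 16; pos = first offset the main loop skipped. */
    static void tail_mask(int tail, int pos, uint8_t keep[16])
    {
            int8_t d = (int8_t)(tail - pos);        /* in -15..-1 */
            for (int i = 0; i < 16; i++)            /* constip16[i] = -1 - i */
                    keep[i] = (d > (int8_t)(-1 - i)) ? 0xff : 0x00;
    }

The AVX2 mad kernel below does the same with 32 constants (constip32) and a 0x1f shuffle mask to broadcast len across both lanes.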
diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm
new file mode 100644
index 000000000..e1cf910ae
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm
@@ -0,0 +1,342 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+
+;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 vec
+%define dest4 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm15
+%define xmask0fx xmm15
+%define xgft1_lo ymm14
+%define xgft2_lo ymm13
+%define xgft3_lo ymm12
+%define xgft4_lo ymm11
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xtmpl ymm2
+%define xtmplx xmm2
+%define xtmph1 ymm3
+%define xtmph1x xmm3
+%define xtmph2 ymm4
+%define xtmph3 ymm5
+%define xtmph4 ymm6
+%define xd1 ymm7
+%define xd2 ymm8
+%define xd3 ymm9
+%define xd4 ymm10
+
+align 16
+mk_global gf_4vect_mad_avx2, function
+func(gf_4vect_mad_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5 ;Multiply by 32
+ lea tmp, [mul_array + vec_i]
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ add tmp, vec
+ vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
+ ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS] ; reuse vec
+ mov dest4, [dest1+3*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop32:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+	XLDR	xd4, [dest4+pos]		;Get next dest vector
+
+ vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
+
+ vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xtmpl ;GF add high and low partials
+ vpxor xd3, xd3, xtmph3 ;xd3 += partial
+
+ ; dest4
+ vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph4, xtmph4, xtmpl ;GF add high and low partials
+ vpxor xd4, xd4, xtmph4 ;xd4 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+ XSTR [dest4+pos], xd4
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan32:
+ ;; Tail len
+ ;; Do one more overlap pass
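+	;; Tail handling: reload the last full 32-byte block at offset len
+	;; (= original length - 32). constip32 holds signed bytes -1..-32;
+	;; comparing a broadcast of the (negative) overlap distance against it
+	;; sets 0xff only in the byte lanes the main loop has not yet updated.
+	;; ANDing each partial product with that mask turns the redundant
+	;; lanes into XORs with zero, leaving already-written bytes unchanged.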
+ mov tmp.b, 0x1f
+ vpinsrb xtmph1x, xtmph1x, tmp.w, 0
+ vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f...
+
+ mov tmp, len ;Overlapped offset length-32
+
+ XLDR x0, [src+tmp] ;Get next source vector
+
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+ XLDR xd3, [dest3+tmp] ;Get next dest vector
+ XLDR xd4, [dest4+tmp] ;Get next dest vector
+
+ sub len, pos
+
+ vmovdqa xtmph2, [constip32] ;Load const of i + 32
+ vpinsrb xtmplx, xtmplx, len.w, 15
+	vinserti128 xtmpl, xtmpl, xtmplx, 1 ;Duplicate xtmplx into both lanes
+ vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f...
+ vpcmpgtb xtmpl, xtmpl, xtmph2
+
+ vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
+
+ vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials
+ vpand xtmph1, xtmph1, xtmpl
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials
+ vpand xtmph2, xtmph2, xtmpl
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xgft3_lo ;GF add high and low partials
+ vpand xtmph3, xtmph3, xtmpl
+ vpxor xd3, xd3, xtmph3 ;xd3 += partial
+
+ ; dest4
+ vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph4, xtmph4, xgft4_lo ;GF add high and low partials
+ vpand xtmph4, xtmph4, xtmpl
+ vpxor xd4, xd4, xtmph4 ;xd4 += partial
+
+ XSTR [dest1+tmp], xd1
+ XSTR [dest2+tmp], xd2
+ XSTR [dest3+tmp], xd3
+ XSTR [dest4+tmp], xd4
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+align 32
+constip32:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+ dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7
+
+;;; func core, ver, snum
+slversion gf_4vect_mad_avx2, 04, 01, 020b
diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm
new file mode 100644
index 000000000..77dc76b4c
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm
@@ -0,0 +1,267 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_mad_avx512(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define return rax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+%endif
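+;; Note: the win64 ABI passes only the first four arguments in registers
+;; (rcx, rdx, r8, r9); args 5 and 6 are fetched from the stack via arg(x)
+;; in FUNC_SAVE. xmm6-xmm15 and r12/r15 are callee-saved, hence the
+;; save/restore pairs above.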
+
+%define PS 8
+%define len arg0
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define dest2 mul_array
+%define dest3 vec
+%define dest4 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+[bits 64]
+section .text
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xtmpl1 zmm2
+%define xtmph1 zmm3
+%define xtmph2 zmm4
+%define xtmph3 zmm5
+%define xtmph4 zmm6
+%define xgft1_hi zmm7
+%define xgft1_lo zmm8
+%define xgft1_loy ymm8
+%define xgft2_hi zmm9
+%define xgft2_lo zmm10
+%define xgft2_loy ymm10
+%define xgft3_hi zmm11
+%define xgft3_lo zmm12
+%define xgft3_loy ymm12
+%define xgft4_hi zmm13
+%define xgft4_lo zmm14
+%define xgft4_loy ymm14
+%define xd1 zmm15
+%define xd2 zmm16
+%define xd3 zmm17
+%define xd4 zmm18
+%define xmask0f zmm19
+%define xtmpl2 zmm20
+%define xtmpl3 zmm21
+%define xtmpl4 zmm22
+%define xtmpl5 zmm23
+
+align 16
+mk_global gf_4vect_mad_avx512, function
+func(gf_4vect_mad_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5 ;Multiply by 32
+ lea tmp, [mul_array + vec_i]
+ vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu xgft3_loy, [tmp+2*vec] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ add tmp, vec
+ vmovdqu xgft4_loy, [tmp+2*vec] ;Load array Dx{00}..{0f}, Dx{00}..{f0}
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+ vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+ vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
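+	;; Each 32-byte table loads as lo|hi halves in a ymm; vshufi64x2 with
+	;; imm 0x00 broadcasts the low 128 bits (low-nibble table) and 0x55 the
+	;; high 128 bits (high-nibble table) across all four lanes of the zmm,
+	;; so one vpshufb performs 64 table lookups per nibble.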
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS] ; reuse vec
+ mov dest4, [dest1+3*PS] ; reuse vec_i
+ mov dest1, [dest1]
+ mov tmp, -1
+ kmovq k1, tmp
+
+.loop64:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+	XLDR	xd4, [dest4+pos]	;Get next dest vector
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxorq xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxorq xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3 {k1}{z}, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3 {k1}{z}, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+	vpxorq	xd3, xd3, xtmph3	;xd3 += partial
+
+ ; dest4
+ vpshufb xtmph4 {k1}{z}, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl4 {k1}{z}, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph4, xtmph4, xtmpl4 ;GF add high and low partials
+	vpxorq	xd4, xd4, xtmph4	;xd4 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+ XSTR [dest4+pos], xd4
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
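+	;; Build a 64-bit lane mask whose top (original length mod 64) bits are
+	;; set by arithmetically shifting the sign bit right. With {z} zeroing
+	;; on the masked vpshufb lookups, lanes the main loop already wrote get
+	;; zero partials, so the overlapped pass only alters the trailing bytes.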
+ mov pos, (1 << 63)
+ lea tmp, [len + 64 - 1]
+ and tmp, 63
+ sarx pos, pos, tmp
+ kmovq k1, pos
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_4vect_mad_avx512
+no_gf_4vect_mad_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_4vect_mad_sse.asm b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm
new file mode 100644
index 000000000..d5efc9791
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm
@@ -0,0 +1,342 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r15, 10*16 + 2*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r15, [rsp + 10*16 + 2*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 tmp2
+%define dest4 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft3_hi xmm14
+%define xgft4_hi xmm13
+%define xgft4_lo xmm12
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xtmph4 xmm8
+%define xtmpl4 xmm9
+%define xd1 xmm10
+%define xd2 xmm11
+%define xd3 xtmph1
+%define xd4 xtmpl1
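+
+;; Only 16 xmm registers are available, so xd3/xd4 alias xtmph1/xtmpl1:
+;; dest3 and dest4 are loaded only after the dest1 partial has consumed
+;; those temporaries. The A, B and C-low tables are reloaded every
+;; iteration; only the C-high and D tables stay register-resident.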
+
+align 16
+mk_global gf_4vect_mad_sse, function
+func(gf_4vect_mad_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov tmp, vec
+
+ sal vec_i, 5 ;Multiply by 32
+ lea tmp3, [mul_array + vec_i]
+
+ sal tmp, 6 ;Multiply by 64
+
+ movdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ sal vec, 5 ;Multiply by 32
+ add tmp, vec
+ movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
+ movdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+ movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+
+ movdqa xtmph3, xgft3_hi
+ movdqa xtmpl4, xgft4_lo
+ movdqa xtmph4, xgft4_hi
+
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ; dest1
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pxor xd1, xtmph1
+
+ XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector
+ XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector
+
+ ; dest2
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pxor xd2, xtmph2
+
+ ; dest3
+ pshufb xtmph3, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph3, xtmpl3 ;GF add high and low partials
+ pxor xd3, xtmph3
+
+ ; dest4
+ pshufb xtmph4, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl4, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph4, xtmpl4 ;GF add high and low partials
+ pxor xd4, xtmph4
+
+ XSTR [dest1+pos], xd1 ;Store result
+ XSTR [dest2+pos], xd2 ;Store result
+ XSTR [dest3+pos], xd3 ;Store result
+ XSTR [dest4+pos], xd4 ;Store result
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp, len ;Overlapped offset length-16
+
+ XLDR x0, [src+tmp] ;Get next source vector
+
+ movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+	XLDR	xtmph4, [dest3+tmp]	;Get next dest vector (xd3 aliases xtmph1, still in use)
+
+ sub len, pos
+
+ movdqa xtmpl4, [constip16] ;Load const of i + 16
+ pinsrb xtmph3, len.w, 15
+ pshufb xtmph3, xmask0f ;Broadcast len to all bytes
+ pcmpgtb xtmph3, xtmpl4
+
+ XLDR xtmpl4, [dest4+tmp] ;Get next dest vector
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ; dest1
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pand xtmph1, xtmph3
+ pxor xd1, xtmph1
+
+ ; dest2
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pand xtmph2, xtmph3
+ pxor xd2, xtmph2
+
+ ; dest3
+ pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xgft3_hi, xtmpl3 ;GF add high and low partials
+ pand xgft3_hi, xtmph3
+ pxor xtmph4, xgft3_hi
+
+ ; dest4
+ pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ pand xgft4_hi, xtmph3
+ pxor xtmpl4, xgft4_hi
+
+ XSTR [dest1+tmp], xd1 ;Store result
+ XSTR [dest2+tmp], xd2 ;Store result
+ XSTR [dest3+tmp], xtmph4 ;Store result
+ XSTR [dest4+tmp], xtmpl4 ;Store result
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+ dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_4vect_mad_sse, 00, 01, 0209
diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm
new file mode 100644
index 000000000..a5bdb2a18
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm
@@ -0,0 +1,303 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest1 tmp3
+%define dest2 tmp4
+%define vskip1 tmp5
+%define vskip3 tmp6
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft1_lo xmm14
+%define xgft1_hi xmm13
+%define xgft2_lo xmm12
+%define xgft2_hi xmm11
+%define xgft3_lo xmm10
+%define xgft3_hi xmm9
+%define xgft4_lo xmm8
+%define xgft4_hi xmm7
+
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xp1 xmm2
+%define xp2 xmm3
+%define xp3 xmm4
+%define xp4 xmm5
+%define xp5 xmm6
+
+align 16
+mk_global gf_5vect_dot_prod_avx, function
+func(gf_5vect_dot_prod_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest1, [dest]
+ mov dest2, [dest+PS]
+
+
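+	;; Outer loop: one 16-byte slice of all five parity outputs per pass.
+	;; The inner loop walks every source buffer, accumulating in GF(2^8);
+	;; as an illustrative sketch:
+	;;   dest_k[i] = XOR over j of gf_mul(g_tbls + 32*(k*vec + j), src_j[i])
+	;; Only dest1/dest2 pointers fit in spare registers; dest3..dest5 are
+	;; re-read from the array after the inner loop frees tmp and vec_i.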
+.loop16:
+ mov tmp, mul_array
+ xor vec_i, vec_i
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ vpxor xp4, xp4
+ vpxor xp5, xp5
+
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ add vec_i, PS
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ vmovdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ add tmp, 32
+
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+ vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpxor xp4, xgft4_hi ;xp4 += partial
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp5, xgft1_hi ;xp5 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ mov tmp, [dest+2*PS]
+ mov ptr, [dest+3*PS]
+ mov vec_i, [dest+4*PS]
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [tmp+pos], xp3
+ XSTR [ptr+pos], xp4
+ XSTR [vec_i+pos], xp5
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
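+	;; Unlike the *_mad kernels, dot products never read the destinations,
+	;; so the overlapped tail pass simply recomputes the last 16 bytes and
+	;; stores identical values over the overlap; no byte mask is needed.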
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_5vect_dot_prod_avx, 02, 04, 0194
diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..d019e978f
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm
@@ -0,0 +1,315 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest1 tmp3
+%define dest2 tmp4
+%define vskip1 tmp5
+%define vskip3 tmp6
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm15
+%define xmask0fx xmm15
+%define xgft1_lo ymm14
+%define xgft1_hi ymm13
+%define xgft2_lo ymm12
+%define xgft2_hi ymm11
+%define xgft3_lo ymm10
+%define xgft3_hi ymm9
+%define xgft4_lo ymm8
+%define xgft4_hi ymm7
+
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xp1 ymm2
+%define xp2 ymm3
+%define xp3 ymm4
+%define xp4 ymm5
+%define xp5 ymm6
+
+align 16
+mk_global gf_5vect_dot_prod_avx2, function
+func(gf_5vect_dot_prod_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest1, [dest]
+ mov dest2, [dest+PS]
+
+
+.loop32:
+ mov tmp, mul_array
+ xor vec_i, vec_i
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ vpxor xp4, xp4
+ vpxor xp5, xp5
+
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
+ vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
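+	;; vpshufb only looks up within each 128-bit lane, while the 32-byte
+	;; tables loaded below are laid out lo|hi. The two cross-lane permutes
+	;; above arrange the low and high nibbles so that each lookup lane
+	;; lines up with the correct half-table (original and lane-swapped
+	;; copies), and the two vpshufb results XOR into complete products.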
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ ; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ add tmp, 32
+
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+ vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpxor xp4, xgft4_hi ;xp4 += partial
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp5, xgft1_hi ;xp5 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ mov tmp, [dest+2*PS]
+ mov ptr, [dest+3*PS]
+ mov vec_i, [dest+4*PS]
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [tmp+pos], xp3
+ XSTR [ptr+pos], xp4
+ XSTR [vec_i+pos], xp5
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+	mov	pos, len	;Overlapped offset length-32
+ jmp .loop32 ;Do one more overlap pass
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_5vect_dot_prod_avx2, 04, 04, 0199
diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..1cca65b37
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm
@@ -0,0 +1,335 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbp ; must be saved and restored
+ %define tmp8 rbx ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbp
+ push rbx
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbx
+ pop rbp
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbp ; must be saved and restored
+ %define tmp8 rbx ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbp, 10*16 + 6*8
+ save_reg rbx, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbp, [rsp + 10*16 + 6*8]
+ mov rbx, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define dest4 tmp5
+%define vskip3 tmp6
+%define dest5 tmp7
+%define vskip1 tmp8
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%define xmask0f zmm17
+%define xgft1_lo zmm16
+%define xgft1_loy ymm16
+%define xgft1_hi zmm15
+%define xgft2_lo zmm14
+%define xgft2_loy ymm14
+%define xgft2_hi zmm13
+%define xgft3_lo zmm12
+%define xgft3_loy ymm12
+%define xgft3_hi zmm11
+%define xgft4_lo zmm10
+%define xgft4_loy ymm10
+%define xgft4_hi zmm9
+%define xgft5_lo zmm8
+%define xgft5_loy ymm8
+%define xgft5_hi zmm7
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xp1 zmm2
+%define xp2 zmm3
+%define xp3 zmm4
+%define xp4 zmm5
+%define xp5 zmm6
+
+default rel
+[bits 64]
+
+section .text
+
+align 16
+mk_global gf_5vect_dot_prod_avx512, function
+func(gf_5vect_dot_prod_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest2, [dest1+PS]
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS]
+ mov dest5, [dest1+4*PS]
+ mov dest1, [dest1]
+
+.loop64:
+ vpxorq xp1, xp1, xp1
+ vpxorq xp2, xp2, xp2
+ vpxorq xp3, xp3, xp3
+ vpxorq xp4, xp4, xp4
+ vpxorq xp5, xp5, xp5
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ vmovdqu8 xgft4_loy, [tmp+vskip3] ;Load array Dx{00}..{0f}, Dx{00}..{f0}
+ vmovdqu8 xgft5_loy, [tmp+vskip1*4] ;Load array Ex{00}..{0f}, Ex{00}..{f0}
+ add tmp, 32
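+	;; vec was pre-scaled by PS for pointer indexing, so vec*(32/PS) in the
+	;; addressing above recovers the 32-byte-per-source stride between the
+	;; per-destination table groups (B at 1x, C at 2x; D and E use the
+	;; unscaled vskip3 = 96*vec and vskip1*4 = 128*vec offsets).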
+
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+
+ vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxorq xp1, xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxorq xp2, xp2, xgft2_hi ;xp2 += partial
+
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+ vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+ vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
+
+ vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxorq xp3, xp3, xgft3_hi ;xp3 += partial
+
+ vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpxorq xp4, xp4, xgft4_hi ;xp4 += partial
+
+ vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55
+ vshufi64x2 xgft5_lo, xgft5_lo, xgft5_lo, 0x00
+
+ vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials
+ vpxorq xp5, xp5, xgft5_hi ;xp5 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [dest3+pos], xp3
+ XSTR [dest4+pos], xp4
+ XSTR [dest5+pos], xp5
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_5vect_dot_prod_avx512
+no_gf_5vect_dot_prod_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm
new file mode 100644
index 000000000..c96bed514
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm
@@ -0,0 +1,304 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest1 tmp3
+%define dest2 tmp4
+%define vskip1 tmp5
+%define vskip3 tmp6
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft1_lo xmm2
+%define xgft1_hi xmm3
+%define xgft2_lo xmm4
+%define xgft2_hi xmm5
+%define xgft3_lo xmm10
+%define xgft3_hi xmm6
+%define xgft4_lo xmm8
+%define xgft4_hi xmm7
+
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xp1 xmm9
+%define xp2 xmm11
+%define xp3 xmm12
+%define xp4 xmm13
+%define xp5 xmm14
+
+align 16
+mk_global gf_5vect_dot_prod_sse, function
+func(gf_5vect_dot_prod_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest1, [dest]
+ mov dest2, [dest+PS]
+
+
+.loop16:
+ mov tmp, mul_array
+ xor vec_i, vec_i
+ pxor xp1, xp1
+ pxor xp2, xp2
+ pxor xp3, xp3
+ pxor xp4, xp4
+ pxor xp5, xp5
+
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ add vec_i, PS
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pxor xp1, xgft1_hi ;xp1 += partial
+
+ pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ pxor xp2, xgft2_hi ;xp2 += partial
+
+ movdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ movdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ add tmp, 32
+
+ pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ pxor xp3, xgft3_hi ;xp3 += partial
+
+ pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ pxor xp4, xgft4_hi ;xp4 += partial
+
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pxor xp5, xgft1_hi ;xp5 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ mov tmp, [dest+2*PS]
+ mov ptr, [dest+3*PS]
+ mov vec_i, [dest+4*PS]
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [tmp+pos], xp3
+ XSTR [ptr+pos], xp4
+ XSTR [vec_i+pos], xp5
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_5vect_dot_prod_sse, 00, 05, 0065
diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm
new file mode 100644
index 000000000..e9e246c74
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm
@@ -0,0 +1,365 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
+;;;
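+;;; Multiply-accumulate of one source into five parity rows, in place:
+;;;   dest[k][i] ^= gf_mul(gf_k, src[i])        k = 0..4
+;;; A sketch of the nibble-table method used throughout: for a constant
+;;; c and byte b, GF(2^8) multiplication distributes over xor, so
+;;;   c*b = c*(b_lo ^ (b_hi << 4)) = lo_tbl[b_lo] ^ hi_tbl[b_hi]
+;;; with lo_tbl[j] = c*j and hi_tbl[j] = c*(j << 4). pshufb/vpshufb
+;;; performs 16 such 4-bit table lookups per instruction.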
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13
+ %define tmp4 r14
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 5*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12
+ %define tmp4 r13
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp4
+%define dest3 mul_array
+%define dest4 tmp2
+%define dest5 vec_i
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
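+;;; EC_ALIGNED_ADDR and NO_NT_LDST are build-time switches: the default
+;;; is unaligned vmovdqu; callers guaranteeing alignment may select
+;;; aligned (vmovdqa) or non-temporal, cache-bypassing
+;;; (vmovntdqa/vmovntdq) forms instead.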
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft5_hi xmm14
+%define xgft4_lo xmm13
+%define xgft4_hi xmm12
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xtmph5 xmm8
+%define xtmpl5 xmm9
+%define xd1 xmm10
+%define xd2 xmm11
+%define xd3 xtmpl1
+%define xd4 xtmph1
+%define xd5 xtmpl2
+
+
+align 16
+mk_global gf_5vect_mad_avx, function
+func(gf_5vect_mad_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov tmp, vec
+ sal vec_i, 5 ;Multiply by 32
+ lea tmp3, [mul_array + vec_i]
+ sal tmp, 6 ;Multiply by 64
+	vmovdqu	xgft5_hi, [tmp3+2*tmp+16]	;Load array Ex{00}, Ex{10}, ..., Ex{f0}
+ sal vec, 5 ;Multiply by 32
+ add tmp, vec
+ vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+ vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
+
+ mov dest3, [dest1+2*PS] ; reuse mul_array
+ mov dest4, [dest1+3*PS]
+ mov dest5, [dest1+4*PS] ; reuse vec_i
+ mov dest2, [dest1+PS]
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1
+
+ XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector
+ XLDR xd4, [dest4+pos] ;Reuse xtmph1, Get next dest vector
+
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2
+
+ XLDR xd5, [dest5+pos] ;Reuse xtmpl2. Get next dest vector
+
+ ; dest3
+ vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+ vpxor xd3, xd3, xtmph3
+
+ ; dest4
+ vpshufb xtmph2, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl3 ;GF add high and low partials
+ vpxor xd4, xd4, xtmph2
+
+ ; dest5
+ vpshufb xtmph5, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph5, xtmph5, xtmpl5 ;GF add high and low partials
+ vpxor xd5, xd5, xtmph5
+
+ XSTR [dest1+pos], xd1 ;Store result into dest1
+ XSTR [dest2+pos], xd2 ;Store result into dest2
+ XSTR [dest3+pos], xd3 ;Store result into dest3
+ XSTR [dest4+pos], xd4 ;Store result into dest4
+ XSTR [dest5+pos], xd5 ;Store result into dest5
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp, len ;Overlapped offset length-16
+ XLDR x0, [src+tmp] ;Get next source vector
+
+ sub len, pos
+
+ vmovdqa xtmph1, [constip16] ;Load const of i + 16
+ vpinsrb xtmph5, len.w, 15
+ vpshufb xtmph5, xmask0f ;Broadcast len to all bytes
+ vpcmpgtb xtmph5, xtmph5, xtmph1
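+	;; xtmph5 is now 0xff only in the tail lanes the main loop has not
+	;; yet written; partials are ANDed with it below, so the overlapped
+	;; pass leaves previously computed bytes unchanged.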
+
+ vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpand xtmph1, xtmph1, xtmph5
+ vpxor xd1, xd1, xtmph1
+
+ XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector
+ XLDR xd4, [dest4+tmp] ;Reuse xtmph1, Get next dest vector
+
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpand xtmph2, xtmph2, xtmph5
+ vpxor xd2, xd2, xtmph2
+
+ XLDR xd5, [dest5+tmp] ;Reuse xtmpl2. Get next dest vector
+
+ ; dest3
+ vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+ vpand xtmph3, xtmph3, xtmph5
+ vpxor xd3, xd3, xtmph3
+
+ ; dest4
+ vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpand xgft4_hi, xgft4_hi, xtmph5
+ vpxor xd4, xd4, xgft4_hi
+
+ ; dest5
+ vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft5_hi, xgft5_hi, xtmpl5 ;GF add high and low partials
+ vpand xgft5_hi, xgft5_hi, xtmph5
+ vpxor xd5, xd5, xgft5_hi
+
+ XSTR [dest1+tmp], xd1 ;Store result into dest1
+ XSTR [dest2+tmp], xd2 ;Store result into dest2
+ XSTR [dest3+tmp], xd3 ;Store result into dest3
+ XSTR [dest4+tmp], xd4 ;Store result into dest4
+ XSTR [dest5+tmp], xd5 ;Store result into dest5
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_5vect_mad_avx, 02, 01, 020d
diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm
new file mode 100644
index 000000000..87038a795
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm
@@ -0,0 +1,363 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
+;;;
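+;;; AVX2 variant: 32 bytes per iteration. Each 32-byte ymm table holds
+;;; the lo table in lane 0 and the hi table in lane 1; vperm2i128 pairs
+;;; the matching nibbles with each lane (see the note in .loop32).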
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp2
+%define dest3 mul_array
+%define dest4 vec
+%define dest5 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm15
+%define xmask0fx xmm15
+%define xgft1_lo ymm14
+%define xgft2_lo ymm13
+%define xgft3_lo ymm12
+%define xgft4_lo ymm11
+%define xgft5_lo ymm10
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xtmpl ymm2
+%define xtmplx xmm2
+%define xtmph1 ymm3
+%define xtmph1x xmm3
+%define xtmph2 ymm4
+%define xd1 ymm5
+%define xd2 ymm6
+%define xd3 ymm7
+%define xd4 ymm8
+%define xd5 ymm9
+
+align 16
+mk_global gf_5vect_mad_avx2, function
+func(gf_5vect_mad_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5 ;Multiply by 32
+ lea tmp, [mul_array + vec_i]
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ vmovdqu xgft5_lo, [tmp+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ add tmp, vec
+ vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ ; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+ mov dest3, [dest1+2*PS] ; reuse mul_array
+ mov dest4, [dest1+3*PS] ; reuse vec
+ mov dest5, [dest1+4*PS] ; reuse vec_i
+ mov dest2, [dest1+PS]
+ mov dest1, [dest1]
+
+.loop32:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+ XLDR xd4, [dest4+pos] ;Get next dest vector
+ XLDR xd5, [dest5+pos] ;Get next dest vector
+
+ vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
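+	;; After the two cross-lane swaps, xtmpa carries (lane0 low nibbles |
+	;; lane1 high nibbles) and x0 the complement, matching the ymm table
+	;; layout (lo table | hi table), so each product needs only two
+	;; vpshufb instead of four 128-bit lookups.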
+
+ vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+ ; dest3
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
+ vpxor xd3, xd3, xtmph1 ;xd3 += partial
+
+ vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
+ ; dest4
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials
+ vpxor xd4, xd4, xtmph2 ;xd4 += partial
+
+ ; dest5
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
+ vpxor xd5, xd5, xtmph1 ;xd5 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+ XSTR [dest4+pos], xd4
+ XSTR [dest5+pos], xd5
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan32:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp.b, 0x1f
+ vpinsrb xtmph1x, xtmph1x, tmp.w, 0
+ vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f...
+
+ mov tmp, len ;Overlapped offset length-32
+
+ XLDR x0, [src+tmp] ;Get next source vector
+
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+ XLDR xd3, [dest3+tmp] ;Get next dest vector
+ XLDR xd4, [dest4+tmp] ;Get next dest vector
+ XLDR xd5, [dest5+tmp] ;Get next dest vector
+
+ sub len, pos
+
+ vmovdqa xtmph2, [constip32] ;Load const of i + 32
+ vpinsrb xtmplx, xtmplx, len.w, 15
+	vinserti128 xtmpl, xtmpl, xtmplx, 1 ;Duplicate xtmplx into both 128-bit lanes
+ vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f...
+ vpcmpgtb xtmpl, xtmpl, xtmph2
+
+ vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
+
+ vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+
+ ; dest1
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials
+ vpand xtmph1, xtmph1, xtmpl
+ vpxor xd1, xd1, xtmph1 ;xd1 += partial
+
+ vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ ; dest2
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials
+ vpand xtmph2, xtmph2, xtmpl
+ vpxor xd2, xd2, xtmph2 ;xd2 += partial
+
+ vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+ ; dest3
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xgft3_lo ;GF add high and low partials
+ vpand xtmph1, xtmph1, xtmpl
+ vpxor xd3, xd3, xtmph1 ;xd3 += partial
+
+ vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
+ ; dest4
+ vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xgft4_lo ;GF add high and low partials
+ vpand xtmph2, xtmph2, xtmpl
+ vpxor xd4, xd4, xtmph2 ;xd4 += partial
+
+ ; dest5
+ vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xgft5_lo ;GF add high and low partials
+ vpand xtmph1, xtmph1, xtmpl
+ vpxor xd5, xd5, xtmph1 ;xd5 += partial
+
+ XSTR [dest1+tmp], xd1
+ XSTR [dest2+tmp], xd2
+ XSTR [dest3+tmp], xd3
+ XSTR [dest4+tmp], xd4
+ XSTR [dest5+tmp], xd5
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+align 32
+constip32:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+ dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7
+
+;;; func core, ver, snum
+slversion gf_5vect_mad_avx2, 04, 01, 020e
diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm
new file mode 100644
index 000000000..e2a14558b
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm
@@ -0,0 +1,287 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_mad_avx512(len, vec, vec_i, mul_array, src, dest);
+;;;
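+;;; AVX512BW variant: 64 bytes per iteration. The tail re-enters the
+;;; main loop with a partial opmask k1 so no separate remainder path is
+;;; needed; see the mask setup after .loop64.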
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+%endif
+
+%define PS 8
+%define len arg0
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define dest2 tmp2
+%define dest3 mul_array
+%define dest4 vec
+%define dest5 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+[bits 64]
+section .text
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xtmpl1 zmm2
+%define xtmph1 zmm3
+%define xtmph2 zmm4
+%define xtmph3 zmm5
+%define xgft1_hi zmm6
+%define xgft1_lo zmm7
+%define xgft1_loy ymm7
+%define xgft2_hi zmm8
+%define xgft2_lo zmm9
+%define xgft2_loy ymm9
+%define xgft3_hi zmm10
+%define xgft3_lo zmm11
+%define xgft3_loy ymm11
+%define xgft4_hi zmm12
+%define xgft4_lo zmm13
+%define xgft4_loy ymm13
+%define xgft5_hi zmm14
+%define xgft5_lo zmm15
+%define xgft5_loy ymm15
+%define xd1 zmm16
+%define xd2 zmm17
+%define xd3 zmm18
+%define xd4 zmm19
+%define xd5 zmm20
+%define xmask0f zmm21
+%define xtmpl2 zmm22
+%define xtmpl3 zmm23
+%define xtmpl4 zmm24
+%define xtmpl5 zmm25
+%define xtmph4 zmm26
+%define xtmph5 zmm27
+
+align 16
+mk_global gf_5vect_mad_avx512, function
+func(gf_5vect_mad_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5 ;Multiply by 32
+ lea tmp, [mul_array + vec_i]
+ vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu xgft3_loy, [tmp+2*vec] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ vmovdqu xgft5_loy, [tmp+4*vec] ;Load array Ex{00}..{0f}, Ex{00}..{f0}
+ add tmp, vec
+ vmovdqu xgft4_loy, [tmp+2*vec] ;Load array Dx{00}..{0f}, Dx{00}..{f0}
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+ vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+ vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
+ vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55
+ vshufi64x2 xgft5_lo, xgft5_lo, xgft5_lo, 0x00
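+	;; Each 32-byte table was loaded into the low ymm of a zmm: imm 0x00
+	;; broadcasts 128-bit lane 0 (the lo table) to all four lanes, 0x55
+	;; broadcasts lane 1 (the hi table), giving full-width lookup tables.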
+ mov dest2, [dest1+PS]
+ mov dest3, [dest1+2*PS] ; reuse mul_array
+ mov dest4, [dest1+3*PS] ; reuse vec
+ mov dest5, [dest1+4*PS] ; reuse vec_i
+ mov dest1, [dest1]
+ mov tmp, -1
+ kmovq k1, tmp
+
+.loop64:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+	XLDR	xd4, [dest4+pos]	;Get next dest vector
+ XLDR xd5, [dest5+pos] ;Get next dest vector
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxorq xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxorq xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3 {k1}{z}, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3 {k1}{z}, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+	vpxorq	xd3, xd3, xtmph3	;xd3 += partial
+
+ ; dest4
+ vpshufb xtmph4 {k1}{z}, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl4 {k1}{z}, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph4, xtmph4, xtmpl4 ;GF add high and low partials
+	vpxorq	xd4, xd4, xtmph4	;xd4 += partial
+
+ ; dest5
+ vpshufb xtmph5 {k1}{z}, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl5 {k1}{z}, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph5, xtmph5, xtmpl5 ;GF add high and low partials
+	vpxorq	xd5, xd5, xtmph5	;xd5 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+ XSTR [dest4+pos], xd4
+ XSTR [dest5+pos], xd5
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, (1 << 63)
+ lea tmp, [len + 64 - 1]
+ and tmp, 63
+ sarx pos, pos, tmp
+ kmovq k1, pos
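+	;; k1 now has its top (orig len mod 64) bits set: exactly the bytes
+	;; of the final 64-byte window that the main loop has not written.
+	;; The {z}-masked vpshufb in .loop64 zeroes the other partials, so
+	;; the overlap pass xors 0 into bytes already processed.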
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_5vect_mad_avx512
+no_gf_5vect_mad_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_5vect_mad_sse.asm b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm
new file mode 100644
index 000000000..17760d096
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm
@@ -0,0 +1,373 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;;
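+;;; Same algorithm as gf_5vect_mad_avx, written with destructive
+;;; two-operand SSSE3 forms; the constant D/E tables are therefore
+;;; copied back into scratch registers each iteration before pshufb
+;;; overwrites them.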
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13
+ %define tmp4 r14
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 5*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12
+ %define tmp4 r13
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp4
+%define dest3 mul_array
+%define dest4 tmp2
+%define dest5 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft5_hi xmm14
+%define xgft4_lo xmm13
+%define xgft4_hi xmm12
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xtmph5 xmm8
+%define xtmpl5 xmm9
+%define xd1 xmm10
+%define xd2 xmm11
+%define xd3 xtmpl1
+%define xd4 xtmph1
+%define xd5 xtmpl2
+
+
+align 16
+mk_global gf_5vect_mad_sse, function
+func(gf_5vect_mad_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov tmp, vec
+ sal vec_i, 5 ;Multiply by 32
+ lea tmp3, [mul_array + vec_i]
+ sal tmp, 6 ;Multiply by 64
+	movdqu	xgft5_hi, [tmp3+2*tmp+16]	;Load array Ex{00}, Ex{10}, ..., Ex{f0}
+ sal vec, 5 ;Multiply by 32
+ add tmp, vec
+ movdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+ movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
+
+ mov dest3, [dest1+2*PS] ; reuse mul_array
+ mov dest4, [dest1+3*PS]
+ mov dest5, [dest1+4*PS] ; reuse vec_i
+ mov dest2, [dest1+PS]
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ movdqa xtmph5, xgft5_hi ;Reload const array registers
+
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ; dest1
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pxor xd1, xtmph1
+
+ XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector
+ XLDR xd4, [dest4+pos] ;Reuse xtmph1. Get next dest vector
+
+ ; dest2
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pxor xd2, xtmph2
+
+ XLDR xd5, [dest5+pos] ;Reuse xtmpl2. Get next dest vector
+
+ ; dest3
+ pshufb xtmph3, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph3, xtmpl3 ;GF add high and low partials
+ pxor xd3, xtmph3
+
+ movdqa xtmph2, xgft4_hi ;Reload const array registers
+ movdqa xtmpl3, xgft4_lo ;Reload const array registers
+
+ ; dest5
+ pshufb xtmph5, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph5, xtmpl5 ;GF add high and low partials
+ pxor xd5, xtmph5
+
+ ; dest4
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl3 ;GF add high and low partials
+ pxor xd4, xtmph2
+
+ XSTR [dest1+pos], xd1 ;Store result into dest1
+ XSTR [dest2+pos], xd2 ;Store result into dest2
+ XSTR [dest3+pos], xd3 ;Store result into dest3
+ XSTR [dest4+pos], xd4 ;Store result into dest4
+ XSTR [dest5+pos], xd5 ;Store result into dest5
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp, len ;Overlapped offset length-16
+ XLDR x0, [src+tmp] ;Get next source vector
+
+ sub len, pos
+
+ movdqa xtmpl1, [constip16] ;Load const of i + 16
+ pinsrb xtmph5, len.w, 15
+ pshufb xtmph5, xmask0f ;Broadcast len to all bytes
+ pcmpgtb xtmph5, xtmpl1
+
+ movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ; dest1
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pand xtmph1, xtmph5
+ pxor xd1, xtmph1
+
+ XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector
+ XLDR xd4, [dest4+tmp] ;Reuse xtmph1. Get next dest vector
+
+ ; dest2
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pand xtmph2, xtmph5
+ pxor xd2, xtmph2
+
+ XLDR xd5, [dest5+tmp] ;Reuse xtmpl2. Get next dest vector
+
+ ; dest3
+ pshufb xtmph3, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph3, xtmpl3 ;GF add high and low partials
+ pand xtmph3, xtmph5
+ pxor xd3, xtmph3
+
+ ; dest4
+ pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ pand xgft4_hi, xtmph5
+ pxor xd4, xgft4_hi
+
+ ; dest5
+ pshufb xgft5_hi, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble
+ pxor xgft5_hi, xtmpl5 ;GF add high and low partials
+ pand xgft5_hi, xtmph5
+ pxor xd5, xgft5_hi
+
+ XSTR [dest1+tmp], xd1 ;Store result into dest1
+ XSTR [dest2+tmp], xd2 ;Store result into dest2
+ XSTR [dest3+tmp], xd3 ;Store result into dest3
+ XSTR [dest4+tmp], xd4 ;Store result into dest4
+ XSTR [dest5+tmp], xd5 ;Store result into dest5
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+ dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_5vect_mad_sse, 00, 01, 020c
diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm
new file mode 100644
index 000000000..76047110c
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm
@@ -0,0 +1,315 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
+;;;
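+;;; Computes six GF(2^8) dot products across all source vectors in one
+;;; pass (sketch):
+;;;   dests[m][i] = xor_j gf_mul(tbl[m][j], buffs[j][i])    m = 0..5
+;;; Each source byte is loaded once while six parity rows accumulate in
+;;; registers, e.g. the encode step of a 6-parity Reed-Solomon code.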
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest1 tmp3
+%define dest2 tmp4
+%define vskip1 tmp5
+%define vskip3 tmp6
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft1_lo xmm14
+%define xgft1_hi xmm13
+%define xgft2_lo xmm12
+%define xgft2_hi xmm11
+%define xgft3_lo xmm10
+%define xgft3_hi xmm9
+%define x0 xmm0
+%define xtmpa xmm1
+%define xp1 xmm2
+%define xp2 xmm3
+%define xp3 xmm4
+%define xp4 xmm5
+%define xp5 xmm6
+%define xp6 xmm7
+
+align 16
+mk_global gf_6vect_dot_prod_avx, function
+func(gf_6vect_dot_prod_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest1, [dest]
+ mov dest2, [dest+PS]
+
+
+.loop16:
+ mov tmp, mul_array
+ xor vec_i, vec_i
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ vpxor xp4, xp4
+ vpxor xp5, xp5
+ vpxor xp6, xp6
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ add vec_i, PS
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+
+ vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ vmovdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+ vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ vmovdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+ vmovdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
+ add tmp, 32
+
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp4, xgft1_hi ;xp4 += partial
+
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp5, xgft2_hi ;xp5 += partial
+
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp6, xgft3_hi ;xp6 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+
+ mov tmp, [dest+2*PS]
+ mov ptr, [dest+3*PS]
+ mov vec_i, [dest+4*PS]
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [tmp+pos], xp3
+ mov tmp, [dest+5*PS]
+ XSTR [ptr+pos], xp4
+ XSTR [vec_i+pos], xp5
+ XSTR [tmp+pos], xp6
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_6vect_dot_prod_avx, 02, 04, 0195
diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..5885d97cf
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm
@@ -0,0 +1,326 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest1 tmp3
+%define dest2 tmp4
+%define vskip1 tmp5
+%define vskip3 tmp6
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm15
+%define xmask0fx xmm15
+%define xgft1_lo ymm14
+%define xgft1_hi ymm13
+%define xgft2_lo ymm12
+%define xgft2_hi ymm11
+%define xgft3_lo ymm10
+%define xgft3_hi ymm9
+%define x0 ymm0
+%define xtmpa ymm1
+%define xp1 ymm2
+%define xp2 ymm3
+%define xp3 ymm4
+%define xp4 ymm5
+%define xp5 ymm6
+%define xp6 ymm7
+
+align 16
+mk_global gf_6vect_dot_prod_avx2, function
+func(gf_6vect_dot_prod_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest1, [dest]
+ mov dest2, [dest+PS]
+
+
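+;;; Each 32-byte table block holds the products of one constant with all
+;;; low nibbles (first 16 bytes) and all high nibbles (second 16 bytes), so
+;;; C*b = tbl_lo[b & 0x0f] ^ tbl_hi[b >> 4]. The outer loop emits one
+;;; 32-byte slice of every output per pass; .next_vect walks the sources,
+;;; with vskip1/vskip3 (32*vec/96*vec) the table strides to outputs B and D,
+;;; and C, E, F reached as small multiples of vskip1.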
+.loop32:
+ mov tmp, mul_array
+ xor vec_i, vec_i
+ vpxor xp1, xp1
+ vpxor xp2, xp2
+ vpxor xp3, xp3
+ vpxor xp4, xp4
+ vpxor xp5, xp5
+ vpxor xp6, xp6
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
+ vpand xgft3_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xgft3_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xgft3_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
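+	; AVX2 vpshufb looks up within each 128-bit lane independently; the
+	; two swaps above interleave the low/high-nibble index vectors across
+	; lanes so one lookup into an as-loaded (lo|hi) table register and one
+	; into its lane-swapped (hi|lo) copy produce correct partials in both
+	; lanes.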
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
+
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp2, xgft2_hi ;xp2 += partial
+
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp3, xgft3_hi ;xp3 += partial
+
+
+ vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ ; " Dx{00}, Dx{10}, ..., Dx{f0}
+ vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+ ; " Fx{00}, Fx{10}, ..., Fx{f0}
+ add tmp, 32
+ vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+
+ vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxor xp4, xgft1_hi ;xp4 += partial
+
+ vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxor xp5, xgft2_hi ;xp5 += partial
+
+ vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxor xp6, xgft3_hi ;xp6 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+
+ mov tmp, [dest+2*PS]
+ mov ptr, [dest+3*PS]
+ mov vec_i, [dest+4*PS]
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [tmp+pos], xp3
+ mov tmp, [dest+5*PS]
+ XSTR [ptr+pos], xp4
+ XSTR [vec_i+pos], xp5
+ XSTR [tmp+pos], xp6
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
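+	;; A dot product is a pure function of the sources, so any remainder
+	;; is handled by one more full-width pass that overlaps the previous
+	;; one (len was reduced by 32 on entry); the overlapped bytes are
+	;; simply recomputed to the same values. The mad variants below
+	;; read-modify-write dest and must mask their tails instead.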
+ ;; Tail len
+	mov	pos, len	;Overlapped offset length-32
+ jmp .loop32 ;Do one more overlap pass
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_6vect_dot_prod_avx2, 04, 04, 019a
diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..bb25e67b4
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm
@@ -0,0 +1,354 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbp ; must be saved and restored
+ %define tmp8 rbx ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbp
+ push rbx
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbx
+ pop rbp
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbp ; must be saved and restored
+ %define tmp8 rbx ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbp, 10*16 + 6*8
+ save_reg rbx, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbp, [rsp + 10*16 + 6*8]
+ mov rbx, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define dest3 tmp4
+%define dest4 tmp5
+%define vskip3 tmp6
+%define dest5 tmp7
+%define vskip1 tmp8
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%define xmask0f zmm20
+%define xgft1_lo zmm19
+%define xgft1_loy ymm19
+%define xgft1_hi zmm18
+%define xgft2_lo zmm17
+%define xgft2_loy ymm17
+%define xgft2_hi zmm16
+%define xgft3_lo zmm15
+%define xgft3_loy ymm15
+%define xgft3_hi zmm14
+%define xgft4_lo zmm13
+%define xgft4_loy ymm13
+%define xgft4_hi zmm12
+%define xgft5_lo zmm11
+%define xgft5_loy ymm11
+%define xgft5_hi zmm10
+%define xgft6_lo zmm9
+%define xgft6_loy ymm9
+%define xgft6_hi zmm8
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xp1 zmm2
+%define xp2 zmm3
+%define xp3 zmm4
+%define xp4 zmm5
+%define xp5 zmm6
+%define xp6 zmm7
+
+default rel
+[bits 64]
+
+section .text
+
+align 16
+mk_global gf_6vect_dot_prod_avx512, function
+func(gf_6vect_dot_prod_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
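+	; AVX-512BW vpbroadcastb accepts a GPR source directly; the AVX2
+	; version above needs a vpinsrb/vpbroadcastb pair for the same mask.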
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest2, [dest1+PS]
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS]
+ mov dest5, [dest1+4*PS]
+
+.loop64:
+ vpxorq xp1, xp1, xp1
+ vpxorq xp2, xp2, xp2
+ vpxorq xp3, xp3, xp3
+ vpxorq xp4, xp4, xp4
+ vpxorq xp5, xp5, xp5
+ vpxorq xp6, xp6, xp6
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ vmovdqu8 xgft4_loy, [tmp+vskip3] ;Load array Dx{00}..{0f}, Dx{00}..{f0}
+ vmovdqu8 xgft5_loy, [tmp+vskip1*4] ;Load array Ex{00}..{0f}, Ex{00}..{f0}
+ lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
+ vmovdqu8 xgft6_loy, [tmp+ptr] ;Load array Fx{00}..{0f}, Fx{00}..{f0}
+ add tmp, 32
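+	; vec was pre-scaled by PS for the pointer walk, so vec*(32/PS) and
+	; vec*(64/PS) above recover byte offsets 32*vec and 64*vec (one and
+	; two table strides) without tying up another register.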
+
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
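+	; vshufi64x2 with imm8 0x55 replicates 128-bit lane 1 (the high-nibble
+	; table) into all four lanes and 0x00 replicates lane 0 (the low-nibble
+	; table), the AVX-512 counterpart of the vperm2i128 shuffles above.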
+
+ vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxorq xp1, xp1, xgft1_hi ;xp1 += partial
+
+ vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials
+ vpxorq xp2, xp2, xgft2_hi ;xp2 += partial
+
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+ vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+ vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
+
+ vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials
+ vpxorq xp3, xp3, xgft3_hi ;xp3 += partial
+
+ vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpxorq xp4, xp4, xgft4_hi ;xp4 += partial
+
+ vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55
+ vshufi64x2 xgft5_lo, xgft5_lo, xgft5_lo, 0x00
+
+ vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials
+ vpxorq xp5, xp5, xgft5_hi ;xp5 += partial
+
+ vshufi64x2 xgft6_hi, xgft6_lo, xgft6_lo, 0x55
+ vshufi64x2 xgft6_lo, xgft6_lo, xgft6_lo, 0x00
+
+ vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials
+	vpxorq	xp6, xp6, xgft6_hi	;xp6 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ mov ptr, [dest1] ;reuse ptr
+ mov tmp, [dest1+5*PS] ;reuse tmp
+
+ XSTR [dest2+pos], xp2
+ XSTR [dest3+pos], xp3
+ XSTR [dest4+pos], xp4
+ XSTR [dest5+pos], xp5
+
+ XSTR [ptr+pos], xp1
+ XSTR [tmp+pos], xp6
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_6vect_dot_prod_avx512
+no_gf_6vect_dot_prod_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm
new file mode 100644
index 000000000..41176bb99
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm
@@ -0,0 +1,315 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r12 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13 ; must be saved and restored
+ %define tmp4 r14 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest1 tmp3
+%define dest2 tmp4
+%define vskip1 tmp5
+%define vskip3 tmp6
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft1_lo xmm2
+%define xgft1_hi xmm3
+%define xgft2_lo xmm4
+%define xgft2_hi xmm5
+%define xgft3_lo xmm6
+%define xgft3_hi xmm7
+%define x0 xmm0
+%define xtmpa xmm1
+%define xp1 xmm8
+%define xp2 xmm9
+%define xp3 xmm10
+%define xp4 xmm11
+%define xp5 xmm12
+%define xp6 xmm13
+
+align 16
+mk_global gf_6vect_dot_prod_sse, function
+func(gf_6vect_dot_prod_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov vskip1, vec
+ imul vskip1, 32
+ mov vskip3, vec
+ imul vskip3, 96
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ mov dest1, [dest]
+ mov dest2, [dest+PS]
+
+
+.loop16:
+ mov tmp, mul_array
+ xor vec_i, vec_i
+ pxor xp1, xp1
+ pxor xp2, xp2
+ pxor xp3, xp3
+ pxor xp4, xp4
+ pxor xp5, xp5
+ pxor xp6, xp6
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ add vec_i, PS
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
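+	; SSE lacks non-destructive three-operand forms, so an unshifted copy
+	; of the source is kept in xtmpa before x0 is shifted; the AVX
+	; variants derive both nibble vectors straight from x0.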
+
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pxor xp1, xgft1_hi ;xp1 += partial
+
+ pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ pxor xp2, xgft2_hi ;xp2 += partial
+
+ pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ pxor xp3, xgft3_hi ;xp3 += partial
+
+
+ movdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+ movdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
+ movdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ movdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ movdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+ movdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
+ add tmp, 32
+
+
+ pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft1_hi, xgft1_lo ;GF add high and low partials
+ pxor xp4, xgft1_hi ;xp4 += partial
+
+ pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft2_hi, xgft2_lo ;GF add high and low partials
+ pxor xp5, xgft2_hi ;xp5 += partial
+
+ pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft3_hi, xgft3_lo ;GF add high and low partials
+ pxor xp6, xgft3_hi ;xp6 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+
+ mov tmp, [dest+2*PS]
+ mov ptr, [dest+3*PS]
+ mov vec_i, [dest+4*PS]
+
+ XSTR [dest1+pos], xp1
+ XSTR [dest2+pos], xp2
+ XSTR [tmp+pos], xp3
+ mov tmp, [dest+5*PS]
+ XSTR [ptr+pos], xp4
+ XSTR [vec_i+pos], xp5
+ XSTR [tmp+pos], xp6
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_6vect_dot_prod_sse, 00, 05, 0066
diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm
new file mode 100644
index 000000000..c9ce490aa
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm
@@ -0,0 +1,394 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
+;;;
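+;;; Multiply-accumulate form: dests[j][i] ^= C(j,vec_i)*src[i] in GF(2^8)
+;;; for the six outputs j, where C(j,vec_i) is the constant whose lookup
+;;; tables sit at mul_array + 32*(j*vec + vec_i). Sets rax to 0, or to 1
+;;; when len is below the 16-byte minimum.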
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13
+ %define tmp4 r14
+ %define tmp5 rdi
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 5*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12
+ %define tmp4 r13
+ %define tmp5 r14
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp4
+%define dest3 tmp2
+%define dest4 mul_array
+%define dest5 tmp5
+%define dest6 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft4_lo xmm14
+%define xgft4_hi xmm13
+%define xgft5_lo xmm12
+%define xgft5_hi xmm11
+%define xgft6_lo xmm10
+%define xgft6_hi xmm9
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xd1 xmm8
+%define xd2 xtmpl1
+%define xd3 xtmph1
+
+
+align 16
+mk_global gf_6vect_mad_avx, function
+func(gf_6vect_mad_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ mov tmp, vec
+ sal vec_i, 5 ;Multiply by 32
+ lea tmp3, [mul_array + vec_i]
+ sal tmp, 6 ;Multiply by 64
+
+ sal vec, 5 ;Multiply by 32
+ lea vec_i, [tmp + vec] ;vec_i = vec*96
+ lea mul_array, [tmp + vec_i] ;mul_array = vec*160
+
+ vmovdqu xgft5_lo, [tmp3+2*tmp] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ vmovdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ vmovdqu xgft4_lo, [tmp3+vec_i] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
+ vmovdqu xgft4_hi, [tmp3+vec_i+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+ vmovdqu xgft6_lo, [tmp3+mul_array] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+ vmovdqu xgft6_hi, [tmp3+mul_array+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
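+	; Only the D/E/F tables stay resident across the loop: the registers
+	; holding the A/B/C temporaries double as dest accumulators (xd2/xd3
+	; alias xtmpl1/xtmph1 above), so those tables are reloaded on every
+	; iteration of .loop16.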
+
+ mov dest2, [dest1+PS]
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS] ; reuse mul_array
+ mov dest5, [dest1+4*PS]
+ mov dest6, [dest1+5*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+
+ ;dest1
+ vpshufb xtmph1, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xtmph1
+
+ XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector
+
+ ;dest2
+ vpshufb xtmph2, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xtmph2
+
+ ;dest3
+ vpshufb xtmph3, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmpl3 ;GF add high and low partials
+ vpxor xd3, xtmph3
+
+ XSTR [dest1+pos], xd1 ;Store result into dest1
+ XSTR [dest2+pos], xd2 ;Store result into dest2
+ XSTR [dest3+pos], xd3 ;Store result into dest3
+
+ ;dest4
+ XLDR xd1, [dest4+pos] ;Get next dest vector
+ vpshufb xtmph1, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxor xd1, xd1, xtmph1
+
+ XLDR xd2, [dest5+pos] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest6+pos] ;reuse xtmph1. Get next dest vector
+
+ ;dest5
+ vpshufb xtmph2, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxor xd2, xd2, xtmph2
+
+ ;dest6
+ vpshufb xtmph3, xgft6_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3, xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+ vpxor xd3, xd3, xtmph3
+
+ XSTR [dest4+pos], xd1 ;Store result into dest4
+ XSTR [dest5+pos], xd2 ;Store result into dest5
+ XSTR [dest6+pos], xd3 ;Store result into dest6
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+ ;; Overlapped offset length-16
+	mov	tmp, len	;Overlapped offset length-16; keep len for mask calc
+
+ XLDR x0, [src+tmp] ;Get next source vector
+ XLDR xd1, [dest4+tmp] ;Get next dest vector
+ XLDR xd2, [dest5+tmp] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest6+tmp] ;reuse xtmph1. Get next dest vector
+
+ sub len, pos
+
+ vmovdqa xtmph3, [constip16] ;Load const of i + 16
+ vpinsrb xtmpl3, len.w, 15
+ vpshufb xtmpl3, xmask0f ;Broadcast len to all bytes
+ vpcmpgtb xtmpl3, xtmpl3, xtmph3
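+	; Tail predicate: len is now (bytes remaining - 16), a negative value.
+	; Broadcast its low byte and compare it, signed per byte, against
+	; constip16 = {-1,-2,...,-16}: only the trailing not-yet-written lanes
+	; end up 0xff, so bytes the main loop already stored get a zero
+	; partial XORed in and are rewritten unchanged.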
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ;dest4
+ vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpand xgft4_hi, xgft4_hi, xtmpl3
+ vpxor xd1, xd1, xgft4_hi
+
+ ;dest5
+ vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials
+ vpand xgft5_hi, xgft5_hi, xtmpl3
+ vpxor xd2, xd2, xgft5_hi
+
+ ;dest6
+ vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials
+ vpand xgft6_hi, xgft6_hi, xtmpl3
+ vpxor xd3, xd3, xgft6_hi
+
+ XSTR [dest4+tmp], xd1 ;Store result into dest4
+ XSTR [dest5+tmp], xd2 ;Store result into dest5
+ XSTR [dest6+tmp], xd3 ;Store result into dest6
+
+ vmovdqu xgft4_lo, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xgft4_hi, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ vmovdqu xgft5_lo, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ vmovdqu xgft5_hi, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ vmovdqu xgft6_lo, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xgft6_hi, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest3 vector
+
+ ;dest1
+ vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
+ vpand xgft4_hi, xgft4_hi, xtmpl3
+ vpxor xd1, xd1, xgft4_hi
+
+ ;dest2
+ vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials
+ vpand xgft5_hi, xgft5_hi, xtmpl3
+ vpxor xd2, xd2, xgft5_hi
+
+ ;dest3
+ vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials
+ vpand xgft6_hi, xgft6_hi, xtmpl3
+ vpxor xd3, xd3, xgft6_hi
+
+ XSTR [dest1+tmp], xd1 ;Store result into dest1
+ XSTR [dest2+tmp], xd2 ;Store result into dest2
+ XSTR [dest3+tmp], xd3 ;Store result into dest3
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_6vect_mad_avx, 02, 01, 0210
diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm
new file mode 100644
index 000000000..8f94c6aa3
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm
@@ -0,0 +1,400 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r15, 10*16 + 2*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r15, [rsp + 10*16 + 2*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r12
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 tmp3
+%define dest3 tmp2
+%define dest4 mul_array
+%define dest5 vec
+%define dest6 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm15
+%define xmask0fx xmm15
+%define xgft1_lo ymm14
+%define xgft2_lo ymm13
+%define xgft3_lo ymm12
+%define xgft4_lo ymm11
+%define xgft5_lo ymm10
+%define xgft6_lo ymm9
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xtmpl ymm2
+%define xtmplx xmm2
+%define xtmph ymm3
+%define xtmphx xmm3
+%define xd1 ymm4
+%define xd2 ymm5
+%define xd3 ymm6
+%define xd4 ymm7
+%define xd5 ymm8
+%define xd6 xd1
+
+align 16
+mk_global gf_6vect_mad_avx2, function
+func(gf_6vect_mad_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5 ;Multiply by 32
+ lea tmp, [mul_array + vec_i]
+ mov vec_i, vec
+ mov mul_array, vec
+ sal vec_i, 1
+ sal mul_array, 1
+ add vec_i, vec ;vec_i=vec*96
+	add	mul_array, vec_i	;mul_array=vec*160
+
+ vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+ ; " Ax{00}, Ax{10}, ..., Ax{f0}
+ vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+ ; " Bx{00}, Bx{10}, ..., Bx{f0}
+ vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ ; " Cx{00}, Cx{10}, ..., Cx{f0}
+	vmovdqu	xgft4_lo, [tmp+vec_i]	;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+					; "     Dx{00}, Dx{10}, ..., Dx{f0}
+ vmovdqu xgft5_lo, [tmp+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ ; " Ex{00}, Ex{10}, ..., Ex{f0}
+	vmovdqu	xgft6_lo, [tmp+mul_array]	;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+					; "     Fx{00}, Fx{10}, ..., Fx{f0}
+
+ mov dest2, [dest1+PS] ; reuse tmp3
+ mov dest3, [dest1+2*PS] ; reuse tmp2
+ mov dest4, [dest1+3*PS] ; reuse mul_array
+ mov dest5, [dest1+4*PS] ; reuse vec
+ mov dest6, [dest1+5*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop32:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+ XLDR xd4, [dest4+pos] ;Get next dest vector
+ XLDR xd5, [dest5+pos] ;Get next dest vector
+
+ vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
+
+ ;dest1
+ vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd1, xd1, xtmph ;xd1 += partial
+
+ XSTR [dest1+pos], xd1 ;Store result into dest1
+
+ ;dest2
+ vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd2, xd2, xtmph ;xd2 += partial
+
+ ;dest3
+ vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd3, xd3, xtmph ;xd3 += partial
+
+ XLDR xd6, [dest6+pos] ;reuse xd1. Get next dest vector
+
+ ;dest4
+ vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd4, xd4, xtmph ;xd4 += partial
+
+ ;dest5
+ vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd5, xd5, xtmph ;xd5 += partial
+
+ ;dest6
+ vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd6, xd6, xtmph ;xd6 += partial
+
+ XSTR [dest2+pos], xd2 ;Store result into dest2
+ XSTR [dest3+pos], xd3 ;Store result into dest3
+ XSTR [dest4+pos], xd4 ;Store result into dest4
+ XSTR [dest5+pos], xd5 ;Store result into dest5
+ XSTR [dest6+pos], xd6 ;Store result into dest6
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan32:
+ ;; Tail len
+ ;; Do one more overlap pass
+ mov tmp.b, 0x1f
+ vpinsrb xtmphx, xtmphx, tmp.w, 0
+ vpbroadcastb xtmph, xtmphx ;Construct mask 0x1f1f1f...
+
+ mov tmp, len ;Overlapped offset length-32
+
+ XLDR x0, [src+tmp] ;Get next source vector
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;Get next dest vector
+ XLDR xd3, [dest3+tmp] ;Get next dest vector
+ XLDR xd4, [dest4+tmp] ;Get next dest vector
+ XLDR xd5, [dest5+tmp] ;Get next dest vector
+
+ sub len, pos
+
+ vpinsrb xtmplx, xtmplx, len.w, 15
+ vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx
+ vpshufb xtmpl, xtmpl, xtmph ;Broadcast len to all bytes. xtmph=0x1f1f1f...
+ vpcmpgtb xtmpl, xtmpl, [constip32]
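+	; Same tail predicate as the AVX version, widened to 32 lanes:
+	; len - 32 (negative) is broadcast via the 0x1f shuffle mask and
+	; compared signed against constip32 = {-1,...,-32}, confining updates
+	; to the trailing bytes the main loop has not stored yet.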
+
+ vpand xtmph, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vperm2i128 xtmpa, xtmph, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
+ vperm2i128 x0, xtmph, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
+
+ ;dest1
+ vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xgft1_lo ;GF add high and low partials
+ vpand xtmph, xtmph, xtmpl
+ vpxor xd1, xd1, xtmph ;xd1 += partial
+
+ XSTR [dest1+tmp], xd1 ;Store result into dest1
+
+ ;dest2
+ vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xgft2_lo ;GF add high and low partials
+ vpand xtmph, xtmph, xtmpl
+ vpxor xd2, xd2, xtmph ;xd2 += partial
+
+ ;dest3
+ vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xgft3_lo ;GF add high and low partials
+ vpand xtmph, xtmph, xtmpl
+ vpxor xd3, xd3, xtmph ;xd3 += partial
+
+ XLDR xd6, [dest6+tmp] ;reuse xd1. Get next dest vector
+
+ ;dest4
+ vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xgft4_lo ;GF add high and low partials
+ vpand xtmph, xtmph, xtmpl
+ vpxor xd4, xd4, xtmph ;xd4 += partial
+
+ ;dest5
+ vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xgft5_lo ;GF add high and low partials
+ vpand xtmph, xtmph, xtmpl
+ vpxor xd5, xd5, xtmph ;xd5 += partial
+
+ ;dest6
+ vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo
+ vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
+ vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xgft6_lo ;GF add high and low partials
+ vpand xtmph, xtmph, xtmpl
+ vpxor xd6, xd6, xtmph ;xd6 += partial
+
+ XSTR [dest2+tmp], xd2 ;Store result into dest2
+ XSTR [dest3+tmp], xd3 ;Store result into dest3
+ XSTR [dest4+tmp], xd4 ;Store result into dest4
+ XSTR [dest5+tmp], xd5 ;Store result into dest5
+ XSTR [dest6+tmp], xd6 ;Store result into dest6
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+align 32
+constip32:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+ dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7
+
+;;; func core, ver, snum
+slversion gf_6vect_mad_avx2, 04, 01, 0211
diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm
new file mode 100644
index 000000000..c2383a2ee
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm
@@ -0,0 +1,321 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_mad_avx512(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12 ;must be saved and restored
+ %define return rax
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13
+ %define return rax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r15, 10*16 + 1*8
+ save_reg r13, 10*16 + 2*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r15, [rsp + 10*16 + 1*8]
+ mov r13, [rsp + 10*16 + 2*8]
+ add rsp, stack_size
+%endmacro
+%endif
+
+%define PS 8
+%define len arg0
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define dest2 tmp3
+%define dest3 tmp2
+%define dest4 mul_array
+%define dest5 vec
+%define dest6 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+[bits 64]
+section .text
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xtmpl1 zmm2
+%define xtmph1 zmm3
+%define xgft1_hi zmm4
+%define xgft1_lo zmm5
+%define xgft1_loy ymm5
+%define xgft2_hi zmm6
+%define xgft2_lo zmm7
+%define xgft2_loy ymm7
+%define xgft3_hi zmm8
+%define xgft3_lo zmm9
+%define xgft3_loy ymm9
+%define xgft4_hi zmm10
+%define xgft4_lo zmm11
+%define xgft4_loy ymm11
+%define xgft5_hi zmm12
+%define xgft5_lo zmm13
+%define xgft5_loy ymm13
+%define xgft6_hi zmm14
+%define xgft6_lo zmm15
+%define xgft6_loy ymm15
+%define xd1 zmm16
+%define xd2 zmm17
+%define xd3 zmm18
+%define xd4 zmm19
+%define xd5 zmm20
+%define xd6 zmm21
+%define xmask0f zmm22
+%define xtmpl2 zmm23
+%define xtmpl3 zmm24
+%define xtmpl4 zmm25
+%define xtmpl5 zmm26
+%define xtmph2 zmm27
+%define xtmph3 zmm28
+%define xtmph4 zmm29
+%define xtmph5 zmm30
+%define xtmph6 zmm31
+
+align 16
+mk_global gf_6vect_mad_avx512, function
+func(gf_6vect_mad_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec_i, 5 ;Multiply by 32
+ sal vec, 5 ;Multiply by 32
+ lea tmp, [mul_array + vec_i]
+ mov vec_i, vec
+ mov mul_array, vec
+ sal vec_i, 1 ;vec_i=vec*64
+ sal mul_array, 1 ;mul_array=vec*64
+ add vec_i, vec ;vec_i=vec*96
+	add	mul_array, vec_i	;mul_array=vec*160
+
+ vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0}
+ vmovdqu xgft3_loy, [tmp+2*vec] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ vmovdqu xgft4_loy, [tmp+vec_i] ;Load array Dx{00}..{0f}, Dx{00}..{f0}
+ vmovdqu xgft5_loy, [tmp+4*vec] ;Load array Ex{00}..{0f}, Ex{00}..{f0}
+ vmovdqu xgft6_loy, [tmp+mul_array] ;Load array Fx{00}..{0f}, Fx{00}..{f0}
+
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+ vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+ vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+ vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+ vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+ vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+ vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
+ vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55
+ vshufi64x2 xgft5_lo, xgft5_lo, xgft5_lo, 0x00
+ vshufi64x2 xgft6_hi, xgft6_lo, xgft6_lo, 0x55
+ vshufi64x2 xgft6_lo, xgft6_lo, xgft6_lo, 0x00
+
+ mov dest2, [dest1+PS]
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS] ; reuse mul_array
+ mov dest5, [dest1+4*PS] ; reuse vec
+ mov dest6, [dest1+5*PS] ; reuse vec_i
+ mov dest1, [dest1]
+ mov tmp, -1
+ kmovq k1, tmp
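+	; k1 starts as all ones so the {k1}{z} lookups in .loop64 run
+	; full-width; the tail path below rewrites k1 to cover only the final
+	; partial block.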
+
+.loop64:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+ XLDR xd2, [dest2+pos] ;Get next dest vector
+ XLDR xd3, [dest3+pos] ;Get next dest vector
+ XLDR xd4, [dest4+pos] ;Get next dest vector
+ XLDR xd5, [dest5+pos] ;Get next dest vector
+ XLDR xd6, [dest6+pos] ;Get next dest vector
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ ; dest1
+ vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
+ vpxorq xd1, xd1, xtmph1 ;xd1 += partial
+
+ ; dest2
+ vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
+ vpxorq xd2, xd2, xtmph2 ;xd2 += partial
+
+ ; dest3
+ vpshufb xtmph3 {k1}{z}, xgft3_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl3 {k1}{z}, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
+ vpxorq xd3, xd3, xtmph3 ;xd3 += partial
+
+ ; dest4
+ vpshufb xtmph4 {k1}{z}, xgft4_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl4 {k1}{z}, xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph4, xtmph4, xtmpl4 ;GF add high and low partials
+ vpxorq xd4, xd4, xtmph4 ;xd4 += partial
+
+ ; dest5
+ vpshufb xtmph5 {k1}{z}, xgft5_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl5 {k1}{z}, xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph5, xtmph5, xtmpl5 ;GF add high and low partials
+ vpxorq xd5, xd5, xtmph5 ;xd5 += partial
+
+ ; dest6
+ vpshufb xtmph6 {k1}{z}, xgft6_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl5 {k1}{z}, xgft6_lo, xtmpa ;Lookup mul table of low nibble. Reuse xtmpl5
+ vpxorq xtmph6, xtmph6, xtmpl5 ;GF add high and low partials.
+ vpxorq xd6, xd6, xtmph6 ;xd6 += partial
+
+ XSTR [dest1+pos], xd1
+ XSTR [dest2+pos], xd2
+ XSTR [dest3+pos], xd3
+ XSTR [dest4+pos], xd4
+ XSTR [dest5+pos], xd5
+ XSTR [dest6+pos], xd6
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, (1 << 63)
+ lea tmp, [len + 64 - 1]
+ and tmp, 63
+ sarx pos, pos, tmp
+ kmovq k1, pos
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_6vect_mad_avx512
+no_gf_6vect_mad_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
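
The overlapped-tail logic just above (the sarx/kmovq sequence) deserves a note: instead of a scalar remainder loop, the kernel re-runs one full 64-byte pass at offset len and relies on the zeroing opmask k1 so that only the not-yet-processed bytes contribute. With {k1}{z}, vpshufb yields zero for masked-off bytes, so the subsequent xor into xd leaves those dest bytes unchanged; without the mask, the overlap would xor the partial product in a second time and undo the earlier pass. A minimal C sketch of the mask derivation, assuming len has already had 64 subtracted (as after "sub len, 64") and the original length is not a multiple of 64; it mirrors the asm's sarx and assumes arithmetic right shift of signed values:

    #include <stdint.h>

    /* Bit n of the returned mask governs byte n of the 64-byte window
     * starting at offset len.  The top r bits end up set, where
     * r = orig_len % 64 -- exactly the bytes not yet processed. */
    static uint64_t tail_mask(uint64_t len)
    {
            uint64_t shift = (len + 64 - 1) & 63;   /* = (orig_len % 64) - 1 */
            return (uint64_t)((int64_t)(1ULL << 63) >> shift);
    }
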
diff --git a/src/isa-l/erasure_code/gf_6vect_mad_sse.asm b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm
new file mode 100644
index 000000000..f33ec0646
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm
@@ -0,0 +1,406 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp2 r10
+ %define tmp3 r13
+ %define tmp4 r14
+ %define tmp5 rdi
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 5*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ movdqa [rsp+16*3],xmm9
+ movdqa [rsp+16*4],xmm10
+ movdqa [rsp+16*5],xmm11
+ movdqa [rsp+16*6],xmm12
+ movdqa [rsp+16*7],xmm13
+ movdqa [rsp+16*8],xmm14
+ movdqa [rsp+16*9],xmm15
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ movdqa xmm9, [rsp+16*3]
+ movdqa xmm10, [rsp+16*4]
+ movdqa xmm11, [rsp+16*5]
+ movdqa xmm12, [rsp+16*6]
+ movdqa xmm13, [rsp+16*7]
+ movdqa xmm14, [rsp+16*8]
+ movdqa xmm15, [rsp+16*9]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp2 r10
+ %define tmp3 r12
+ %define tmp4 r13
+ %define tmp5 r14
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 tmp2
+%define dest4 tmp4
+%define dest5 tmp5
+%define dest6 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft4_lo xmm14
+%define xgft4_hi xmm13
+%define xgft5_lo xmm12
+%define xgft5_hi xmm11
+%define xgft6_lo xmm10
+%define xgft6_hi xmm9
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xd1 xmm8
+%define xd2 xtmpl1
+%define xd3 xtmph1
+
+
+align 16
+mk_global gf_6vect_mad_sse, function
+func(gf_6vect_mad_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+ mov tmp, vec
+ sal vec_i, 5 ;Multiply by 32
+ lea tmp3, [mul_array + vec_i]
+ sal tmp, 6 ;Multiply by 64
+
+ sal vec, 5 ;Multiply by 32
+ lea vec_i, [tmp + vec] ;vec_i = vec*96
+ lea mul_array, [tmp + vec_i] ;mul_array = vec*160
+
+ movdqu xgft5_lo, [tmp3+2*tmp] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+ movdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
+ movdqu xgft4_lo, [tmp3+vec_i] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
+ movdqu xgft4_hi, [tmp3+vec_i+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+ movdqu xgft6_lo, [tmp3+mul_array] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+ movdqu xgft6_hi, [tmp3+mul_array+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
+
+ mov dest2, [dest1+PS] ; reuse mul_array
+ mov dest3, [dest1+2*PS]
+ mov dest4, [dest1+3*PS]
+ mov dest5, [dest1+4*PS]
+ mov dest6, [dest1+5*PS] ; reuse vec_i
+ mov dest1, [dest1]
+
+.loop16:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ XLDR xd1, [dest1+pos] ;Get next dest vector
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ;dest1
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pxor xd1, xtmph1
+
+ XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest3 vector
+
+ ;dest2
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pxor xd2, xtmph2
+
+ ;dest3
+ pshufb xtmph3, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph3, xtmpl3 ;GF add high and low partials
+ pxor xd3, xtmph3
+
+ XSTR [dest1+pos], xd1 ;Store result into dest1
+ XSTR [dest2+pos], xd2 ;Store result into dest2
+ XSTR [dest3+pos], xd3 ;Store result into dest3
+
+ movdqa xtmph1, xgft4_hi ;Reload const array registers
+ movdqa xtmpl1, xgft4_lo ;Reload const array registers
+ movdqa xtmph2, xgft5_hi ;Reload const array registers
+ movdqa xtmpl2, xgft5_lo ;Reload const array registers
+ movdqa xtmph3, xgft6_hi ;Reload const array registers
+ movdqa xtmpl3, xgft6_lo ;Reload const array registers
+
+ ;dest4
+ XLDR xd1, [dest4+pos] ;Get next dest vector
+ pshufb xtmph1, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph1, xtmpl1 ;GF add high and low partials
+ pxor xd1, xtmph1
+
+ XLDR xd2, [dest5+pos] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest6+pos] ;reuse xtmph1. Get next dest vector
+
+ ;dest5
+ pshufb xtmph2, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph2, xtmpl2 ;GF add high and low partials
+ pxor xd2, xtmph2
+
+ ;dest6
+ pshufb xtmph3, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph3, xtmpl3 ;GF add high and low partials
+ pxor xd3, xtmph3
+
+ XSTR [dest4+pos], xd1 ;Store result into dest4
+ XSTR [dest5+pos], xd2 ;Store result into dest5
+ XSTR [dest6+pos], xd3 ;Store result into dest6
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+.lessthan16:
+ ;; Tail len
+ ;; Do one more overlap pass
+ ;; Overlapped offset length-16
+ mov tmp, len ;Back up len; it is modified below but still needed as the tail offset
+
+ XLDR x0, [src+tmp] ;Get next source vector
+ XLDR xd1, [dest4+tmp] ;Get next dest vector
+ XLDR xd2, [dest5+tmp] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest6+tmp] ;reuse xtmph1. Get next dest vector
+
+ sub len, pos
+
+ movdqa xtmph3, [constip16] ;Load const of i + 16
+ pinsrb xtmpl3, len.w, 15
+ pshufb xtmpl3, xmask0f ;Broadcast len to all bytes
+ pcmpgtb xtmpl3, xtmph3
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ ;dest4
+ pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ pand xgft4_hi, xtmpl3
+ pxor xd1, xgft4_hi
+
+ ;dest5
+ pshufb xgft5_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft5_hi, xgft5_lo ;GF add high and low partials
+ pand xgft5_hi, xtmpl3
+ pxor xd2, xgft5_hi
+
+ ;dest6
+ pshufb xgft6_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft6_hi, xgft6_lo ;GF add high and low partials
+ pand xgft6_hi, xtmpl3
+ pxor xd3, xgft6_hi
+
+ XSTR [dest4+tmp], xd1 ;Store result into dest4
+ XSTR [dest5+tmp], xd2 ;Store result into dest5
+ XSTR [dest6+tmp], xd3 ;Store result into dest6
+
+ movdqu xgft4_lo, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ movdqu xgft4_hi, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+ movdqu xgft5_lo, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xgft5_hi, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+ movdqu xgft6_lo, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ movdqu xgft6_hi, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ XLDR xd1, [dest1+tmp] ;Get next dest vector
+ XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
+ XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest3 vector
+
+ ;dest1
+ pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft4_hi, xgft4_lo ;GF add high and low partials
+ pand xgft4_hi, xtmpl3
+ pxor xd1, xgft4_hi
+
+ ;dest2
+ pshufb xgft5_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft5_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft5_hi, xgft5_lo ;GF add high and low partials
+ pand xgft5_hi, xtmpl3
+ pxor xd2, xgft5_hi
+
+ ;dest3
+ pshufb xgft6_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft6_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft6_hi, xgft6_lo ;GF add high and low partials
+ pand xgft6_hi, xtmpl3
+ pxor xd3, xgft6_hi
+
+ XSTR [dest1+tmp], xd1 ;Store result into dest1
+ XSTR [dest2+tmp], xd2 ;Store result into dest2
+ XSTR [dest3+tmp], xd3 ;Store result into dest3
+
+.return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+.return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+ dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_6vect_mad_sse, 00, 01, 020f
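
All of these kernels implement GF(2^8) multiplication with the same split-nibble pshufb trick: a product a*b is computed as the xor of two 16-entry table lookups, one keyed by the low nibble of b and one by the high nibble, which is valid because multiplication distributes over GF addition (xor). The 32 bytes loaded per coefficient are exactly those two 16-entry tables. A scalar sketch of the identity each 16-lane pshufb pair computes (table names are illustrative):

    /* tbl_lo[i] = gf_mul(a, i) and tbl_hi[i] = gf_mul(a, i << 4), i = 0..15 */
    static unsigned char gf_mul_nibbles(const unsigned char tbl_lo[16],
                                        const unsigned char tbl_hi[16],
                                        unsigned char b)
    {
            /* a*b = a*(b_lo ^ (b_hi << 4)) = a*b_lo ^ a*(b_hi << 4) */
            return tbl_lo[b & 0x0f] ^ tbl_hi[b >> 4];
    }
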
diff --git a/src/isa-l/erasure_code/gf_inverse_test.c b/src/isa-l/erasure_code/gf_inverse_test.c
new file mode 100644
index 000000000..418d7c195
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_inverse_test.c
@@ -0,0 +1,225 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include <assert.h>
+
+#include "erasure_code.h"
+
+#define TEST_LEN 8192
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 128
+#endif
+#ifndef RANDOMS
+# define RANDOMS 200
+#endif
+
+#define KMAX TEST_SOURCES
+
+typedef unsigned char u8;
+
+void matrix_mult(u8 * a, u8 * b, u8 * c, int n)
+{
+ int i, j, k;
+ u8 d;
+
+ for (i = 0; i < n; i++) {
+ for (j = 0; j < n; j++) {
+ d = 0;
+ for (k = 0; k < n; k++) {
+ d ^= gf_mul(a[n * i + k], b[n * k + j]);
+ }
+ c[i * n + j] = d;
+ }
+ }
+}
+
+void print_matrix(u8 * a, int n)
+{
+ int i, j;
+
+ for (i = 0; i < n; i++) {
+ for (j = 0; j < n; j++) {
+ printf(" %2x", a[i * n + j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+int is_ident(u8 * a, const int n)
+{
+ int i, j;
+ u8 c;
+ for (i = 0; i < n; i++) {
+ for (j = 0; j < n; j++) {
+ c = *a++;
+ if (i == j)
+ c--;
+ if (c != 0)
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int inv_test(u8 * in, u8 * inv, u8 * sav, int n)
+{
+ memcpy(sav, in, n * n);
+
+ if (gf_invert_matrix(in, inv, n)) {
+ printf("Given singular matrix\n");
+ print_matrix(sav, n);
+ return -1;
+ }
+
+ matrix_mult(inv, sav, in, n);
+
+ if (is_ident(in, n)) {
+ printf("fail\n");
+ print_matrix(sav, n);
+ print_matrix(inv, n);
+ print_matrix(in, n);
+ return -1;
+ }
+ putchar('.');
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, k, t;
+ u8 *test_mat, *save_mat, *invr_mat;
+
+ u8 test1[] = { 1, 1, 6,
+ 1, 1, 1,
+ 7, 1, 9
+ };
+
+ u8 test2[] = { 0, 1, 6,
+ 1, 0, 1,
+ 0, 1, 9
+ };
+
+ u8 test3[] = { 0, 0, 1,
+ 1, 0, 0,
+ 0, 1, 1
+ };
+
+ u8 test4[] = { 0, 1, 6, 7,
+ 1, 1, 0, 0,
+ 0, 1, 2, 3,
+ 3, 2, 2, 3
+ }; // = row3+3*row2
+
+ printf("gf_inverse_test: max=%d ", KMAX);
+
+ test_mat = malloc(KMAX * KMAX);
+ save_mat = malloc(KMAX * KMAX);
+ invr_mat = malloc(KMAX * KMAX);
+
+ if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
+ return -1;
+
+ // Test with lots of leading 1's
+ k = 3;
+ memcpy(test_mat, test1, k * k);
+ if (inv_test(test_mat, invr_mat, save_mat, k))
+ return -1;
+
+ // Test with leading zeros
+ k = 3;
+ memcpy(test_mat, test2, k * k);
+ if (inv_test(test_mat, invr_mat, save_mat, k))
+ return -1;
+
+ // Test 3
+ k = 3;
+ memcpy(test_mat, test3, k * k);
+ if (inv_test(test_mat, invr_mat, save_mat, k))
+ return -1;
+
+ // Test 4 - try a singular matrix
+ k = 4;
+ memcpy(test_mat, test4, k * k);
+ if (!gf_invert_matrix(test_mat, invr_mat, k)) {
+ printf("Fail: didn't catch singular matrix\n");
+ print_matrix(test4, 4);
+ return -1;
+ }
+ // Do random test of size KMAX
+ k = KMAX;
+
+ for (i = 0; i < k * k; i++)
+ test_mat[i] = save_mat[i] = rand();
+
+ if (gf_invert_matrix(test_mat, invr_mat, k)) {
+ printf("rand picked a singular matrix, try again\n");
+ return -1;
+ }
+
+ matrix_mult(invr_mat, save_mat, test_mat, k);
+
+ if (is_ident(test_mat, k)) {
+ printf("fail\n");
+ print_matrix(save_mat, k);
+ print_matrix(invr_mat, k);
+ print_matrix(test_mat, k);
+ return -1;
+ }
+ // Do Randoms. Random size and coefficients
+ for (t = 0; t < RANDOMS; t++) {
+ k = rand() % KMAX;
+
+ for (i = 0; i < k * k; i++)
+ test_mat[i] = save_mat[i] = rand();
+
+ if (gf_invert_matrix(test_mat, invr_mat, k))
+ continue;
+
+ matrix_mult(invr_mat, save_mat, test_mat, k);
+
+ if (is_ident(test_mat, k)) {
+ printf("fail rand k=%d\n", k);
+ print_matrix(save_mat, k);
+ print_matrix(invr_mat, k);
+ print_matrix(test_mat, k);
+ return -1;
+ }
+ if (0 == (t % 8))
+ putchar('.');
+ }
+
+ printf(" Pass\n");
+ return 0;
+}
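
The "// = row3+3*row2" comment on test4 can be checked directly: GF(2^8) addition is xor, gf_mul(3,1) = 3, and gf_mul(3,0) = 0, so row3 ^ 3*row2 = {0^3, 1^3, 2^0, 3^0} = {3, 2, 2, 3}, which is row 4. The rows are therefore linearly dependent and the matrix singular by construction. A hedged snippet verifying the dependence with isa-l's gf_mul() (assumes erasure_code.h, as in the test above; helper name illustrative):

    #include <assert.h>

    /* Sketch: confirm test4's fourth row equals row3 ^ 3*row2 in GF(2^8). */
    static void check_test4_dependence(const unsigned char t[16])
    {
            int j;
            for (j = 0; j < 4; j++)
                    assert(t[3 * 4 + j] == (t[2 * 4 + j] ^ gf_mul(3, t[1 * 4 + j])));
    }
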
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c b/src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c
new file mode 100644
index 000000000..1d23eb67f
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c
@@ -0,0 +1,152 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "test.h"
+#include "erasure_code.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 10
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN (GT_L3_CACHE / TEST_SOURCES)
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+typedef unsigned char u8;
+
+// Global GF(256) tables
+u8 gff[256];
+u8 gflog[256];
+u8 gf_mul_table[256 * 256];
+
+void mk_gf_field(void)
+{
+ int i;
+ u8 s = 1;
+ gflog[0] = 0;
+
+ for (i = 0; i < 256; i++) {
+ gff[i] = s;
+ gflog[s] = i;
+ s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // multiply by 2 in GF(2^8), poly 0x11d
+ }
+}
+
+void mk_gf_mul_table(u8 * table)
+{
+ // Populate a single table with all multiply combinations for a fast,
+ // single-table lookup of GF(2^8) multiply at the expense of memory.
+ int i, j;
+ for (i = 0; i < 256; i++)
+ for (j = 0; j < 256; j++)
+ table[i * 256 + j] = gf_mul(i, j);
+}
+
+void gf_vect_dot_prod_ref(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
+{
+ int i, j;
+ u8 s;
+ for (i = 0; i < len; i++) {
+ s = 0;
+ for (j = 0; j < vlen; j++)
+ s ^= gf_mul(src[j][i], v[j]);
+
+ dest[i] = s;
+ }
+}
+
+void gf_vect_dot_prod_mult(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
+{
+ int i, j;
+ u8 s;
+ for (i = 0; i < len; i++) {
+ s = 0;
+ for (j = 0; j < vlen; j++) {
+ s ^= gf_mul_table[v[j] * 256 + src[j][i]];
+ }
+ dest[i] = s;
+ }
+}
+
+int main(void)
+{
+ int i, j;
+ u8 vec[TEST_SOURCES], dest1[TEST_LEN], dest2[TEST_LEN];
+ u8 *matrix[TEST_SOURCES];
+ struct perf start;
+
+ mk_gf_field();
+ mk_gf_mul_table(gf_mul_table);
+
+ //generate random vector and matrix/data
+ for (i = 0; i < TEST_SOURCES; i++) {
+ vec[i] = rand();
+
+ if (!(matrix[i] = malloc(TEST_LEN))) {
+ fprintf(stderr, "Error: allocation failure\n\n");
+ return -1;
+ }
+ for (j = 0; j < TEST_LEN; j++)
+ matrix[i][j] = rand();
+
+ }
+
+ BENCHMARK(&start, BENCHMARK_TIME,
+ gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1));
+ printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
+
+ BENCHMARK(&start, BENCHMARK_TIME,
+ gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2));
+ printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
+
+ // Compare with reference function
+ if (0 != memcmp(dest1, dest2, TEST_LEN)) {
+ printf("Error, different results!\n\n");
+ return -1;
+ }
+
+ printf("Pass functional test\n");
+ return 0;
+}
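
For contrast with the 64 KB flat table, the gff/gflog arrays built by mk_gf_field() support the classic two-table (log/antilog) multiply -- the "2tbl" in the label above; isa-l's gf_mul() is built on essentially this scheme. A minimal sketch over those globals (function name illustrative):

    /* a*b = g^(log a + log b); the generator has order 255, hence the
     * modulo, and 0 is handled specially since log(0) is undefined. */
    static unsigned char gf_mul_2tbl(unsigned char a, unsigned char b)
    {
            if (a == 0 || b == 0)
                    return 0;
            return gff[(gflog[a] + gflog[b]) % 255];
    }
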
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm
new file mode 100644
index 000000000..7bd8700ab
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm
@@ -0,0 +1,271 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r9
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved and loaded
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 rdi ; must be saved and loaded
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define frame_size 2*8
+ %define arg(x) [rsp + frame_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ rex_push_reg r12
+ push_reg rdi
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop rdi
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+
+ %define trans ecx ;trans is used for arguments that live on the stack
+ %define arg0 trans
+ %define arg0_m arg(0)
+ %define arg1 trans
+ %define arg1_m arg(1)
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 ebx
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 esi
+ %define return eax
+ %macro SLDR 2 ;; stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ push esi
+ push edi
+ push ebx
+ mov arg3, arg(3)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ mov esp, ebp
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define vec_m arg1_m
+ %define len_m arg0_m
+ %define dest_m arg4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%define xmask0f xmm5
+%define xgft_lo xmm4
+%define xgft_hi xmm3
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xp xmm2
+
+align 16
+mk_global gf_vect_dot_prod_avx, function
+func(gf_vect_dot_prod_avx)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+.loop16:
+ vpxor xp, xp
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+
+ mov ptr, [src+vec_i*PS]
+ vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ vmovdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ add tmp, 32
+ add vec_i, 1
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
+ vpxor xp, xp, xgft_hi ;xp += partial
+
+ SLDR vec, vec_m
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest, dest_m
+ XSTR [dest+pos], xp
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ SLDR len, len_m
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_dot_prod_avx, 02, 05, 0061
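
The tail handling here is the simpler overlap scheme: after the main loop, one more full 16-byte pass runs at offset len, re-reading and re-writing up to 15 already-finished bytes. That is harmless for dot-product kernels because dest is a pure function of src, so recomputing a byte stores the same value; the *_mad_* kernels above cannot do this (dest feeds back into itself), which is why they need the k-mask or constip16 machinery instead. A generic C sketch of the pattern (helper shape illustrative):

    #include <stddef.h>

    /* Run full blocks, then redo one final full block flush with the end.
     * Only valid when the per-byte result depends on the input alone. */
    static void run_with_overlap(size_t total, size_t block,
                                 void (*kernel)(size_t pos))
    {
            size_t pos;
            for (pos = 0; pos + block <= total; pos += block)
                    kernel(pos);
            if (pos != total && total >= block)
                    kernel(total - block);  /* overlapped final pass */
    }
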
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..c385e3b98
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm
@@ -0,0 +1,280 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r9
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved and loaded
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 rdi ; must be saved and loaded
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define frame_size 2*8
+ %define arg(x) [rsp + frame_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ rex_push_reg r12
+ push_reg rdi
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop rdi
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+
+ %define trans ecx ;trans is used for arguments that live on the stack
+ %define arg0 trans
+ %define arg0_m arg(0)
+ %define arg1 trans
+ %define arg1_m arg(1)
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 ebx
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp.w edx
+ %define tmp.b dl
+ %define tmp2 edi
+ %define tmp3 esi
+ %define return eax
+ %macro SLDR 2 ;stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ push esi
+ push edi
+ push ebx
+ mov arg3, arg(3)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ mov esp, ebp
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define pos return
+
+%ifidn PS,4 ;32-bit code
+ %define vec_m arg1_m
+ %define len_m arg0_m
+ %define dest_m arg4_m
+%endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%define xmask0f ymm3
+%define xmask0fx xmm3
+%define xgft_lo ymm4
+%define xgft_hi ymm5
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xp ymm2
+
+align 16
+mk_global gf_vect_dot_prod_avx2, function
+func(gf_vect_dot_prod_avx2)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 32
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+.loop32:
+ vpxor xp, xp
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+
+ mov ptr, [src+vec_i*PS]
+
+ vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo
+
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ add tmp, 32
+ add vec_i, 1
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
+ vpxor xp, xp, xgft_hi ;xp += partial
+
+ SLDR vec, vec_m
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest, dest_m
+ XSTR [dest+pos], xp
+
+ add pos, 32 ;Loop on 32 bytes at a time
+ SLDR len, len_m
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-32
+ jmp .loop32 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_vect_dot_prod_avx2, 04, 05, 0190
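
The 32-byte-per-coefficient table layout all these kernels consume -- bytes 0..15 holding gf_mul(coef, 0x00..0x0f), bytes 16..31 holding gf_mul(coef, 0x00), gf_mul(coef, 0x10), ..., gf_mul(coef, 0xf0) -- is what the asm comments describe and what gf_vect_mul_init() produces. A sketch of building one table by hand under that layout assumption (gf_vect_mul_init() is the supported API for this):

    /* Low/high nibble tables for one coefficient, in the layout the
     * SIMD kernels expect (sketch; use gf_vect_mul_init() in practice). */
    static void build_nibble_tables(unsigned char coef, unsigned char tbl[32])
    {
            int i;
            for (i = 0; i < 16; i++) {
                    tbl[i] = gf_mul(coef, (unsigned char)i);
                    tbl[i + 16] = gf_mul(coef, (unsigned char)(i << 4));
            }
    }
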
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..37fe082fc
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm
@@ -0,0 +1,240 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, *dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 0*16 + 3*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_reg r12, 0*8
+ save_reg r15, 1*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ mov r12, [rsp + 0*8]
+ mov r15, [rsp + 1*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use unaligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%define xmask0f zmm5
+%define xgft1_lo zmm4
+%define xgft1_loy ymm4
+%define xgft1_hi zmm3
+%define x0 zmm0
+%define x0y ymm0
+%define xtmpa zmm1
+%define xp1 zmm2
+%define xp1y ymm2
+
+default rel
+[bits 64]
+section .text
+
+align 16
+mk_global gf_vect_dot_prod_avx512, function
+func(gf_vect_dot_prod_avx512)
+ FUNC_SAVE
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
+ sub len, 64
+ jl .len_lt_64
+
+.loop64:
+ vpxorq xp1, xp1, xp1
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+ mov ptr, [src+vec_i]
+ XLDR x0, [ptr+pos] ;Get next source vector
+ add vec_i, PS
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ add tmp, 32
+
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+
+ vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxorq xp1, xp1, xgft1_hi ;xp1 += partial
+
+ cmp vec_i, vec
+ jl .next_vect
+
+ XSTR [dest1+pos], xp1
+
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
+
+
+.len_lt_64: ; 32-byte version
+ add len, 32
+ jl .return_fail
+
+.loop32:
+ vpxorq xp1, xp1, xp1
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect2:
+ mov ptr, [src+vec_i]
+ XLDR x0y, [ptr+pos] ;Get next source vector 32B
+ add vec_i, PS
+ vpsraw xtmpa, x0, 4 ;Shift to put high nibble into bits 4-0
+ vshufi64x2 x0, x0, xtmpa, 0x44 ;put x0 = xl:xh
+ vpandq x0, x0, xmask0f ;Mask bits 4-0
+ vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0}
+ add tmp, 32
+ vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x50 ;=AlAh:AlAh
+ vpshufb xgft1_lo, xgft1_lo, x0 ;Lookup mul table
+ vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x0e ;=xh:
+ vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
+ vpxorq xp1, xp1, xgft1_hi ;xp1 += partial
+ cmp vec_i, vec
+ jl .next_vect2
+
+ XSTR [dest1+pos], xp1y
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-32
+ jmp .loop32 ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_vect_dot_prod_avx512
+no_gf_vect_dot_prod_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
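
The calling convention shared by every gf_vect_dot_prod_* variant is the one the test drivers below use: expand each coefficient into its 32-byte table with gf_vect_mul_init(), then pass the packed tables to the kernel. A minimal usage sketch against the public dispatched entry point (names from erasure_code.h; wrapper is illustrative):

    #include "erasure_code.h"

    /* dest[i] = XOR over j of gf_mul(coef[j], srcs[j][i]), for i < len.
     * g_tbls must have room for vlen * 32 bytes. */
    static void dot_prod_example(int len, int vlen, unsigned char *coef,
                                 unsigned char **srcs, unsigned char *g_tbls,
                                 unsigned char *dest)
    {
            int j;
            for (j = 0; j < vlen; j++)
                    gf_vect_mul_init(coef[j], &g_tbls[j * 32]);
            gf_vect_dot_prod(len, vlen, g_tbls, srcs, dest);
    }
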
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c b/src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c
new file mode 100644
index 000000000..2b4dfbbe0
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c
@@ -0,0 +1,290 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 250
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", s[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", 0xff & s[j + (i * m)]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, rtest, m, k, nerrs, r, err;
+ void *buf;
+ u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+ u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
+ u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
+ u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
+
+ printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest = buf;
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest_ref = buf;
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ temp_buff = buf;
+
+ // Init
+ for (i = 0; i < TEST_SOURCES; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+ memset(dest, 0, TEST_LEN);
+ memset(temp_buff, 0, TEST_LEN);
+ memset(dest_ref, 0, TEST_LEN);
+ memset(g, 0, TEST_SOURCES);
+
+ // Test erasure code using gf_vect_dot_prod
+ // Pick a first test
+ m = 9;
+ k = 5;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ gf_gen_cauchy1_matrix(a, m, k);
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ // Make parity vects
+ for (i = k; i < m; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
+
+ gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
+ }
+
+ // Random buffers in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
+ err = 1 & rand();
+ src_in_err[i] = err;
+ if (err)
+ src_err_list[nerrs++] = i;
+ }
+
+ // construct b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ for (j = 0; j < k; j++)
+ b[k * i + j] = a[k * r + j];
+ }
+
+ if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
+ printf("BAD MATRIX\n");
+
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ recov[i] = buffs[r];
+ }
+
+ // Recover data
+ for (i = 0; i < nerrs; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
+
+ gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
+
+ if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buff, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+
+ // Do more random tests
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ gf_gen_cauchy1_matrix(a, m, k);
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ // Make parity vects
+ for (i = k; i < m; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
+
+ gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
+ }
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
+ err = 1 & rand();
+ src_in_err[i] = err;
+ if (err)
+ src_err_list[nerrs++] = i;
+ }
+ if (nerrs == 0) { // should have at least one error
+ while ((err = (rand() % KMAX)) >= k) ;
+ src_err_list[nerrs++] = err;
+ src_in_err[err] = 1;
+ }
+ // construct b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ for (j = 0; j < k; j++)
+ b[k * i + j] = a[k * r + j];
+ }
+
+ if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
+ printf("BAD MATRIX\n");
+
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ recov[i] = buffs[r];
+ }
+
+ // Recover data
+ for (i = 0; i < nerrs; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
+
+ gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
+
+ if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
+ printf(" - erase list = ");
+ for (i = 0; i < nerrs; i++)
+ printf(" %d", src_err_list[i]);
+ printf("\na:\n");
+ dump_u8xu8((u8 *) a, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) d, k, k);
+ printf("orig data:\n");
+ dump_matrix(buffs, m, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buff, 25);
+ return -1;
+ }
+ }
+ putchar('.');
+ }
+
+ printf("done all: Pass\n");
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_perf.c b/src/isa-l/erasure_code/gf_vect_dot_prod_perf.c
new file mode 100644
index 000000000..bd2b555b0
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_perf.c
@@ -0,0 +1,174 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "test.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_vect_dot_prod
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 10
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", s[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void vect_dot_prod_perf(void (*fun_ptr)
+ (int, int, unsigned char *, unsigned char **, unsigned char *),
+ u8 * g, u8 * g_tbls, u8 ** buffs, u8 * dest_ref)
+{
+ int j;
+ for (j = 0; j < TEST_SOURCES; j++)
+ gf_vect_mul_init(g[j], &g_tbls[j * 32]);
+
+ (*fun_ptr) (TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j;
+ void *buf;
+ u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
+ u8 *temp_buff, *buffs[TEST_SOURCES];
+ struct perf start;
+
+ printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest = buf;
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest_ref = buf;
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ temp_buff = buf;
+
+ // Performance test
+ for (i = 0; i < TEST_SOURCES; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ memset(dest, 0, TEST_LEN);
+ memset(temp_buff, 0, TEST_LEN);
+ memset(dest_ref, 0, TEST_LEN);
+ memset(g, 0, TEST_SOURCES);
+
+ for (i = 0; i < TEST_SOURCES; i++)
+ g[i] = rand();
+
+#ifdef DO_REF_PERF
+ BENCHMARK(&start, BENCHMARK_TIME,
+ vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref)
+ );
+ printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
+#else
+ vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref);
+#endif
+
+ BENCHMARK(&start, BENCHMARK_TIME,
+ vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest));
+ printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1));
+
+ if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
+ printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
+ dump_matrix(buffs, 5, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref, 25);
+ printf("dprod:");
+ dump(dest, 25);
+ return -1;
+ }
+
+ printf("pass perf check\n");
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm
new file mode 100644
index 000000000..9b0a44022
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm
@@ -0,0 +1,271 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r9
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved and loaded
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 rdi ; must be saved and loaded
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define frame_size 2*8
+ %define arg(x) [rsp + frame_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ rex_push_reg r12
+ push_reg rdi
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop rdi
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x: endbranch
+ %define arg(x) [ebp + PS*2 + PS*x]
+
+	%define trans	ecx		;trans holds arguments spilled to the stack
+ %define arg0 trans
+ %define arg0_m arg(0)
+ %define arg1 trans
+ %define arg1_m arg(1)
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 ebx
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 esi
+ %define return eax
+ %macro SLDR 2 ;; stack load/restore
+ mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ push esi
+ push edi
+ push ebx
+ mov arg3, arg(3)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ mov esp, ebp
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define vec_m arg1_m
+ %define len_m arg0_m
+ %define dest_m arg4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+%ifidn PS,8 ;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%define xmask0f xmm5
+%define xgft_lo xmm4
+%define xgft_hi xmm3
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xp xmm2
+
+align 16
+mk_global gf_vect_dot_prod_sse, function
+func(gf_vect_dot_prod_sse)
+ FUNC_SAVE
+ SLDR len, len_m
+ sub len, 16
+ SSTR len_m, len
+ jl .return_fail
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+.loop16:
+ pxor xp, xp
+ mov tmp, mul_array
+ xor vec_i, vec_i
+
+.next_vect:
+
+ mov ptr, [src+vec_i*PS]
+ movdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+ movdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
+ XLDR x0, [ptr+pos] ;Get next source vector
+
+ add tmp, 32
+ add vec_i, 1
+
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+
+ pshufb xgft_hi, x0 ;Lookup mul table of high nibble
+ pshufb xgft_lo, xtmpa ;Lookup mul table of low nibble
+ pxor xgft_hi, xgft_lo ;GF add high and low partials
+ pxor xp, xgft_hi ;xp += partial
+
+ SLDR vec, vec_m
+ cmp vec_i, vec
+ jl .next_vect
+
+ SLDR dest, dest_m
+ XSTR [dest+pos], xp
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ SLDR len, len_m
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ jmp .loop16 ;Do one more overlap pass
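+	;; Recomputing the overlapped bytes is safe: each dest byte is a pure
+	;; function of the source bytes at the same offset, so the second pass
+	;; stores the same values again.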
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_dot_prod_sse, 00, 05, 0060
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_test.c b/src/isa-l/erasure_code/gf_vect_dot_prod_test.c
new file mode 100644
index 000000000..1c0232cca
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_test.c
@@ -0,0 +1,525 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_vect_dot_prod
+#endif
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 32
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", s[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", 0xff & s[j + (i * m)]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, rtest, srcs, m, k, nerrs, r, err;
+ void *buf;
+ u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+ u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
+ u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
+ u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
+
+ int align, size;
+ unsigned char *efence_buffs[TEST_SOURCES];
+ unsigned int offset;
+ u8 *ubuffs[TEST_SOURCES];
+ u8 *udest_ptr;
+
+ printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest = buf;
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest_ref = buf;
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ temp_buff = buf;
+
+ // Test of all zeros
+ for (i = 0; i < TEST_SOURCES; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+ memset(dest, 0, TEST_LEN);
+ memset(temp_buff, 0, TEST_LEN);
+ memset(dest_ref, 0, TEST_LEN);
+ memset(g, 0, TEST_SOURCES);
+
+ for (i = 0; i < TEST_SOURCES; i++)
+ gf_vect_mul_init(g[i], &g_tbls[i * 32]);
+
+ gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
+
+ FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
+
+ if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
+		printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
+ dump_matrix(buffs, 5, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref, 25);
+ printf("dprod:");
+ dump(dest, 25);
+ return -1;
+ } else
+ putchar('.');
+
+ // Rand data test
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ for (i = 0; i < TEST_SOURCES; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < TEST_SOURCES; i++)
+ g[i] = rand();
+
+ for (i = 0; i < TEST_SOURCES; i++)
+ gf_vect_mul_init(g[i], &g_tbls[i * 32]);
+
+ gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
+ FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
+
+ if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
+ dump_matrix(buffs, 5, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref, 25);
+ printf("dprod:");
+ dump(dest, 25);
+ return -1;
+ }
+
+ putchar('.');
+ }
+
+ // Rand data test with varied parameters
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
+ for (i = 0; i < srcs; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < srcs; i++)
+ g[i] = rand();
+
+ for (i = 0; i < srcs; i++)
+ gf_vect_mul_init(g[i], &g_tbls[i * 32]);
+
+ gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
+ FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
+
+ if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
+ dump_matrix(buffs, 5, srcs);
+ printf("dprod_base:");
+ dump(dest_ref, 5);
+ printf("dprod:");
+ dump(dest, 5);
+ return -1;
+ }
+
+ putchar('.');
+ }
+ }
+
+ // Test erasure code using gf_vect_dot_prod
+
+ // Pick a first test
+ m = 9;
+ k = 5;
+ if (m > MMAX || k > KMAX)
+ return -1;
+
+ gf_gen_rs_matrix(a, m, k);
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ // Make parity vects
+ for (i = k; i < m; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
+#ifndef USEREF
+ FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
+#else
+ gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
+#endif
+ }
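+
+	// Decode sketch: any k surviving rows of 'a' form 'b'; its inverse
+	// 'd' maps the survivors back to the sources, so lost source
+	// src_err_list[i] is the GF dot product of row src_err_list[i] of 'd'
+	// with the k surviving buffers gathered in 'recov' below.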
+
+ // Random buffers in erasure
+ memset(src_in_err, 0, TEST_SOURCES);
+ for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
+ err = 1 & rand();
+ src_in_err[i] = err;
+ if (err)
+ src_err_list[nerrs++] = i;
+ }
+
+ // construct b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ for (j = 0; j < k; j++)
+ b[k * i + j] = a[k * r + j];
+ }
+
+ if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
+ printf("BAD MATRIX\n");
+
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ recov[i] = buffs[r];
+ }
+
+ // Recover data
+ for (i = 0; i < nerrs; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
+#ifndef USEREF
+ FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
+#else
+ gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
+#endif
+
+ if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
+ printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buff, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ return -1;
+ }
+ }
+
+ // Do more random tests
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ while ((m = (rand() % MMAX)) < 2) ;
+ while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+ if (m > MMAX || k > KMAX)
+ continue;
+
+ gf_gen_rs_matrix(a, m, k);
+
+ // Make random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ // Make parity vects
+ for (i = k; i < m; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
+#ifndef USEREF
+ FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
+#else
+ gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
+#endif
+ }
+
+ // Random errors
+ memset(src_in_err, 0, TEST_SOURCES);
+ for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
+ err = 1 & rand();
+ src_in_err[i] = err;
+ if (err)
+ src_err_list[nerrs++] = i;
+ }
+ if (nerrs == 0) { // should have at least one error
+ while ((err = (rand() % KMAX)) >= k) ;
+ src_err_list[nerrs++] = err;
+ src_in_err[err] = 1;
+ }
+ // construct b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ for (j = 0; j < k; j++)
+ b[k * i + j] = a[k * r + j];
+ }
+
+ if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
+ printf("BAD MATRIX\n");
+
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (src_in_err[r]) {
+ r++;
+ continue;
+ }
+ recov[i] = buffs[r];
+ }
+
+ // Recover data
+ for (i = 0; i < nerrs; i++) {
+ for (j = 0; j < k; j++)
+ gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
+#ifndef USEREF
+ FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
+#else
+ gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
+#endif
+ if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
+			printf("Fail error recovery (%d, %d, %d)", m, k, nerrs);
+			printf(" - erase list = ");
+			for (j = 0; j < nerrs; j++)
+				printf(" %d", src_err_list[j]);
+ printf("\na:\n");
+ dump_u8xu8((u8 *) a, m, k);
+ printf("inv b:\n");
+ dump_u8xu8((u8 *) d, k, k);
+ printf("orig data:\n");
+ dump_matrix(buffs, m, 25);
+ printf("orig :");
+ dump(buffs[src_err_list[i]], 25);
+ printf("recov %d:", src_err_list[i]);
+ dump(temp_buff, 25);
+ return -1;
+ }
+ }
+ putchar('.');
+ }
+
+ // Run tests at end of buffer for Electric Fence
+ align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
+ for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
+ for (i = 0; i < TEST_SOURCES; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
+ efence_buffs[i] = buffs[i] + TEST_LEN - size;
+
+ for (i = 0; i < TEST_SOURCES; i++)
+ g[i] = rand();
+
+ for (i = 0; i < TEST_SOURCES; i++)
+ gf_vect_mul_init(g[i], &g_tbls[i * 32]);
+
+ gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
+ FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
+
+ if (0 != memcmp(dest_ref, dest, size)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
+ dump_matrix(efence_buffs, 5, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref, align);
+ printf("dprod:");
+ dump(dest, align);
+ return -1;
+ }
+
+ putchar('.');
+ }
+
+ // Test rand ptr alignment if available
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
+ srcs = rand() % TEST_SOURCES;
+ if (srcs == 0)
+ continue;
+
+ offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
+ // Add random offsets
+ for (i = 0; i < srcs; i++)
+ ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+
+ udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
+
+ memset(dest, 0, TEST_LEN); // zero pad to check write-over
+
+ for (i = 0; i < srcs; i++)
+ for (j = 0; j < size; j++)
+ ubuffs[i][j] = rand();
+
+ for (i = 0; i < srcs; i++)
+ g[i] = rand();
+
+ for (i = 0; i < srcs; i++)
+ gf_vect_mul_init(g[i], &g_tbls[i * 32]);
+
+ gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
+
+ FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
+
+ if (memcmp(dest_ref, udest_ptr, size)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
+ srcs);
+ dump_matrix(ubuffs, 5, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref, 25);
+ printf("dprod:");
+ dump(udest_ptr, 25);
+ return -1;
+ }
+ // Confirm that padding around dests is unchanged
+ memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
+ offset = udest_ptr - dest;
+
+ if (memcmp(dest, dest_ref, offset)) {
+ printf("Fail rand ualign pad start\n");
+ return -1;
+ }
+ if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign pad end\n");
+ return -1;
+ }
+
+ putchar('.');
+ }
+
+ // Test all size alignment
+ align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
+
+ for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
+ srcs = TEST_SOURCES;
+
+ for (i = 0; i < srcs; i++)
+ for (j = 0; j < size; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < srcs; i++)
+ g[i] = rand();
+
+ for (i = 0; i < srcs; i++)
+ gf_vect_mul_init(g[i], &g_tbls[i * 32]);
+
+ gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
+
+ FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
+
+ if (memcmp(dest_ref, dest, size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " len align=%d\n",
+			       size);
+ dump_matrix(buffs, 5, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref, 25);
+ printf("dprod:");
+ dump(dest, 25);
+ return -1;
+ }
+ }
+
+ printf("done all: Pass\n");
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx.asm b/src/isa-l/erasure_code/gf_vect_mad_avx.asm
new file mode 100644
index 000000000..448fbd788
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mad_avx.asm
@@ -0,0 +1,196 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+ %define PS 8
+ %define stack_size 16*3 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ save_reg r12, 3*16 + 0*8
+ save_reg r15, 3*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ mov r12, [rsp + 3*16 + 0*8]
+ mov r15, [rsp + 3*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest arg5
+%define pos return
+%define pos.w return.w
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm8
+%define xgft_lo xmm7
+%define xgft_hi xmm6
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph xmm2
+%define xtmpl xmm3
+%define xd xmm4
+%define xtmpd xmm5
+
+align 16
+mk_global gf_vect_mad_avx, function
+func(gf_vect_mad_avx)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+
+ xor pos, pos
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+
+ sal vec_i, 5 ;Multiply by 32
+ vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+
+ XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest
+
+.loop16:
+ XLDR xd, [dest+pos] ;Get next dest vector
+.loop16_overlap:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd, xd, xtmph ;xd += partial
+
+ XSTR [dest+pos], xd
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ vmovdqa xd, xtmpd ;Restore xd
+ jmp .loop16_overlap ;Do one more overlap pass
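+	;; xtmpd preserved the original last 16 bytes of dest from before the
+	;; first pass; restoring it keeps the overlapped region from being
+	;; multiply-accumulated twice.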
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_mad_avx, 02, 01, 0201
diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm
new file mode 100644
index 000000000..097d8fa00
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm
@@ -0,0 +1,203 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12 ; must be saved and loaded
+ %define arg5 r15
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+ %define PS 8
+ %define stack_size 16*3 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ save_reg r12, 3*16 + 0*8
+ save_reg r15, 3*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ mov r12, [rsp + 3*16 + 0*8]
+ mov r15, [rsp + 3*16 + 1*8]
+ add rsp, stack_size
+ %endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+
+;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest arg5
+%define pos return
+%define pos.w return.w
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm8
+%define xmask0fx xmm8
+%define xgft_lo ymm7
+%define xgft_hi ymm6
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xtmph ymm2
+%define xtmpl ymm3
+%define xd ymm4
+%define xtmpd ymm5
+
+align 16
+mk_global gf_vect_mad_avx2, function
+func(gf_vect_mad_avx2)
+ FUNC_SAVE
+ sub len, 32
+ jl .return_fail
+ xor pos, pos
+ mov tmp.b, 0x0f
+ vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+ vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+
+ sal vec_i, 5 ;Multiply by 32
+ vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+ vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi
+ vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo
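+	;; A single 32B load brought in both 16B lookup tables; the two
+	;; vperm2i128 ops replicate the low and high tables across both ymm
+	;; lanes, since vpshufb only shuffles within each 128-bit lane.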
+
+ XLDR xtmpd, [dest+len] ;backup the last 32 bytes in dest
+
+.loop32:
+ XLDR xd, [dest+pos] ;Get next dest vector
+.loop32_overlap:
+ XLDR x0, [src+pos] ;Get next source vector
+
+ vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble
+ vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxor xd, xd, xtmph ;xd += partial
+
+ XSTR [dest+pos], xd
+ add pos, 32 ;Loop on 32 bytes at a time
+ cmp pos, len
+ jle .loop32
+
+ lea tmp, [len + 32]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-32
+ vmovdqa xd, xtmpd ;Restore xd
+ jmp .loop32_overlap ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_vect_mad_avx2, 04, 01, 0202
diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm
new file mode 100644
index 000000000..43982e341
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm
@@ -0,0 +1,193 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_mad_avx512(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12 ; must be saved and loaded
+ %define arg5 r15
+ %define tmp r11
+ %define return rax
+ %define PS 8
+ %define stack_size 16*3 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ vmovdqa [rsp+16*0],xmm6
+ vmovdqa [rsp+16*1],xmm7
+ vmovdqa [rsp+16*2],xmm8
+ save_reg r12, 3*16 + 0*8
+ save_reg r15, 3*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp+16*0]
+ vmovdqa xmm7, [rsp+16*1]
+ vmovdqa xmm8, [rsp+16*2]
+ mov r12, [rsp + 3*16 + 0*8]
+ mov r15, [rsp + 3*16 + 1*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+;;; gf_vect_mad_avx512(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest arg5
+%define pos return
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xtmph zmm2
+%define xtmpl zmm3
+%define xd zmm4
+%define xtmpd zmm5
+%define xgft_hi zmm6
+%define xgft_lo zmm7
+%define xgft_loy ymm7
+%define xmask0f zmm8
+
+align 16
+mk_global gf_vect_mad_avx512, function
+func(gf_vect_mad_avx512)
+ FUNC_SAVE
+ sub len, 64
+ jl .return_fail
+ xor pos, pos
+ mov tmp, 0x0f
+ vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec_i, 5 ;Multiply by 32
+ vmovdqu8 xgft_loy, [vec_i+mul_array] ;Load array Cx{00}..{0f}, Cx{00}..{f0}
+ vshufi64x2 xgft_hi, xgft_lo, xgft_lo, 0x55
+ vshufi64x2 xgft_lo, xgft_lo, xgft_lo, 0x00
+ mov tmp, -1
+ kmovq k1, tmp
+
+.loop64:
+ XLDR xd, [dest+pos] ;Get next dest vector
+ XLDR x0, [src+pos] ;Get next source vector
+
+ vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+
+ vpshufb xtmph {k1}{z}, xgft_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmpl {k1}{z}, xgft_lo, xtmpa ;Lookup mul table of low nibble
+ vpxorq xtmph, xtmph, xtmpl ;GF add high and low partials
+ vpxorq xd, xd, xtmph ;xd += partial
+
+ XSTR [dest+pos], xd
+ add pos, 64 ;Loop on 64 bytes at a time
+ cmp pos, len
+ jle .loop64
+
+ lea tmp, [len + 64]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, (1 << 63)
+ lea tmp, [len + 64 - 1]
+ and tmp, 63
+ sarx pos, pos, tmp
+ kmovq k1, pos
+ mov pos, len ;Overlapped offset length-64
+ jmp .loop64 ;Do one more overlap pass
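+	;; k1 now keeps only the top lanes of the 64-byte window: exactly the
+	;; remainder bytes not yet processed. The {z} masking on the lookups
+	;; zeroes every other lane's partial, so dest bytes that were already
+	;; updated are not xor'ed a second time.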
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_vect_mad_avx512
+no_gf_vect_mad_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_vect_mad_sse.asm b/src/isa-l/erasure_code/gf_vect_mad_sse.asm
new file mode 100644
index 000000000..10444040b
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mad_sse.asm
@@ -0,0 +1,197 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+ %define PS 8
+ %define stack_size 16*3 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp+16*0],xmm6
+ movdqa [rsp+16*1],xmm7
+ movdqa [rsp+16*2],xmm8
+ save_reg r12, 3*16 + 0*8
+ save_reg r15, 3*16 + 1*8
+ end_prolog
+ mov arg4, arg(4)
+ mov arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp+16*0]
+ movdqa xmm7, [rsp+16*1]
+ movdqa xmm8, [rsp+16*2]
+ mov r12, [rsp + 3*16 + 0*8]
+ mov r15, [rsp + 3*16 + 1*8]
+ add rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest arg5
+%define pos return
+%define pos.w return.w
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm8
+%define xgft_lo xmm7
+%define xgft_hi xmm6
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph xmm2
+%define xtmpl xmm3
+%define xd xmm4
+%define xtmpd xmm5
+
+
+align 16
+mk_global gf_vect_mad_sse, function
+func(gf_vect_mad_sse)
+ FUNC_SAVE
+ sub len, 16
+ jl .return_fail
+
+ xor pos, pos
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ sal vec_i, 5 ;Multiply by 32
+ movdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ movdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+
+ XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest
+
+.loop16:
+ XLDR xd, [dest+pos] ;Get next dest vector
+.loop16_overlap:
+ XLDR x0, [src+pos] ;Get next source vector
+ movdqa xtmph, xgft_hi ;Reload const array registers
+ movdqa xtmpl, xgft_lo
+ movdqa xtmpa, x0 ;Keep unshifted copy of src
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
+ pshufb xtmph, x0 ;Lookup mul table of high nibble
+ pshufb xtmpl, xtmpa ;Lookup mul table of low nibble
+ pxor xtmph, xtmpl ;GF add high and low partials
+
+ pxor xd, xtmph
+ XSTR [dest+pos], xd ;Store result
+
+ add pos, 16 ;Loop on 16 bytes at a time
+ cmp pos, len
+ jle .loop16
+
+ lea tmp, [len + 16]
+ cmp pos, tmp
+ je .return_pass
+
+ ;; Tail len
+ mov pos, len ;Overlapped offset length-16
+ movdqa xd, xtmpd ;Restore xd
+ jmp .loop16_overlap ;Do one more overlap pass
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_mad_sse, 00, 01, 0200
diff --git a/src/isa-l/erasure_code/gf_vect_mad_test.c b/src/isa-l/erasure_code/gf_vect_mad_test.c
new file mode 100644
index 000000000..cecbc1669
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mad_test.c
@@ -0,0 +1,519 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef ALIGN_SIZE
+# define ALIGN_SIZE 32
+#endif
+
+#ifndef FUNCTION_UNDER_TEST
+//By default, test multi-binary version
+# define FUNCTION_UNDER_TEST gf_vect_mad
+# define REF_FUNCTION gf_vect_dot_prod
+# define VECT 1
+#endif
+
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 64
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 20000
+#define TEST_TYPE_STR ""
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B ALIGN_SIZE
+# define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+#if (VECT == 1)
+# define LAST_ARG *dest
+#else
+# define LAST_ARG **dest
+#endif
+
+extern void FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char LAST_ARG);
+extern void REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+ unsigned char LAST_ARG);
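+
+// Contract exercised below (a sketch, not upstream documentation):
+// gf_vect_mad accumulates a single source into each dest,
+//     dest[p] ^= gf_mul(coef[vec_i], src[p]) for every byte p,
+// so applying it once per source over zeroed dests must match one
+// gf_vect_dot_prod pass across the same sources and tables.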
+
+void dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 32 == 0)
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", s[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+ int i, j;
+ for (i = 0; i < k; i++) {
+ for (j = 0; j < m; j++) {
+ printf(" %2x", 0xff & s[j + (i * m)]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, rtest, srcs;
+ void *buf;
+ u8 gf[6][TEST_SOURCES];
+ u8 *g_tbls;
+ u8 *dest_ref[VECT];
+ u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
+ int vector = VECT;
+
+ int align, size;
+ unsigned char *efence_buffs[TEST_SOURCES];
+ unsigned int offset;
+ u8 *ubuffs[TEST_SOURCES];
+ u8 *udest_ptrs[VECT];
+
+	printf("test " xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ buffs[i] = buf;
+ }
+
+ if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ g_tbls = buf;
+
+ for (i = 0; i < vector; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest_ptrs[i] = buf;
+ memset(dest_ptrs[i], 0, TEST_LEN);
+ }
+
+ for (i = 0; i < vector; i++) {
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ dest_ref[i] = buf;
+ memset(dest_ref[i], 0, TEST_LEN);
+ }
+
+	// First test (buffers and coefficients are re-randomized below)
+ for (i = 0; i < TEST_SOURCES; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
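+	// The cases below fall through intentionally: a vector count of N
+	// seeds coefficient rows N-1 down to 0 before the break.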
+ switch (vector) {
+ case 6:
+ memset(gf[5], 0xe6, TEST_SOURCES);
+ case 5:
+ memset(gf[4], 4, TEST_SOURCES);
+ case 4:
+ memset(gf[3], 9, TEST_SOURCES);
+ case 3:
+ memset(gf[2], 7, TEST_SOURCES);
+ case 2:
+ memset(gf[1], 1, TEST_SOURCES);
+ case 1:
+ memset(gf[0], 2, TEST_SOURCES);
+ break;
+ default:
+ return -1;
+ }
+
+ for (i = 0; i < TEST_SOURCES; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < vector; i++)
+ for (j = 0; j < TEST_SOURCES; j++) {
+ gf[i][j] = rand();
+ gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
+ }
+
+ for (i = 0; i < vector; i++)
+ gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES],
+ buffs, dest_ref[i]);
+
+ for (i = 0; i < vector; i++)
+ memset(dest_ptrs[i], 0, TEST_LEN);
+ for (i = 0; i < TEST_SOURCES; i++) {
+#if (VECT == 1)
+ FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs);
+#else
+ FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs);
+#endif
+ }
+ for (i = 0; i < vector; i++) {
+ if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
+ printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i);
+ dump_matrix(buffs, vector, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref[i], 25);
+ printf("dprod_dut:");
+ dump(dest_ptrs[i], 25);
+ return -1;
+ }
+ }
+
+#if (VECT == 1)
+ REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref);
+#else
+ REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref);
+#endif
+ for (i = 0; i < vector; i++) {
+ if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
+			printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " vs "
+			       xstr(REF_FUNCTION) " test%d\n", i);
+ dump_matrix(buffs, vector, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref[i], 25);
+ printf("dprod_dut:");
+ dump(dest_ptrs[i], 25);
+ return -1;
+ }
+ }
+
+ putchar('.');
+
+ // Rand data test
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ for (i = 0; i < TEST_SOURCES; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < vector; i++)
+ for (j = 0; j < TEST_SOURCES; j++) {
+ gf[i][j] = rand();
+ gf_vect_mul_init(gf[i][j],
+ &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
+ }
+
+ for (i = 0; i < vector; i++)
+ gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES,
+ &g_tbls[i * 32 * TEST_SOURCES], buffs,
+ dest_ref[i]);
+
+ for (i = 0; i < vector; i++)
+ memset(dest_ptrs[i], 0, TEST_LEN);
+ for (i = 0; i < TEST_SOURCES; i++) {
+#if (VECT == 1)
+ FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
+ *dest_ptrs);
+#else
+ FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i],
+ dest_ptrs);
+#endif
+ }
+ for (i = 0; i < vector; i++) {
+ if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n",
+ i, rtest);
+ dump_matrix(buffs, vector, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref[i], 25);
+ printf("dprod_dut:");
+ dump(dest_ptrs[i], 25);
+ return -1;
+ }
+ }
+
+ putchar('.');
+ }
+
+ // Rand data test with varied parameters
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
+ for (i = 0; i < srcs; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < vector; i++)
+ for (j = 0; j < srcs; j++) {
+ gf[i][j] = rand();
+ gf_vect_mul_init(gf[i][j],
+ &g_tbls[i * (32 * srcs) + j * 32]);
+ }
+
+ for (i = 0; i < vector; i++)
+ gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs],
+ buffs, dest_ref[i]);
+
+ for (i = 0; i < vector; i++)
+ memset(dest_ptrs[i], 0, TEST_LEN);
+ for (i = 0; i < srcs; i++) {
+#if (VECT == 1)
+ FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
+ *dest_ptrs);
+#else
+ FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i],
+ dest_ptrs);
+#endif
+
+ }
+ for (i = 0; i < vector; i++) {
+ if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
+ " test%d srcs=%d\n", i, srcs);
+ dump_matrix(buffs, vector, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref[i], 25);
+ printf("dprod_dut:");
+ dump(dest_ptrs[i], 25);
+ return -1;
+ }
+ }
+
+ putchar('.');
+ }
+ }
+
+ // Run tests at end of buffer for Electric Fence
+ align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
+ for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
+ for (i = 0; i < TEST_SOURCES; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
+ efence_buffs[i] = buffs[i] + TEST_LEN - size;
+
+ for (i = 0; i < vector; i++)
+ for (j = 0; j < TEST_SOURCES; j++) {
+ gf[i][j] = rand();
+ gf_vect_mul_init(gf[i][j],
+ &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
+ }
+
+ for (i = 0; i < vector; i++)
+ gf_vect_dot_prod_base(size, TEST_SOURCES,
+ &g_tbls[i * 32 * TEST_SOURCES], efence_buffs,
+ dest_ref[i]);
+
+ for (i = 0; i < vector; i++)
+ memset(dest_ptrs[i], 0, size);
+ for (i = 0; i < TEST_SOURCES; i++) {
+#if (VECT == 1)
+ FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
+ *dest_ptrs);
+#else
+ FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i],
+ dest_ptrs);
+#endif
+ }
+ for (i = 0; i < vector; i++) {
+ if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
+ " test%d size=%d\n", i, size);
+ dump_matrix(buffs, vector, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref[i], TEST_MIN_SIZE + align);
+ printf("dprod_dut:");
+ dump(dest_ptrs[i], TEST_MIN_SIZE + align);
+ return -1;
+ }
+ }
+
+ putchar('.');
+ }
+
+ // Test rand ptr alignment if available
+
+ for (rtest = 0; rtest < RANDOMS; rtest++) {
+ size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
+ srcs = rand() % TEST_SOURCES;
+ if (srcs == 0)
+ continue;
+
+ offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
+ // Add random offsets
+ for (i = 0; i < srcs; i++)
+ ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+
+ for (i = 0; i < vector; i++) {
+ udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+ memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
+ }
+
+ for (i = 0; i < srcs; i++)
+ for (j = 0; j < size; j++)
+ ubuffs[i][j] = rand();
+
+ for (i = 0; i < vector; i++)
+ for (j = 0; j < srcs; j++) {
+ gf[i][j] = rand();
+ gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]);
+ }
+
+ for (i = 0; i < vector; i++)
+ gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs,
+ dest_ref[i]);
+
+ for (i = 0; i < srcs; i++) {
+#if (VECT == 1)
+ FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs);
+#else
+ FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs);
+#endif
+ }
+ for (i = 0; i < vector; i++) {
+ if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
+ " test%d ualign srcs=%d\n", i, srcs);
+ dump_matrix(buffs, vector, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref[i], 25);
+ printf("dprod_dut:");
+ dump(udest_ptrs[i], 25);
+ return -1;
+ }
+ }
+
+ // Confirm that padding around dests is unchanged
+ memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
+
+ for (i = 0; i < vector; i++) {
+ offset = udest_ptrs[i] - dest_ptrs[i];
+ if (memcmp(dest_ptrs[i], dest_ref[0], offset)) {
+ printf("Fail rand ualign pad1 start\n");
+ return -1;
+ }
+ if (memcmp
+ (dest_ptrs[i] + offset + size, dest_ref[0],
+ PTR_ALIGN_CHK_B - offset)) {
+ printf("Fail rand ualign pad1 end\n");
+ return -1;
+ }
+ }
+
+ putchar('.');
+ }
+
+ // Test all size alignment
+ align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE;
+
+ for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
+ for (i = 0; i < TEST_SOURCES; i++)
+ for (j = 0; j < size; j++)
+ buffs[i][j] = rand();
+
+ for (i = 0; i < vector; i++) {
+ for (j = 0; j < TEST_SOURCES; j++) {
+ gf[i][j] = rand();
+ gf_vect_mul_init(gf[i][j],
+ &g_tbls[i * (32 * TEST_SOURCES) + j * 32]);
+ }
+ memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over
+ }
+
+ for (i = 0; i < vector; i++)
+ gf_vect_dot_prod_base(size, TEST_SOURCES,
+ &g_tbls[i * 32 * TEST_SOURCES], buffs,
+ dest_ref[i]);
+
+ for (i = 0; i < TEST_SOURCES; i++) {
+#if (VECT == 1)
+ FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
+ *dest_ptrs);
+#else
+ FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i],
+ dest_ptrs);
+#endif
+ }
+ for (i = 0; i < vector; i++) {
+ if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) {
+ printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
+				       " test%d len align=%d\n", i, size);
+ dump_matrix(buffs, vector, TEST_SOURCES);
+ printf("dprod_base:");
+ dump(dest_ref[i], 25);
+ printf("dprod_dut:");
+ dump(dest_ptrs[i], 25);
+ return -1;
+ }
+ }
+
+ putchar('.');
+
+ }
+
+ printf("Pass\n");
+ return 0;
+
+}
diff --git a/src/isa-l/erasure_code/gf_vect_mul_avx.asm b/src/isa-l/erasure_code/gf_vect_mul_avx.asm
new file mode 100644
index 000000000..91f6d6d3f
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_avx.asm
@@ -0,0 +1,164 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_mul_avx(len, mul_array, src, dest)
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define return rax
+ %define stack_size 5*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm13
+ vmovdqa [rsp + 3*16], xmm14
+ vmovdqa [rsp + 4*16], xmm15
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm13, [rsp + 2*16]
+ vmovdqa xmm14, [rsp + 3*16]
+ vmovdqa xmm15, [rsp + 4*16]
+ add rsp, stack_size
+ %endmacro
+
+%endif
+
+
+%define len arg0
+%define mul_array arg1
+%define src arg2
+%define dest arg3
+%define pos return
+
+
+;;; Use non-temporal load/store unless NO_NT_LDST is defined
+%ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+%else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft_lo xmm14
+%define xgft_hi xmm13
+
+%define x0 xmm0
+%define xtmp1a xmm1
+%define xtmp1b xmm2
+%define xtmp1c xmm3
+%define x1 xmm4
+%define xtmp2a xmm5
+%define xtmp2b xmm6
+%define xtmp2c xmm7
+
+align 16
+mk_global gf_vect_mul_avx, function
+func(gf_vect_mul_avx)
+ FUNC_SAVE
+ mov pos, 0
+ vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ vmovdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+
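+;;; Algorithm note: for each byte b of the source, the GF(2^8) product
+;;; is xgft_lo[b & 0x0f] XOR xgft_hi[b >> 4]; the loop below computes
+;;; this with vpshufb nibble-table lookups and vpxor for the GF add.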
+loop32:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
+	add	pos, 32			;Loop on 32 bytes at a time
+ cmp pos, len
+ vpand xtmp1a, x0, xmask0f ;Mask low src nibble in bits 4-0
+ vpand xtmp2a, x1, xmask0f
+ vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
+ vpsraw x1, x1, 4
+ vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
+ vpand x1, x1, xmask0f
+ vpshufb xtmp1b, xgft_hi, x0 ;Lookup mul table of high nibble
+ vpshufb xtmp1c, xgft_lo, xtmp1a ;Lookup mul table of low nibble
+ vpshufb xtmp2b, xgft_hi, x1 ;Lookup mul table of high nibble
+ vpshufb xtmp2c, xgft_lo, xtmp2a ;Lookup mul table of low nibble
+ vpxor xtmp1b, xtmp1b, xtmp1c ;GF add high and low partials
+ vpxor xtmp2b, xtmp2b, xtmp2c
+ XSTR [dest+pos-32], xtmp1b ;Store result
+ XSTR [dest+pos-16], xtmp2b ;Store +16B result
+ jl loop32
+
+
+return_pass:
+ FUNC_RESTORE
+ sub pos, len
+ ret
+
+return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_mul_avx, 01, 03, 0036
diff --git a/src/isa-l/erasure_code/gf_vect_mul_base_test.c b/src/isa-l/erasure_code/gf_vect_mul_base_test.c
new file mode 100644
index 000000000..c47d2365d
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_base_test.c
@@ -0,0 +1,129 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset
+#include "erasure_code.h"
+
+#define TEST_SIZE 8192
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 100000
+#define TEST_TYPE_STR ""
+
+typedef unsigned char u8;
+
+int main(int argc, char *argv[])
+{
+ int i;
+ u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
+ int align, size;
+ unsigned char *efence_buff1;
+ unsigned char *efence_buff2;
+
+ printf("gf_vect_mul_base_test:\n");
+
+ gf_vect_mul_init(a, gf_const_tbl);
+
+ buff1 = (u8 *) malloc(TEST_SIZE);
+ buff2 = (u8 *) malloc(TEST_SIZE);
+ buff3 = (u8 *) malloc(TEST_SIZE);
+
+ if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
+ printf("buffer alloc error\n");
+ return -1;
+ }
+ // Fill with rand data
+ for (i = 0; i < TEST_SIZE; i++)
+ buff1[i] = rand();
+
+ gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+ for (i = 0; i < TEST_SIZE; i++)
+ if (gf_mul(a, buff1[i]) != buff2[i]) {
+ printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
+ gf_mul(2, buff1[i]));
+ return 1;
+ }
+
+ gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
+
+ // Check reference function
+ for (i = 0; i < TEST_SIZE; i++)
+ if (buff2[i] != buff3[i]) {
+			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+ i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+ return 1;
+ }
+
+ for (i = 0; i < TEST_SIZE; i++)
+ buff1[i] = rand();
+
+ // Check each possible constant
+ printf("Random tests ");
+ for (a = 0; a != 255; a++) {
+ gf_vect_mul_init(a, gf_const_tbl);
+ gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+ for (i = 0; i < TEST_SIZE; i++)
+ if (gf_mul(a, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+ return 1;
+ }
+ putchar('.');
+ }
+
+ // Run tests at end of buffer for Electric Fence
+ align = 32;
+ a = 2;
+
+ gf_vect_mul_init(a, gf_const_tbl);
+ for (size = 0; size < TEST_SIZE; size += align) {
+ // Line up TEST_SIZE from end
+ efence_buff1 = buff1 + size;
+ efence_buff2 = buff2 + size;
+
+ gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
+
+ for (i = 0; i < TEST_SIZE - size; i++)
+ if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
+				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
+				       i, efence_buff1[i], efence_buff2[i],
+				       gf_mul(2, efence_buff1[i]));
+ return 1;
+ }
+
+ putchar('.');
+ }
+
+ printf(" done: Pass\n");
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_mul_perf.c b/src/isa-l/erasure_code/gf_vect_mul_perf.c
new file mode 100644
index 000000000..58194cceb
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_perf.c
@@ -0,0 +1,90 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // for memset
+#include "erasure_code.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10
+#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
+#  define TEST_LEN     (GT_L3_CACHE / 2)
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+#define TEST_MEM (2 * TEST_LEN)
+
+typedef unsigned char u8;
+
+void gf_vect_mul_perf(u8 a, u8 * gf_const_tbl, u8 * buff1, u8 * buff2)
+{
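+	/* Note: the timed region includes the 32B table expansion
+	 * (gf_vect_mul_init), not just the multiply itself. */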
+ gf_vect_mul_init(a, gf_const_tbl);
+ gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
+}
+
+int main(int argc, char *argv[])
+{
+ u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
+ struct perf start;
+
+ printf("gf_vect_mul_perf:\n");
+
+ // Allocate large mem region
+ buff1 = (u8 *) malloc(TEST_LEN);
+ buff2 = (u8 *) malloc(TEST_LEN);
+ if (NULL == buff1 || NULL == buff2) {
+ printf("Failed to allocate %dB\n", TEST_LEN);
+ return 1;
+ }
+
+ memset(buff1, 0, TEST_LEN);
+ memset(buff2, 0, TEST_LEN);
+
+ printf("Start timed tests\n");
+ fflush(0);
+
+ BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
+
+ printf("gf_vect_mul" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_mul_sse.asm b/src/isa-l/erasure_code/gf_vect_mul_sse.asm
new file mode 100644
index 000000000..fefe7effd
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_sse.asm
@@ -0,0 +1,170 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_mul_sse(len, mul_array, src, dest)
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define return rax
+ %define stack_size 5*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm13, 2*16
+ save_xmm128 xmm14, 3*16
+ save_xmm128 xmm15, 4*16
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm13, [rsp + 2*16]
+ movdqa xmm14, [rsp + 3*16]
+ movdqa xmm15, [rsp + 4*16]
+ add rsp, stack_size
+ %endmacro
+
+%endif
+
+
+%define len arg0
+%define mul_array arg1
+%define src arg2
+%define dest arg3
+%define pos return
+
+
+;;; Use non-temporal load/store unless NO_NT_LDST is defined
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft_lo xmm14
+%define xgft_hi xmm13
+
+%define x0 xmm0
+%define xtmp1a xmm1
+%define xtmp1b xmm2
+%define xtmp1c xmm3
+%define x1 xmm4
+%define xtmp2a xmm5
+%define xtmp2b xmm6
+%define xtmp2c xmm7
+
+
+align 16
+mk_global gf_vect_mul_sse, function
+func(gf_vect_mul_sse)
+ FUNC_SAVE
+ mov pos, 0
+ movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
+ movdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ movdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+
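+;;; Algorithm note: for each byte b of the source, the GF(2^8) product
+;;; is xgft_lo[b & 0x0f] XOR xgft_hi[b >> 4]; the loop below computes
+;;; this with pshufb nibble-table lookups and pxor for the GF add.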
+loop32:
+ XLDR x0, [src+pos] ;Get next source vector
+ XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
+ movdqa xtmp1b, xgft_hi ;Reload const array registers
+ movdqa xtmp1c, xgft_lo
+ movdqa xtmp2b, xgft_hi
+ movdqa xtmp2c, xgft_lo
+ movdqa xtmp1a, x0 ;Keep unshifted copy of src
+ movdqa xtmp2a, x1
+ psraw x0, 4 ;Shift to put high nibble into bits 4-0
+ psraw x1, 4
+ pand xtmp1a, xmask0f ;Mask low src nibble in bits 4-0
+ pand xtmp2a, xmask0f
+ pand x0, xmask0f ;Mask high src nibble in bits 4-0
+ pand x1, xmask0f
+ pshufb xtmp1b, x0 ;Lookup mul table of high nibble
+ pshufb xtmp1c, xtmp1a ;Lookup mul table of low nibble
+ pshufb xtmp2b, x1
+ pshufb xtmp2c, xtmp2a
+ pxor xtmp1b, xtmp1c ;GF add high and low partials
+ pxor xtmp2b, xtmp2c
+ XSTR [dest+pos], xtmp1b ;Store result
+ XSTR [dest+pos+16], xtmp2b ;Store +16B result
+	add	pos, 32			;Loop on 32 bytes at a time
+ cmp pos, len
+ jl loop32
+
+
+return_pass:
+ sub pos, len
+ FUNC_RESTORE
+ ret
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_mul_sse, 00, 03, 0034
diff --git a/src/isa-l/erasure_code/gf_vect_mul_test.c b/src/isa-l/erasure_code/gf_vect_mul_test.c
new file mode 100644
index 000000000..b1a406624
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_test.c
@@ -0,0 +1,158 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "erasure_code.h"
+
+#define TEST_SIZE (128*1024)
+
+typedef unsigned char u8;
+
+int main(int argc, char *argv[])
+{
+ int i;
+ u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
+ int tsize;
+ int align, size;
+ unsigned char *efence_buff1;
+ unsigned char *efence_buff2;
+ unsigned char *efence_buff3;
+
+ printf("gf_vect_mul_test: ");
+
+ gf_vect_mul_init(a, gf_const_tbl);
+
+ buff1 = (u8 *) malloc(TEST_SIZE);
+ buff2 = (u8 *) malloc(TEST_SIZE);
+ buff3 = (u8 *) malloc(TEST_SIZE);
+
+ if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
+ printf("buffer alloc error\n");
+ return -1;
+ }
+ // Fill with rand data
+ for (i = 0; i < TEST_SIZE; i++)
+ buff1[i] = rand();
+
+ gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ if (gf_mul(a, buff1[i]) != buff2[i]) {
+ printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
+ buff1[i], buff2[i], gf_mul(2, buff1[i]));
+ return -1;
+ }
+ }
+
+ gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
+
+ // Check reference function
+ for (i = 0; i < TEST_SIZE; i++) {
+ if (buff2[i] != buff3[i]) {
+			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+ i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+ return -1;
+ }
+ }
+
+ for (i = 0; i < TEST_SIZE; i++)
+ buff1[i] = rand();
+
+ // Check each possible constant
+ for (a = 0; a != 255; a++) {
+ gf_vect_mul_init(a, gf_const_tbl);
+ gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+ for (i = 0; i < TEST_SIZE; i++)
+ if (gf_mul(a, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+ return -1;
+ }
+ putchar('.');
+ }
+
+ // Check buffer len
+ for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
+ a = rand();
+ gf_vect_mul_init(a, gf_const_tbl);
+ gf_vect_mul(tsize, gf_const_tbl, buff1, buff2);
+
+ for (i = 0; i < tsize; i++)
+ if (gf_mul(a, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+ return -1;
+ }
+ if (0 == tsize % (32 * 8)) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Run tests at end of buffer for Electric Fence
+ align = 32;
+ a = 2;
+
+ gf_vect_mul_init(a, gf_const_tbl);
+ for (size = 0; size < TEST_SIZE; size += align) {
+ // Line up TEST_SIZE from end
+ efence_buff1 = buff1 + size;
+ efence_buff2 = buff2 + size;
+ efence_buff3 = buff3 + size;
+
+ gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
+
+ for (i = 0; i < TEST_SIZE - size; i++)
+ if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
+ printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
+ i, efence_buff1[i], efence_buff2[i],
+ gf_mul(2, efence_buff1[i]));
+ return 1;
+ }
+
+ gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
+
+ // Check reference function
+ for (i = 0; i < TEST_SIZE - size; i++)
+ if (efence_buff2[i] != efence_buff3[i]) {
+			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+ i, a, efence_buff2[i], efence_buff3[i],
+ gf_mul(2, efence_buff1[i]));
+ return 1;
+ }
+
+ putchar('.');
+ }
+
+ printf(" done: Pass\n");
+ fflush(0);
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/Makefile.am b/src/isa-l/erasure_code/ppc64le/Makefile.am
new file mode 100644
index 000000000..9d263ac22
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/Makefile.am
@@ -0,0 +1,15 @@
+lsrc_ppc64le += erasure_code/ppc64le/ec_base_vsx.c \
+ erasure_code/ppc64le/gf_vect_mul_vsx.c \
+ erasure_code/ppc64le/gf_vect_dot_prod_vsx.c \
+ erasure_code/ppc64le/gf_vect_mad_vsx.c \
+ erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c \
+ erasure_code/ppc64le/gf_2vect_mad_vsx.c \
+ erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c \
+ erasure_code/ppc64le/gf_3vect_mad_vsx.c \
+ erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c \
+ erasure_code/ppc64le/gf_4vect_mad_vsx.c \
+ erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c \
+ erasure_code/ppc64le/gf_5vect_mad_vsx.c \
+ erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c \
+ erasure_code/ppc64le/gf_6vect_mad_vsx.c
+
diff --git a/src/isa-l/erasure_code/ppc64le/ec_base_vsx.c b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.c
new file mode 100644
index 000000000..05624f1b6
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.c
@@ -0,0 +1,97 @@
+#include "erasure_code.h"
+#include "ec_base_vsx.h"
+
+void gf_vect_dot_prod(int len, int vlen, unsigned char *v,
+ unsigned char **src, unsigned char *dest)
+{
+ gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
+}
+
+void gf_vect_mad(int len, int vec, int vec_i, unsigned char *v,
+ unsigned char *src, unsigned char *dest)
+{
+ gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
+
+}
+
+void ec_encode_data(int len, int srcs, int dests, unsigned char *v,
+ unsigned char **src, unsigned char **dest)
+{
+ if (len < 64) {
+ ec_encode_data_base(len, srcs, dests, v, src, dest);
+ return;
+ }
+
+ while (dests >= 6) {
+ gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
+ v += 6 * srcs * 32;
+ dest += 6;
+ dests -= 6;
+ }
+ switch (dests) {
+ case 6:
+ gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
+ break;
+ case 5:
+ gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
+ break;
+ case 4:
+ gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
+ break;
+ case 3:
+ gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
+ break;
+ case 2:
+ gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
+ break;
+ case 1:
+ gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
+ break;
+ case 0:
+ break;
+ }
+}
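+
+/*
+ * Caller sketch (illustration only; assumes the standard ISA-L helpers
+ * declared in erasure_code.h, with k data and p parity buffers of len
+ * bytes each):
+ *
+ *	unsigned char a[(k + p) * k], g_tbls[k * p * 32];
+ *	gf_gen_rs_matrix(a, k + p, k);
+ *	ec_init_tables(k, p, &a[k * k], g_tbls);
+ *	ec_encode_data(len, k, p, g_tbls, data, parity);
+ */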
+
+void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v,
+ unsigned char *data, unsigned char **dest)
+{
+ if (len < 64) {
+ ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
+ return;
+ }
+
+ while (rows >= 6) {
+ gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
+ v += 6 * k * 32;
+ dest += 6;
+ rows -= 6;
+ }
+ switch (rows) {
+ case 6:
+ gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
+ break;
+ case 5:
+ gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
+ break;
+ case 4:
+ gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
+ break;
+ case 3:
+ gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
+ break;
+ case 2:
+ gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
+ break;
+ case 1:
+ gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
+ break;
+ case 0:
+ break;
+ }
+}
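+
+/*
+ * Update-path sketch (illustration only): with the parity buffers zeroed
+ * beforehand and the k sources fed one at a time,
+ *
+ *	for (i = 0; i < k; i++)
+ *		ec_encode_data_update(len, k, p, i, g_tbls, data[i], parity);
+ *
+ * accumulates the same parity as a single ec_encode_data() call.
+ */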
+
+int gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
+{
+ gf_vect_mul_vsx(len, a, (unsigned char *)src, (unsigned char *)dest);
+ return 0;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/ec_base_vsx.h b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.h
new file mode 100644
index 000000000..c808629a9
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.h
@@ -0,0 +1,338 @@
+#ifndef _ERASURE_CODE_PPC64LE_H_
+#define _ERASURE_CODE_PPC64LE_H_
+
+#include "erasure_code.h"
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__ibmxl__)
+#define EC_vec_xl(a, b) vec_xl_be(a, b)
+#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
+#elif defined __GNUC__ && __GNUC__ >= 8
+#define EC_vec_xl(a, b) vec_xl_be(a, b)
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
+#elif defined __GNUC__ && __GNUC__ >= 7
+#if defined _ARCH_PWR9
+#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
+#else
+inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
+ vector unsigned char vc;
+ __asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
+ return vc;
+}
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
+#endif
+#else
+#if defined _ARCH_PWR8
+inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
+ vector unsigned char vc;
+ __asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
+ return vc;
+}
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
+#else
+#error "This code is only supported on ppc64le."
+#endif
+#endif
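+
+/*
+ * Primitive sketch (illustration only): EC_vec_permxor(hi, lo, v) applies
+ * the nibble-table GF(2^8) multiply to each byte b of v,
+ *
+ *	lo[b & 0x0f] ^ hi[b >> 4]
+ *
+ * so each 32-byte expanded constant (low table, then high table) encodes
+ * one coding coefficient.
+ */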
+
+/**
+ * @brief GF(2^8) vector multiply. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constant and save to destination array. Can be used for erasure coding encode
+ * and decode update when only one source is available at a time. Function
+ * requires pre-calculation of a 32 byte constant array based on the input
+ * coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32.
+ * @param src    Pointer to source data array.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
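+
+/*
+ * Usage sketch (illustration only; gf_vect_mul_init() is declared via
+ * erasure_code.h):
+ *
+ *	unsigned char tbl[32];
+ *	gf_vect_mul_init(c, tbl);               // expand coefficient c
+ *	gf_vect_mul_vsx(len, tbl, src, dest);   // dest[i] = c * src[i]
+ */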
+
+/**
+ * @brief GF(2^8) vector dot product. VSX version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ * on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest);
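+
+/*
+ * Scalar equivalent (illustration only): with coefficients c[0..vlen-1]
+ * expanded into gftbls, each output byte is
+ *
+ *	dest[i] = gf_mul(c[0], src[0][i]) ^ ... ^ gf_mul(c[vlen-1], src[vlen-1][i]);
+ */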
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate two outputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate three outputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate four outputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate five outputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate six outputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constant and add to destination array. Can be used for erasure coding encode
+ * and decode update when only one source is available at a time. Function
+ * requires pre-calculation of a 32*vec byte constant array based on the input
+ * coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src    Pointer to source input array.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
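+
+/*
+ * Scalar equivalent (illustration only): with c the vec_i-th coefficient
+ * expanded in gftbls,
+ *
+ *	dest[i] ^= gf_mul(c, src[i]);
+ *
+ * which lets parity be built up one source at a time.
+ */
+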
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires VSX
+ *
+ * @param len Length of each vector in bytes.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_ERASURE_CODE_PPC64LE_H_
diff --git a/src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c
new file mode 100644
index 000000000..3cb269cce
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c
@@ -0,0 +1,83 @@
+#include "ec_base_vsx.h"
+
+void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4;
+ vector unsigned char vYD, vYE, vYF, vYG;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1;
+ int i, j, head;
+
+ if (vlen < 128) {
+ gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
+ gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
+
+ for (j = 1; j < vlen; j++) {
+ gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
+ }
+ return;
+ }
+
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+
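+	/* Process the len % 64 remainder with the scalar base routine first;
+	 * the vector loop below then runs in whole 64-byte strides. */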
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
+ }
+
+ for (i = head; i < len - 63; i += 64) {
+ vY1 = vY1 ^ vY1;
+ vY2 = vY2 ^ vY2;
+ vY3 = vY3 ^ vY3;
+ vY4 = vY4 ^ vY4;
+
+ vYD = vYD ^ vYD;
+ vYE = vYE ^ vYE;
+ vYF = vYF ^ vYF;
+ vYG = vYG ^ vYG;
+
+ unsigned char *g0 = &gftbls[0 * 32 * vlen];
+ unsigned char *g1 = &gftbls[1 * 32 * vlen];
+
+ for (j = 0; j < vlen; j++) {
+ s = (unsigned char *)src[j];
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vlo0 = EC_vec_xl(0, g0);
+ vhi0 = EC_vec_xl(16, g0);
+ vlo1 = EC_vec_xl(0, g1);
+ vhi1 = EC_vec_xl(16, g1);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ g0 += 32;
+ g1 += 32;
+ }
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c
new file mode 100644
index 000000000..621684a5f
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c
@@ -0,0 +1,65 @@
+#include "ec_base_vsx.h"
+
+void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4;
+ vector unsigned char vYD, vYE, vYF, vYG;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1;
+ int i, head;
+
+ s = (unsigned char *)src;
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
+ }
+
+ vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+
+ for (i = head; i < len - 63; i += 64) {
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vY1 = vec_xl(0, t0 + i);
+ vY2 = vec_xl(16, t0 + i);
+ vYD = vec_xl(32, t0 + i);
+ vYE = vec_xl(48, t0 + i);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vY3 = vec_xl(0, t1 + i);
+ vY4 = vec_xl(16, t1 + i);
+ vYF = vec_xl(32, t1 + i);
+ vYG = vec_xl(48, t1 + i);
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c
new file mode 100644
index 000000000..23b72dc4b
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c
@@ -0,0 +1,104 @@
+#include "ec_base_vsx.h"
+
+void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
+ int i, j, head;
+
+ if (vlen < 128) {
+ gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
+ gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
+ gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
+
+ for (j = 1; j < vlen; j++) {
+ gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
+ }
+ return;
+ }
+
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
+ }
+
+ for (i = head; i < len - 63; i += 64) {
+ vY1 = vY1 ^ vY1;
+ vY2 = vY2 ^ vY2;
+ vY3 = vY3 ^ vY3;
+ vY4 = vY4 ^ vY4;
+ vY5 = vY5 ^ vY5;
+ vY6 = vY6 ^ vY6;
+
+ vYD = vYD ^ vYD;
+ vYE = vYE ^ vYE;
+ vYF = vYF ^ vYF;
+ vYG = vYG ^ vYG;
+ vYH = vYH ^ vYH;
+ vYI = vYI ^ vYI;
+
+ unsigned char *g0 = &gftbls[0 * 32 * vlen];
+ unsigned char *g1 = &gftbls[1 * 32 * vlen];
+ unsigned char *g2 = &gftbls[2 * 32 * vlen];
+
+ for (j = 0; j < vlen; j++) {
+ s = (unsigned char *)src[j];
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vlo0 = EC_vec_xl(0, g0);
+ vhi0 = EC_vec_xl(16, g0);
+ vlo1 = EC_vec_xl(0, g1);
+ vhi1 = EC_vec_xl(16, g1);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vlo2 = vec_xl(0, g2);
+ vhi2 = vec_xl(16, g2);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ g0 += 32;
+ g1 += 32;
+ g2 += 32;
+ }
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c
new file mode 100644
index 000000000..ba90c1fdb
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c
@@ -0,0 +1,84 @@
+#include "ec_base_vsx.h"
+
+void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
+ int i, head;
+
+ s = (unsigned char *)src;
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
+ }
+
+ vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+ vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+
+ for (i = head; i < len - 63; i += 64) {
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vY1 = vec_xl(0, t0 + i);
+ vY2 = vec_xl(16, t0 + i);
+ vYD = vec_xl(32, t0 + i);
+ vYE = vec_xl(48, t0 + i);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vY3 = vec_xl(0, t1 + i);
+ vY4 = vec_xl(16, t1 + i);
+ vYF = vec_xl(32, t1 + i);
+ vYG = vec_xl(48, t1 + i);
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vY5 = vec_xl(0, t2 + i);
+ vY6 = vec_xl(16, t2 + i);
+ vYH = vec_xl(32, t2 + i);
+ vYI = vec_xl(48, t2 + i);
+
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c
new file mode 100644
index 000000000..e65654453
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c
@@ -0,0 +1,124 @@
+#include "ec_base_vsx.h"
+
+void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2, *t3;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
+ int i, j, head;
+
+ if (vlen < 128) {
+ gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
+ gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
+ gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
+ gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
+
+ for (j = 1; j < vlen; j++) {
+ gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
+ }
+ return;
+ }
+
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+ t3 = (unsigned char *)dest[3];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
+ }
+
+ for (i = head; i < len - 63; i += 64) {
+ vY1 = vY1 ^ vY1;
+ vY2 = vY2 ^ vY2;
+ vY3 = vY3 ^ vY3;
+ vY4 = vY4 ^ vY4;
+ vY5 = vY5 ^ vY5;
+ vY6 = vY6 ^ vY6;
+ vY7 = vY7 ^ vY7;
+ vY8 = vY8 ^ vY8;
+
+ vYD = vYD ^ vYD;
+ vYE = vYE ^ vYE;
+ vYF = vYF ^ vYF;
+ vYG = vYG ^ vYG;
+ vYH = vYH ^ vYH;
+ vYI = vYI ^ vYI;
+ vYJ = vYJ ^ vYJ;
+ vYK = vYK ^ vYK;
+
+ unsigned char *g0 = &gftbls[0 * 32 * vlen];
+ unsigned char *g1 = &gftbls[1 * 32 * vlen];
+ unsigned char *g2 = &gftbls[2 * 32 * vlen];
+ unsigned char *g3 = &gftbls[3 * 32 * vlen];
+
+ for (j = 0; j < vlen; j++) {
+ s = (unsigned char *)src[j];
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vlo0 = EC_vec_xl(0, g0);
+ vhi0 = EC_vec_xl(16, g0);
+ vlo1 = EC_vec_xl(0, g1);
+ vhi1 = EC_vec_xl(16, g1);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vlo2 = vec_xl(0, g2);
+ vhi2 = vec_xl(16, g2);
+ vlo3 = vec_xl(0, g3);
+ vhi3 = vec_xl(16, g3);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
+ vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
+ vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
+ vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
+
+ g0 += 32;
+ g1 += 32;
+ g2 += 32;
+ g3 += 32;
+ }
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+ vec_xst(vY7, 0, t3 + i);
+ vec_xst(vY8, 16, t3 + i);
+
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+ vec_xst(vYJ, 32, t3 + i);
+ vec_xst(vYK, 48, t3 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c
new file mode 100644
index 000000000..7b236b6f8
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c
@@ -0,0 +1,103 @@
+#include "ec_base_vsx.h"
+
+void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2, *t3;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3;
+ int i, head;
+
+ s = (unsigned char *)src;
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+ t3 = (unsigned char *)dest[3];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
+ }
+
+ vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+ vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+ vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
+ vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
+
+ for (i = head; i < len - 63; i += 64) {
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vY1 = vec_xl(0, t0 + i);
+ vY2 = vec_xl(16, t0 + i);
+ vYD = vec_xl(32, t0 + i);
+ vYE = vec_xl(48, t0 + i);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vY3 = vec_xl(0, t1 + i);
+ vY4 = vec_xl(16, t1 + i);
+ vYF = vec_xl(32, t1 + i);
+ vYG = vec_xl(48, t1 + i);
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vY5 = vec_xl(0, t2 + i);
+ vY6 = vec_xl(16, t2 + i);
+ vYH = vec_xl(32, t2 + i);
+ vYI = vec_xl(48, t2 + i);
+
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ vY7 = vec_xl(0, t3 + i);
+ vY8 = vec_xl(16, t3 + i);
+ vYJ = vec_xl(32, t3 + i);
+ vYK = vec_xl(48, t3 + i);
+
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+
+ vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
+ vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
+ vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
+ vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
+
+ vec_xst(vY7, 0, t3 + i);
+ vec_xst(vY8, 16, t3 + i);
+ vec_xst(vYJ, 32, t3 + i);
+ vec_xst(vYK, 48, t3 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c
new file mode 100644
index 000000000..e9eef0e63
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c
@@ -0,0 +1,145 @@
+#include "ec_base_vsx.h"
+
+void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2, *t3, *t4;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
+ int i, j, head;
+
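+	/* Short source lists: seed each dest from src[0] with gf_vect_mul_vsx,
+	 * then accumulate sources 1..vlen-1 through the mad kernel. */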
+ if (vlen < 128) {
+ gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
+ gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
+ gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
+ gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
+ gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
+
+ for (j = 1; j < vlen; j++) {
+ gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
+ }
+ return;
+ }
+
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+ t3 = (unsigned char *)dest[3];
+ t4 = (unsigned char *)dest[4];
+
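+	/* Process the ragged head (len % 64) with the scalar base routine;
+	 * the vector loop below starts at offset head. */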
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
+ }
+
+ for (i = head; i < len - 63; i += 64) {
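+		/* Zero the accumulators: x ^ x == 0 regardless of prior contents. */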
+ vY1 = vY1 ^ vY1;
+ vY2 = vY2 ^ vY2;
+ vY3 = vY3 ^ vY3;
+ vY4 = vY4 ^ vY4;
+ vY5 = vY5 ^ vY5;
+ vY6 = vY6 ^ vY6;
+ vY7 = vY7 ^ vY7;
+ vY8 = vY8 ^ vY8;
+ vY9 = vY9 ^ vY9;
+ vYA = vYA ^ vYA;
+
+ vYD = vYD ^ vYD;
+ vYE = vYE ^ vYE;
+ vYF = vYF ^ vYF;
+ vYG = vYG ^ vYG;
+ vYH = vYH ^ vYH;
+ vYI = vYI ^ vYI;
+ vYJ = vYJ ^ vYJ;
+ vYK = vYK ^ vYK;
+ vYL = vYL ^ vYL;
+ vYM = vYM ^ vYM;
+
+ unsigned char *g0 = &gftbls[0 * 32 * vlen];
+ unsigned char *g1 = &gftbls[1 * 32 * vlen];
+ unsigned char *g2 = &gftbls[2 * 32 * vlen];
+ unsigned char *g3 = &gftbls[3 * 32 * vlen];
+ unsigned char *g4 = &gftbls[4 * 32 * vlen];
+
+ for (j = 0; j < vlen; j++) {
+ s = (unsigned char *)src[j];
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vlo0 = EC_vec_xl(0, g0);
+ vhi0 = EC_vec_xl(16, g0);
+ vlo1 = EC_vec_xl(0, g1);
+ vhi1 = EC_vec_xl(16, g1);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vlo2 = vec_xl(0, g2);
+ vhi2 = vec_xl(16, g2);
+ vlo3 = vec_xl(0, g3);
+ vhi3 = vec_xl(16, g3);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vlo4 = vec_xl(0, g4);
+ vhi4 = vec_xl(16, g4);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
+ vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
+ vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
+ vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
+
+ vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
+ vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
+ vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
+ vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
+
+ g0 += 32;
+ g1 += 32;
+ g2 += 32;
+ g3 += 32;
+ g4 += 32;
+ }
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+ vec_xst(vY7, 0, t3 + i);
+ vec_xst(vY8, 16, t3 + i);
+ vec_xst(vY9, 0, t4 + i);
+ vec_xst(vYA, 16, t4 + i);
+
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+ vec_xst(vYJ, 32, t3 + i);
+ vec_xst(vYK, 48, t3 + i);
+ vec_xst(vYL, 32, t4 + i);
+ vec_xst(vYM, 48, t4 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c
new file mode 100644
index 000000000..7bb7bb211
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c
@@ -0,0 +1,122 @@
+#include "ec_base_vsx.h"
+
+void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2, *t3, *t4;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4;
+ int i, head;
+
+ s = (unsigned char *)src;
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+ t3 = (unsigned char *)dest[3];
+ t4 = (unsigned char *)dest[4];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
+ }
+
+ vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+ vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+ vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
+ vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
+ vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
+ vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
+
+ for (i = head; i < len - 63; i += 64) {
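+		/* Destination loads/stores are interleaved with the GF math
+		 * to hide memory latency. */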
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vY1 = vec_xl(0, t0 + i);
+ vY2 = vec_xl(16, t0 + i);
+ vYD = vec_xl(32, t0 + i);
+ vYE = vec_xl(48, t0 + i);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vY3 = vec_xl(0, t1 + i);
+ vY4 = vec_xl(16, t1 + i);
+ vYF = vec_xl(32, t1 + i);
+ vYG = vec_xl(48, t1 + i);
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vY5 = vec_xl(0, t2 + i);
+ vY6 = vec_xl(16, t2 + i);
+ vYH = vec_xl(32, t2 + i);
+ vYI = vec_xl(48, t2 + i);
+
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ vY7 = vec_xl(0, t3 + i);
+ vY8 = vec_xl(16, t3 + i);
+ vYJ = vec_xl(32, t3 + i);
+ vYK = vec_xl(48, t3 + i);
+
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+
+ vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
+ vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
+ vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
+ vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
+
+ vY9 = vec_xl(0, t4 + i);
+ vYA = vec_xl(16, t4 + i);
+ vYL = vec_xl(32, t4 + i);
+ vYM = vec_xl(48, t4 + i);
+
+ vec_xst(vY7, 0, t3 + i);
+ vec_xst(vY8, 16, t3 + i);
+ vec_xst(vYJ, 32, t3 + i);
+ vec_xst(vYK, 48, t3 + i);
+
+ vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
+ vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
+ vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
+ vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
+
+ vec_xst(vY9, 0, t4 + i);
+ vec_xst(vYA, 16, t4 + i);
+ vec_xst(vYL, 32, t4 + i);
+ vec_xst(vYM, 48, t4 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c
new file mode 100644
index 000000000..ac918bd49
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c
@@ -0,0 +1,166 @@
+#include "ec_base_vsx.h"
+
+void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
+ vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
+ int i, j, head;
+
+ if (vlen < 128) {
+ gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]);
+ gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]);
+ gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]);
+ gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]);
+ gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]);
+ gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *)dest[5]);
+
+ for (j = 1; j < vlen; j++) {
+ gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
+ }
+ return;
+ }
+
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+ t3 = (unsigned char *)dest[3];
+ t4 = (unsigned char *)dest[4];
+ t5 = (unsigned char *)dest[5];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4);
+ gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5);
+ }
+
+ for (i = head; i < len - 63; i += 64) {
+ vY1 = vY1 ^ vY1;
+ vY2 = vY2 ^ vY2;
+ vY3 = vY3 ^ vY3;
+ vY4 = vY4 ^ vY4;
+ vY5 = vY5 ^ vY5;
+ vY6 = vY6 ^ vY6;
+ vY7 = vY7 ^ vY7;
+ vY8 = vY8 ^ vY8;
+ vY9 = vY9 ^ vY9;
+ vYA = vYA ^ vYA;
+ vYB = vYB ^ vYB;
+ vYC = vYC ^ vYC;
+
+ vYD = vYD ^ vYD;
+ vYE = vYE ^ vYE;
+ vYF = vYF ^ vYF;
+ vYG = vYG ^ vYG;
+ vYH = vYH ^ vYH;
+ vYI = vYI ^ vYI;
+ vYJ = vYJ ^ vYJ;
+ vYK = vYK ^ vYK;
+ vYL = vYL ^ vYL;
+ vYM = vYM ^ vYM;
+ vYN = vYN ^ vYN;
+ vYO = vYO ^ vYO;
+
+ unsigned char *g0 = &gftbls[0 * 32 * vlen];
+ unsigned char *g1 = &gftbls[1 * 32 * vlen];
+ unsigned char *g2 = &gftbls[2 * 32 * vlen];
+ unsigned char *g3 = &gftbls[3 * 32 * vlen];
+ unsigned char *g4 = &gftbls[4 * 32 * vlen];
+ unsigned char *g5 = &gftbls[5 * 32 * vlen];
+
+ for (j = 0; j < vlen; j++) {
+ s = (unsigned char *)src[j];
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vlo0 = EC_vec_xl(0, g0);
+ vhi0 = EC_vec_xl(16, g0);
+ vlo1 = EC_vec_xl(0, g1);
+ vhi1 = EC_vec_xl(16, g1);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vlo2 = EC_vec_xl(0, g2);
+ vhi2 = EC_vec_xl(16, g2);
+ vlo3 = EC_vec_xl(0, g3);
+ vhi3 = EC_vec_xl(16, g3);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vlo4 = EC_vec_xl(0, g4);
+ vhi4 = EC_vec_xl(16, g4);
+ vlo5 = EC_vec_xl(0, g5);
+ vhi5 = EC_vec_xl(16, g5);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
+ vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
+ vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
+ vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
+
+ vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
+ vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
+ vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
+ vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
+
+ vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
+ vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
+ vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
+ vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
+
+ g0 += 32;
+ g1 += 32;
+ g2 += 32;
+ g3 += 32;
+ g4 += 32;
+ g5 += 32;
+ }
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+ vec_xst(vY7, 0, t3 + i);
+ vec_xst(vY8, 16, t3 + i);
+ vec_xst(vY9, 0, t4 + i);
+ vec_xst(vYA, 16, t4 + i);
+ vec_xst(vYB, 0, t5 + i);
+ vec_xst(vYC, 16, t5 + i);
+
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+ vec_xst(vYJ, 32, t3 + i);
+ vec_xst(vYK, 48, t3 + i);
+ vec_xst(vYL, 32, t4 + i);
+ vec_xst(vYM, 48, t4 + i);
+ vec_xst(vYN, 32, t5 + i);
+ vec_xst(vYO, 48, t5 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c
new file mode 100644
index 000000000..43ea6c696
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c
@@ -0,0 +1,142 @@
+#include "ec_base_vsx.h"
+
+void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char **dest)
+{
+ unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5;
+ vector unsigned char vX1, vX2, vX3, vX4;
+ vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC;
+ vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO;
+ vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2;
+ vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5;
+ int i, head;
+
+ s = (unsigned char *)src;
+ t0 = (unsigned char *)dest[0];
+ t1 = (unsigned char *)dest[1];
+ t2 = (unsigned char *)dest[2];
+ t3 = (unsigned char *)dest[3];
+ t4 = (unsigned char *)dest[4];
+ t5 = (unsigned char *)dest[5];
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4);
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5);
+ }
+
+ vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5)));
+ vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+ vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5)));
+ vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
+ vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5)));
+ vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
+ vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5)));
+ vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
+ vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5)));
+
+ for (i = head; i < len - 63; i += 64) {
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vY1 = vec_xl(0, t0 + i);
+ vY2 = vec_xl(16, t0 + i);
+ vYD = vec_xl(32, t0 + i);
+ vYE = vec_xl(48, t0 + i);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vYD, 32, t0 + i);
+ vec_xst(vYE, 48, t0 + i);
+
+ vY3 = vec_xl(0, t1 + i);
+ vY4 = vec_xl(16, t1 + i);
+ vYF = vec_xl(32, t1 + i);
+ vYG = vec_xl(48, t1 + i);
+
+ vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1);
+ vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2);
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3);
+ vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4);
+
+ vec_xst(vY3, 0, t1 + i);
+ vec_xst(vY4, 16, t1 + i);
+ vec_xst(vYF, 32, t1 + i);
+ vec_xst(vYG, 48, t1 + i);
+
+ vY5 = vec_xl(0, t2 + i);
+ vY6 = vec_xl(16, t2 + i);
+ vYH = vec_xl(32, t2 + i);
+ vYI = vec_xl(48, t2 + i);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1);
+ vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2);
+ vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3);
+ vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4);
+
+ vY7 = vec_xl(0, t3 + i);
+ vY8 = vec_xl(16, t3 + i);
+ vYJ = vec_xl(32, t3 + i);
+ vYK = vec_xl(48, t3 + i);
+
+ vec_xst(vY5, 0, t2 + i);
+ vec_xst(vY6, 16, t2 + i);
+ vec_xst(vYH, 32, t2 + i);
+ vec_xst(vYI, 48, t2 + i);
+
+ vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1);
+ vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2);
+ vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3);
+ vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4);
+
+ vY9 = vec_xl(0, t4 + i);
+ vYA = vec_xl(16, t4 + i);
+ vYL = vec_xl(32, t4 + i);
+ vYM = vec_xl(48, t4 + i);
+
+ vec_xst(vY7, 0, t3 + i);
+ vec_xst(vY8, 16, t3 + i);
+ vec_xst(vYJ, 32, t3 + i);
+ vec_xst(vYK, 48, t3 + i);
+
+ vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1);
+ vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2);
+ vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3);
+ vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4);
+
+ vYB = vec_xl(0, t5 + i);
+ vYC = vec_xl(16, t5 + i);
+ vYN = vec_xl(32, t5 + i);
+ vYO = vec_xl(48, t5 + i);
+
+ vec_xst(vY9, 0, t4 + i);
+ vec_xst(vYA, 16, t4 + i);
+ vec_xst(vYL, 32, t4 + i);
+ vec_xst(vYM, 48, t4 + i);
+
+ vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1);
+ vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2);
+ vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3);
+ vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4);
+
+ vec_xst(vYB, 0, t5 + i);
+ vec_xst(vYC, 16, t5 + i);
+ vec_xst(vYN, 32, t5 + i);
+ vec_xst(vYO, 48, t5 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c
new file mode 100644
index 000000000..2f97e3421
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c
@@ -0,0 +1,85 @@
+#include "ec_base_vsx.h"
+
+void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest)
+{
+ unsigned char *s, *t0;
+ vector unsigned char vX1, vY1;
+ vector unsigned char vX2, vY2;
+ vector unsigned char vX3, vY3;
+ vector unsigned char vX4, vY4;
+ vector unsigned char vX5, vY5;
+ vector unsigned char vX6, vY6;
+ vector unsigned char vX7, vY7;
+ vector unsigned char vX8, vY8;
+ vector unsigned char vhi0, vlo0;
+ int i, j, head;
+
+ if (vlen < 128) {
+ gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest);
+
+ for (j = 1; j < vlen; j++) {
+ gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
+ }
+ return;
+ }
+
+ t0 = (unsigned char *)dest;
+
+ head = len % 128;
+ if (head != 0) {
+ gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
+ }
+
+ for (i = head; i < len - 127; i += 128) {
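+		/* Single destination, so a wider 128-byte stride: eight vectors
+		 * per iteration. */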
+ vY1 = vY1 ^ vY1;
+ vY2 = vY2 ^ vY2;
+ vY3 = vY3 ^ vY3;
+ vY4 = vY4 ^ vY4;
+
+ vY5 = vY5 ^ vY5;
+ vY6 = vY6 ^ vY6;
+ vY7 = vY7 ^ vY7;
+ vY8 = vY8 ^ vY8;
+
+ unsigned char *g0 = &gftbls[0 * 32 * vlen];
+
+ for (j = 0; j < vlen; j++) {
+ s = (unsigned char *)src[j];
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vlo0 = EC_vec_xl(0, g0);
+ vhi0 = EC_vec_xl(16, g0);
+
+ vX5 = vec_xl(64, s + i);
+ vX6 = vec_xl(80, s + i);
+ vX7 = vec_xl(96, s + i);
+ vX8 = vec_xl(112, s + i);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
+ vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
+ vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
+ vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);
+
+ g0 += 32;
+ }
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 32, t0 + i);
+ vec_xst(vY4, 48, t0 + i);
+
+ vec_xst(vY5, 64, t0 + i);
+ vec_xst(vY6, 80, t0 + i);
+ vec_xst(vY7, 96, t0 + i);
+ vec_xst(vY8, 112, t0 + i);
+ }
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c
new file mode 100644
index 000000000..a4810b96d
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c
@@ -0,0 +1,48 @@
+#include "ec_base_vsx.h"
+
+void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
+ unsigned char *src, unsigned char *dest)
+{
+ unsigned char *s, *t0;
+ vector unsigned char vX1, vY1;
+ vector unsigned char vX2, vY2;
+ vector unsigned char vX3, vY3;
+ vector unsigned char vX4, vY4;
+ vector unsigned char vhi0, vlo0;
+ int i, head;
+
+ s = (unsigned char *)src;
+ t0 = (unsigned char *)dest;
+
+ head = len % 64;
+ if (head != 0) {
+ gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
+ }
+
+ vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+ vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+
+ for (i = head; i < len - 63; i += 64) {
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vY1 = vec_xl(0, t0 + i);
+ vY2 = vec_xl(16, t0 + i);
+ vY3 = vec_xl(32, t0 + i);
+ vY4 = vec_xl(48, t0 + i);
+
+ vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+ vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
+ vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 32, t0 + i);
+ vec_xst(vY4, 48, t0 + i);
+ }
+
+ return;
+}
diff --git a/src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c
new file mode 100644
index 000000000..3e610a104
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c
@@ -0,0 +1,61 @@
+#include "ec_base_vsx.h"
+
+void gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
+{
+ unsigned char *s, *t0;
+ vector unsigned char vX1, vY1;
+ vector unsigned char vX2, vY2;
+ vector unsigned char vX3, vY3;
+ vector unsigned char vX4, vY4;
+ vector unsigned char vX5, vY5;
+ vector unsigned char vX6, vY6;
+ vector unsigned char vX7, vY7;
+ vector unsigned char vX8, vY8;
+ vector unsigned char vhi0, vlo0;
+ int i, head;
+
+ s = (unsigned char *)src;
+ t0 = (unsigned char *)dest;
+
+ head = len % 128;
+ if (head != 0) {
+ gf_vect_mul_base(head, gftbl, src, dest);
+ }
+
+ vlo0 = EC_vec_xl(0, gftbl);
+ vhi0 = EC_vec_xl(16, gftbl);
+
+ for (i = head; i < len - 127; i += 128) {
+ vX1 = vec_xl(0, s + i);
+ vX2 = vec_xl(16, s + i);
+ vX3 = vec_xl(32, s + i);
+ vX4 = vec_xl(48, s + i);
+
+ vX5 = vec_xl(64, s + i);
+ vX6 = vec_xl(80, s + i);
+ vX7 = vec_xl(96, s + i);
+ vX8 = vec_xl(112, s + i);
+
+ vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
+ vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
+ vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
+ vY4 = EC_vec_permxor(vhi0, vlo0, vX4);
+
+ vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
+ vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
+ vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
+ vY8 = EC_vec_permxor(vhi0, vlo0, vX8);
+
+ vec_xst(vY1, 0, t0 + i);
+ vec_xst(vY2, 16, t0 + i);
+ vec_xst(vY3, 32, t0 + i);
+ vec_xst(vY4, 48, t0 + i);
+
+ vec_xst(vY5, 64, t0 + i);
+ vec_xst(vY6, 80, t0 + i);
+ vec_xst(vY7, 96, t0 + i);
+ vec_xst(vY8, 112, t0 + i);
+ }
+
+ return;
+}
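All of the VSX kernels above reduce to one primitive: EC_vec_permxor, a
vectorized form of the split-nibble table lookup ISA-L uses for GF(2^8)
multiplication. A minimal scalar sketch of one lane, assuming the standard
32-byte table layout (low-nibble products in bytes 0-15, high-nibble products
in bytes 16-31) and a hypothetical helper name:

    /* Scalar model of one EC_vec_permxor lane (sketch, not library API). */
    static unsigned char gf_mul_byte(const unsigned char *gftbl, unsigned char x)
    {
            return gftbl[x & 0x0f] ^ gftbl[16 + (x >> 4)];
    }

The vector form performs 16 such lookups at once with two permutes and an XOR,
which is why every kernel loads each coefficient as a vlo/vhi pair.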
diff --git a/src/isa-l/examples/ec/Makefile.am b/src/isa-l/examples/ec/Makefile.am
new file mode 100644
index 000000000..e7121af83
--- /dev/null
+++ b/src/isa-l/examples/ec/Makefile.am
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+src_include += -I $(srcdir)/examples/ec
+
+examples += examples/ec/ec_simple_example
+examples += examples/ec/ec_piggyback_example
diff --git a/src/isa-l/examples/ec/Makefile.unx b/src/isa-l/examples/ec/Makefile.unx
new file mode 100644
index 000000000..b04cfdfe6
--- /dev/null
+++ b/src/isa-l/examples/ec/Makefile.unx
@@ -0,0 +1,8 @@
+
+default: ex
+
+include ../../erasure_code/Makefile.am
+include Makefile.am
+include ../../make.inc
+
+VPATH = . ../../erasure_code ../../include
diff --git a/src/isa-l/examples/ec/ec_piggyback_example.c b/src/isa-l/examples/ec/ec_piggyback_example.c
new file mode 100644
index 000000000..e19abc067
--- /dev/null
+++ b/src/isa-l/examples/ec/ec_piggyback_example.c
@@ -0,0 +1,506 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include "erasure_code.h" // use <isa-l.h> instead when linking against installed
+#include "test.h"
+
+#define MMAX 255
+#define KMAX 255
+
+typedef unsigned char u8;
+int verbose = 0;
+
+int usage(void)
+{
+ fprintf(stderr,
+ "Usage: ec_piggyback_example [options]\n"
+ " -h Help\n"
+ " -k <val> Number of source fragments\n"
+ " -p <val> Number of parity fragments\n"
+ " -l <val> Length of fragments\n"
+ " -e <val> Simulate erasure on frag index val. Zero based. Can be repeated.\n"
+ " -v Verbose\n"
+ " -b Run timed benchmark\n"
+ " -s Toggle use of sparse matrix opt\n"
+ " -r <seed> Pick random (k, p) with seed\n");
+ exit(0);
+}
+
+// Cauchy-based matrix
+void gf_gen_full_pb_cauchy_matrix(u8 * a, int m, int k)
+{
+ int i, j, p = m - k;
+
+	// Identity matrix in top k x k to indicate a symmetric code
+ memset(a, 0, k * m);
+ for (i = 0; i < k; i++)
+ a[k * i + i] = 1;
+
+ for (i = k; i < (k + p / 2); i++) {
+ for (j = 0; j < k / 2; j++)
+ a[k * i + j] = gf_inv(i ^ j);
+ for (; j < k; j++)
+ a[k * i + j] = 0;
+ }
+ for (; i < m; i++) {
+ for (j = 0; j < k / 2; j++)
+ a[k * i + j] = 0;
+ for (; j < k; j++)
+ a[k * i + j] = gf_inv((i - p / 2) ^ (j - k / 2));
+ }
+
+ // Fill in mixture of B parity depending on a few localized A sources
+ int r = 0, c = 0;
+ int repeat_len = k / (p - 2);
+ int parity_rows = p / 2;
+
+ for (i = 1 + k + parity_rows; i < m; i++, r++) {
+ if (r == (parity_rows - 1) - ((k / 2 % (parity_rows - 1))))
+ repeat_len++;
+
+ for (j = 0; j < repeat_len; j++, c++)
+ a[k * i + c] = gf_inv((k + 1) ^ c);
+ }
+}
+
+// Vandermonde-based matrix - not recommended due to limits on when it is invertible
+void gf_gen_full_pb_vand_matrix(u8 * a, int m, int k)
+{
+ int i, j, p = m - k;
+ unsigned char q, gen = 1;
+
+	// Identity matrix in top k x k to indicate a symmetric code
+ memset(a, 0, k * m);
+ for (i = 0; i < k; i++)
+ a[k * i + i] = 1;
+
+ for (i = k; i < (k + (p / 2)); i++) {
+ q = 1;
+ for (j = 0; j < k / 2; j++) {
+ a[k * i + j] = q;
+ q = gf_mul(q, gen);
+ }
+ for (; j < k; j++)
+ a[k * i + j] = 0;
+ gen = gf_mul(gen, 2);
+ }
+ gen = 1;
+ for (; i < m; i++) {
+ q = 1;
+ for (j = 0; j < k / 2; j++) {
+ a[k * i + j] = 0;
+ }
+ for (; j < k; j++) {
+ a[k * i + j] = q;
+ q = gf_mul(q, gen);
+ }
+ gen = gf_mul(gen, 2);
+ }
+
+ // Fill in mixture of B parity depending on a few localized A sources
+ int r = 0, c = 0;
+ int repeat_len = k / (p - 2);
+ int parity_rows = p / 2;
+
+ for (i = 1 + k + parity_rows; i < m; i++, r++) {
+ if (r == (parity_rows - 1) - ((k / 2 % (parity_rows - 1))))
+ repeat_len++;
+
+ for (j = 0; j < repeat_len; j++)
+ a[k * i + c++] = 1;
+ }
+}
+
+void print_matrix(int m, int k, unsigned char *s, const char *msg)
+{
+ int i, j;
+
+ printf("%s:\n", msg);
+ for (i = 0; i < m; i++) {
+ printf("%3d- ", i);
+ for (j = 0; j < k; j++) {
+ printf(" %2x", 0xff & s[j + (i * k)]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
+void print_list(int n, unsigned char *s, const char *msg)
+{
+ int i;
+ if (!verbose)
+ return;
+
+ printf("%s: ", msg);
+ for (i = 0; i < n; i++)
+ printf(" %d", s[i]);
+ printf("\n");
+}
+
+static int gf_gen_decode_matrix(u8 * encode_matrix,
+ u8 * decode_matrix,
+ u8 * invert_matrix,
+ u8 * temp_matrix,
+ u8 * decode_index,
+ u8 * frag_err_list, int nerrs, int k, int m);
+
+int main(int argc, char *argv[])
+{
+ int i, j, m, c, e, ret;
+ int k = 10, p = 4, len = 8 * 1024; // Default params
+ int nerrs = 0;
+ int benchmark = 0;
+ int sparse_matrix_opt = 1;
+
+ // Fragment buffer pointers
+ u8 *frag_ptrs[MMAX];
+ u8 *parity_ptrs[KMAX];
+ u8 *recover_srcs[KMAX];
+ u8 *recover_outp[KMAX];
+ u8 frag_err_list[MMAX];
+
+ // Coefficient matrices
+ u8 *encode_matrix, *decode_matrix;
+ u8 *invert_matrix, *temp_matrix;
+ u8 *g_tbls;
+ u8 decode_index[MMAX];
+
+ if (argc == 1)
+ for (i = 0; i < p; i++)
+ frag_err_list[nerrs++] = rand() % (k + p);
+
+ while ((c = getopt(argc, argv, "k:p:l:e:r:hvbs")) != -1) {
+ switch (c) {
+ case 'k':
+ k = atoi(optarg);
+ break;
+ case 'p':
+ p = atoi(optarg);
+ break;
+ case 'l':
+ len = atoi(optarg);
+ if (len < 0)
+ usage();
+ break;
+ case 'e':
+ e = atoi(optarg);
+ frag_err_list[nerrs++] = e;
+ break;
+ case 'r':
+ srand(atoi(optarg));
+ k = (rand() % MMAX) / 4;
+ k = (k < 2) ? 2 : k;
+ p = (rand() % (MMAX - k)) / 4;
+ p = (p < 2) ? 2 : p;
+ for (i = 0; i < k && nerrs < p; i++)
+ if (rand() & 1)
+ frag_err_list[nerrs++] = i;
+ break;
+ case 'v':
+ verbose++;
+ break;
+ case 'b':
+ benchmark = 1;
+ break;
+ case 's':
+ sparse_matrix_opt = !sparse_matrix_opt;
+ break;
+ case 'h':
+ default:
+ usage();
+ break;
+ }
+ }
+ m = k + p;
+
+ // Check for valid parameters
+ if (m > (MMAX / 2) || k > (KMAX / 2) || m < 0 || p < 2 || k < 1) {
+ printf(" Input test parameter error m=%d, k=%d, p=%d, erasures=%d\n",
+ m, k, p, nerrs);
+ usage();
+ }
+ if (nerrs > p) {
+ printf(" Number of erasures chosen exceeds power of code erasures=%d p=%d\n",
+ nerrs, p);
+ }
+ for (i = 0; i < nerrs; i++) {
+ if (frag_err_list[i] >= m)
+ printf(" fragment %d not in range\n", frag_err_list[i]);
+ }
+
+ printf("ec_piggyback_example:\n");
+
+ /*
+	 * One simple way to implement piggyback codes is to keep a 2x-wide matrix
+	 * that covers how each parity is related to both the A and B sources. This
+	 * keeps it easy to generalize in the parameters m and k, and the resulting
+	 * sparse matrix multiplication can be optimized by pre-removing zero items.
+ */
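+	/*
+	 * For the default (k=10, p=4) this yields a (k2, p2) = (20, 8) code over
+	 * half-length fragments: each source buffer is split into an A half and a
+	 * B half, and one 2k-wide matrix row addresses both halves at once.
+	 */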
+
+ int k2 = 2 * k;
+ int p2 = 2 * p;
+ int m2 = k2 + p2;
+ int nerrs2 = nerrs;
+
+ encode_matrix = malloc(m2 * k2);
+ decode_matrix = malloc(m2 * k2);
+ invert_matrix = malloc(m2 * k2);
+ temp_matrix = malloc(m2 * k2);
+ g_tbls = malloc(k2 * p2 * 32);
+
+ if (encode_matrix == NULL || decode_matrix == NULL
+ || invert_matrix == NULL || temp_matrix == NULL || g_tbls == NULL) {
+ printf("Test failure! Error with malloc\n");
+ return -1;
+ }
+ // Allocate the src fragments
+ for (i = 0; i < k; i++) {
+ if (NULL == (frag_ptrs[i] = malloc(len))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ }
+ // Allocate the parity fragments
+ for (i = 0; i < p2; i++) {
+ if (NULL == (parity_ptrs[i] = malloc(len / 2))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ }
+
+ // Allocate buffers for recovered data
+ for (i = 0; i < p2; i++) {
+ if (NULL == (recover_outp[i] = malloc(len / 2))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ }
+
+ // Fill sources with random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < len; j++)
+ frag_ptrs[i][j] = rand();
+
+ printf(" encode (m,k,p)=(%d,%d,%d) len=%d\n", m, k, p, len);
+
+ // Pick an encode matrix.
+ gf_gen_full_pb_cauchy_matrix(encode_matrix, m2, k2);
+
+ if (verbose)
+ print_matrix(m2, k2, encode_matrix, "encode matrix");
+
+ // Initialize g_tbls from encode matrix
+ ec_init_tables(k2, p2, &encode_matrix[k2 * k2], g_tbls);
+
+ // Fold A and B into single list of fragments
+ for (i = 0; i < k; i++)
+ frag_ptrs[i + k] = &frag_ptrs[i][len / 2];
+
+ if (!sparse_matrix_opt) {
+ // Standard encode using no assumptions on the encode matrix
+
+ // Generate EC parity blocks from sources
+ ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs, parity_ptrs);
+
+ if (benchmark) {
+ struct perf start;
+ BENCHMARK(&start, BENCHMARK_TIME,
+ ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs,
+ parity_ptrs));
+ printf("ec_piggyback_encode_std: ");
+ perf_print(start, m2 * len / 2);
+ }
+ } else {
+ // Sparse matrix optimization - use fact that input matrix is sparse
+
+ // Keep an encode matrix with some zero elements removed
+ u8 *encode_matrix_faster, *g_tbls_faster;
+ encode_matrix_faster = malloc(m * k);
+ g_tbls_faster = malloc(k * p * 32);
+ if (encode_matrix_faster == NULL || g_tbls_faster == NULL) {
+ printf("Test failure! Error with malloc\n");
+ return -1;
+ }
+
+ /*
+		 * Pack only the part that we know is non-zero. Alternatively,
+		 * we could search for and keep track of non-zero elements, but for
+		 * simplicity we just skip the lower quadrant.
+ */
+ for (i = k, j = k2; i < m; i++, j++)
+ memcpy(&encode_matrix_faster[k * i], &encode_matrix[k2 * j], k);
+
+ if (verbose) {
+ print_matrix(p, k, &encode_matrix_faster[k * k],
+ "encode via sparse-opt");
+ print_matrix(p2 / 2, k2, &encode_matrix[(k2 + p2 / 2) * k2],
+ "encode via sparse-opt");
+ }
+ // Initialize g_tbls from encode matrix
+ ec_init_tables(k, p, &encode_matrix_faster[k * k], g_tbls_faster);
+
+ // Generate EC parity blocks from sources
+ ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs, parity_ptrs);
+ ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32], frag_ptrs,
+ &parity_ptrs[p]);
+
+ if (benchmark) {
+ struct perf start;
+ BENCHMARK(&start, BENCHMARK_TIME,
+ ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs,
+ parity_ptrs);
+ ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32],
+ frag_ptrs, &parity_ptrs[p]));
+ printf("ec_piggyback_encode_sparse: ");
+ perf_print(start, m2 * len / 2);
+ }
+ }
+
+ if (nerrs <= 0)
+ return 0;
+
+ printf(" recover %d fragments\n", nerrs);
+
+ // Set frag pointers to correspond to parity
+ for (i = k2; i < m2; i++)
+ frag_ptrs[i] = parity_ptrs[i - k2];
+
+ print_list(nerrs2, frag_err_list, " frag err list");
+
+ // Find a decode matrix to regenerate all erasures from remaining frags
+ ret = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+ invert_matrix, temp_matrix, decode_index, frag_err_list,
+ nerrs2, k2, m2);
+
+ if (ret != 0) {
+ printf("Fail on generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array pointers as list of valid fragments
+ for (i = 0; i < k2; i++)
+ if (decode_index[i] < k2)
+ recover_srcs[i] = frag_ptrs[decode_index[i]];
+ else
+ recover_srcs[i] = parity_ptrs[decode_index[i] - k2];
+
+ print_list(k2, decode_index, " decode index");
+
+ // Recover data
+ ec_init_tables(k2, nerrs2, decode_matrix, g_tbls);
+ ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs, recover_outp);
+
+ if (benchmark) {
+ struct perf start;
+ BENCHMARK(&start, BENCHMARK_TIME,
+ ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs,
+ recover_outp));
+ printf("ec_piggyback_decode: ");
+ perf_print(start, (k2 + nerrs2) * len / 2);
+ }
+ // Check that recovered buffers are the same as original
+ printf(" check recovery of block {");
+ for (i = 0; i < nerrs2; i++) {
+ printf(" %d", frag_err_list[i]);
+ if (memcmp(recover_outp[i], frag_ptrs[frag_err_list[i]], len / 2)) {
+ printf(" Fail erasure recovery %d, frag %d\n", i, frag_err_list[i]);
+ return -1;
+ }
+ }
+ printf(" } done all: Pass\n");
+
+ return 0;
+}
+
+// Generate decode matrix from encode matrix and erasure list
+
+static int gf_gen_decode_matrix(u8 * encode_matrix,
+ u8 * decode_matrix,
+ u8 * invert_matrix,
+ u8 * temp_matrix,
+ u8 * decode_index, u8 * frag_err_list, int nerrs, int k, int m)
+{
+ int i, j, p, r;
+ int nsrcerrs = 0;
+ u8 s, *b = temp_matrix;
+ u8 frag_in_err[MMAX];
+
+ memset(frag_in_err, 0, sizeof(frag_in_err));
+
+	// Mark the erased fragments and count how many are source erasures
+ for (i = 0; i < nerrs; i++) {
+ if (frag_err_list[i] < k)
+ nsrcerrs++;
+ frag_in_err[frag_err_list[i]] = 1;
+ }
+
+ // Construct b (matrix that encoded remaining frags) by removing erased rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (frag_in_err[r])
+ r++;
+ for (j = 0; j < k; j++)
+ b[k * i + j] = encode_matrix[k * r + j];
+ decode_index[i] = r;
+ }
+ if (verbose > 1)
+ print_matrix(k, k, b, "matrix to invert");
+
+ // Invert matrix to get recovery matrix
+ if (gf_invert_matrix(b, invert_matrix, k) < 0)
+ return -1;
+
+ if (verbose > 2)
+ print_matrix(k, k, invert_matrix, "matrix inverted");
+
+ // Get decode matrix with only wanted recovery rows
+ for (i = 0; i < nsrcerrs; i++) {
+ for (j = 0; j < k; j++) {
+ decode_matrix[k * i + j] = invert_matrix[k * frag_err_list[i] + j];
+ }
+ }
+
+	// For non-src (parity) erasures, multiply the encode matrix by the inverse
+ for (p = nsrcerrs; p < nerrs; p++) {
+ for (i = 0; i < k; i++) {
+ s = 0;
+ for (j = 0; j < k; j++)
+ s ^= gf_mul(invert_matrix[j * k + i],
+ encode_matrix[k * frag_err_list[p] + j]);
+
+ decode_matrix[k * p + i] = s;
+ }
+ }
+ if (verbose > 1)
+ print_matrix(nerrs, k, decode_matrix, "decode matrix");
+ return 0;
+}
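A quick usage sketch for the example above, using only flags declared in its
getopt string (the values here are illustrative):

    ./ec_piggyback_example -k 10 -p 4 -e 2 -e 7 -v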
diff --git a/src/isa-l/examples/ec/ec_simple_example.c b/src/isa-l/examples/ec/ec_simple_example.c
new file mode 100644
index 000000000..82efa6b48
--- /dev/null
+++ b/src/isa-l/examples/ec/ec_simple_example.c
@@ -0,0 +1,277 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include "erasure_code.h" // use <isa-l.h> instead when linking against installed
+
+#define MMAX 255
+#define KMAX 255
+
+typedef unsigned char u8;
+
+int usage(void)
+{
+ fprintf(stderr,
+ "Usage: ec_simple_example [options]\n"
+ " -h Help\n"
+ " -k <val> Number of source fragments\n"
+ " -p <val> Number of parity fragments\n"
+ " -l <val> Length of fragments\n"
+ " -e <val> Simulate erasure on frag index val. Zero based. Can be repeated.\n"
+ " -r <seed> Pick random (k, p) with seed\n");
+ exit(0);
+}
+
+static int gf_gen_decode_matrix_simple(u8 * encode_matrix,
+ u8 * decode_matrix,
+ u8 * invert_matrix,
+ u8 * temp_matrix,
+ u8 * decode_index,
+ u8 * frag_err_list, int nerrs, int k, int m);
+
+int main(int argc, char *argv[])
+{
+ int i, j, m, c, e, ret;
+ int k = 10, p = 4, len = 8 * 1024; // Default params
+ int nerrs = 0;
+
+ // Fragment buffer pointers
+ u8 *frag_ptrs[MMAX];
+ u8 *recover_srcs[KMAX];
+ u8 *recover_outp[KMAX];
+ u8 frag_err_list[MMAX];
+
+ // Coefficient matrices
+ u8 *encode_matrix, *decode_matrix;
+ u8 *invert_matrix, *temp_matrix;
+ u8 *g_tbls;
+ u8 decode_index[MMAX];
+
+ if (argc == 1)
+ for (i = 0; i < p; i++)
+ frag_err_list[nerrs++] = rand() % (k + p);
+
+ while ((c = getopt(argc, argv, "k:p:l:e:r:h")) != -1) {
+ switch (c) {
+ case 'k':
+ k = atoi(optarg);
+ break;
+ case 'p':
+ p = atoi(optarg);
+ break;
+ case 'l':
+ len = atoi(optarg);
+ if (len < 0)
+ usage();
+ break;
+ case 'e':
+ e = atoi(optarg);
+ frag_err_list[nerrs++] = e;
+ break;
+ case 'r':
+ srand(atoi(optarg));
+ k = (rand() % (MMAX - 1)) + 1; // Pick k {1 to MMAX - 1}
+ p = (rand() % (MMAX - k)) + 1; // Pick p {1 to MMAX - k}
+
+ for (i = 0; i < k + p && nerrs < p; i++)
+ if (rand() & 1)
+ frag_err_list[nerrs++] = i;
+ break;
+ case 'h':
+ default:
+ usage();
+ break;
+ }
+ }
+ m = k + p;
+
+ // Check for valid parameters
+ if (m > MMAX || k > KMAX || m < 0 || p < 1 || k < 1) {
+ printf(" Input test parameter error m=%d, k=%d, p=%d, erasures=%d\n",
+ m, k, p, nerrs);
+ usage();
+ }
+ if (nerrs > p) {
+ printf(" Number of erasures chosen exceeds power of code erasures=%d p=%d\n",
+ nerrs, p);
+ usage();
+ }
+ for (i = 0; i < nerrs; i++) {
+ if (frag_err_list[i] >= m) {
+ printf(" fragment %d not in range\n", frag_err_list[i]);
+ usage();
+ }
+ }
+
+ printf("ec_simple_example:\n");
+
+ // Allocate coding matrices
+ encode_matrix = malloc(m * k);
+ decode_matrix = malloc(m * k);
+ invert_matrix = malloc(m * k);
+ temp_matrix = malloc(m * k);
+ g_tbls = malloc(k * p * 32);
+
+ if (encode_matrix == NULL || decode_matrix == NULL
+ || invert_matrix == NULL || temp_matrix == NULL || g_tbls == NULL) {
+ printf("Test failure! Error with malloc\n");
+ return -1;
+ }
+ // Allocate the src & parity buffers
+ for (i = 0; i < m; i++) {
+ if (NULL == (frag_ptrs[i] = malloc(len))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ }
+
+ // Allocate buffers for recovered data
+ for (i = 0; i < p; i++) {
+ if (NULL == (recover_outp[i] = malloc(len))) {
+ printf("alloc error: Fail\n");
+ return -1;
+ }
+ }
+
+ // Fill sources with random data
+ for (i = 0; i < k; i++)
+ for (j = 0; j < len; j++)
+ frag_ptrs[i][j] = rand();
+
+ printf(" encode (m,k,p)=(%d,%d,%d) len=%d\n", m, k, p, len);
+
+ // Pick an encode matrix. A Cauchy matrix is a good choice as even
+	// large k are always invertible, keeping the recovery rule simple.
+ gf_gen_cauchy1_matrix(encode_matrix, m, k);
+
+ // Initialize g_tbls from encode matrix
+ ec_init_tables(k, p, &encode_matrix[k * k], g_tbls);
+
+ // Generate EC parity blocks from sources
+ ec_encode_data(len, k, p, g_tbls, frag_ptrs, &frag_ptrs[k]);
+
+ if (nerrs <= 0)
+ return 0;
+
+ printf(" recover %d fragments\n", nerrs);
+
+ // Find a decode matrix to regenerate all erasures from remaining frags
+ ret = gf_gen_decode_matrix_simple(encode_matrix, decode_matrix,
+ invert_matrix, temp_matrix, decode_index,
+ frag_err_list, nerrs, k, m);
+ if (ret != 0) {
+ printf("Fail on generate decode matrix\n");
+ return -1;
+ }
+ // Pack recovery array pointers as list of valid fragments
+ for (i = 0; i < k; i++)
+ recover_srcs[i] = frag_ptrs[decode_index[i]];
+
+ // Recover data
+ ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+ ec_encode_data(len, k, nerrs, g_tbls, recover_srcs, recover_outp);
+
+ // Check that recovered buffers are the same as original
+ printf(" check recovery of block {");
+ for (i = 0; i < nerrs; i++) {
+ printf(" %d", frag_err_list[i]);
+ if (memcmp(recover_outp[i], frag_ptrs[frag_err_list[i]], len)) {
+ printf(" Fail erasure recovery %d, frag %d\n", i, frag_err_list[i]);
+ return -1;
+ }
+ }
+
+ printf(" } done all: Pass\n");
+ return 0;
+}
+
+/*
+ * Generate decode matrix from encode matrix and erasure list
+ */
+
+static int gf_gen_decode_matrix_simple(u8 * encode_matrix,
+ u8 * decode_matrix,
+ u8 * invert_matrix,
+ u8 * temp_matrix,
+ u8 * decode_index, u8 * frag_err_list, int nerrs, int k,
+ int m)
+{
+ int i, j, p, r;
+ int nsrcerrs = 0;
+ u8 s, *b = temp_matrix;
+ u8 frag_in_err[MMAX];
+
+ memset(frag_in_err, 0, sizeof(frag_in_err));
+
+	// Mark the erased fragments and count how many are source erasures
+ for (i = 0; i < nerrs; i++) {
+ if (frag_err_list[i] < k)
+ nsrcerrs++;
+ frag_in_err[frag_err_list[i]] = 1;
+ }
+
+ // Construct b (matrix that encoded remaining frags) by removing erased rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (frag_in_err[r])
+ r++;
+ for (j = 0; j < k; j++)
+ b[k * i + j] = encode_matrix[k * r + j];
+ decode_index[i] = r;
+ }
+
+ // Invert matrix to get recovery matrix
+ if (gf_invert_matrix(b, invert_matrix, k) < 0)
+ return -1;
+
+ // Get decode matrix with only wanted recovery rows
+ for (i = 0; i < nerrs; i++) {
+ if (frag_err_list[i] < k) // A src err
+ for (j = 0; j < k; j++)
+ decode_matrix[k * i + j] =
+ invert_matrix[k * frag_err_list[i] + j];
+ }
+
+	// For non-src (parity) erasures, multiply the encode matrix by the inverse
+ for (p = 0; p < nerrs; p++) {
+ if (frag_err_list[p] >= k) { // A parity err
+ for (i = 0; i < k; i++) {
+ s = 0;
+ for (j = 0; j < k; j++)
+ s ^= gf_mul(invert_matrix[j * k + i],
+ encode_matrix[k * frag_err_list[p] + j]);
+ decode_matrix[k * p + i] = s;
+ }
+ }
+ }
+ return 0;
+}
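Likewise for the simple example; a plausible invocation that erases one source
and one parity fragment under the default geometry:

    ./ec_simple_example -k 10 -p 4 -l 4096 -e 0 -e 11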
diff --git a/src/isa-l/igzip/Makefile.am b/src/isa-l/igzip/Makefile.am
new file mode 100644
index 000000000..bec359ab5
--- /dev/null
+++ b/src/isa-l/igzip/Makefile.am
@@ -0,0 +1,144 @@
+########################################################################
+# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+lsrc += igzip/igzip.c \
+ igzip/hufftables_c.c \
+ igzip/igzip_base.c \
+ igzip/igzip_icf_base.c \
+ igzip/adler32_base.c \
+ igzip/flatten_ll.c \
+ igzip/encode_df.c \
+ igzip/igzip_icf_body.c
+
+lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
+lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
+lsrc_ppc64le += igzip/igzip_base_aliases.c igzip/proc_heap_base.c
+
+lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \
+ igzip/aarch64/igzip_multibinary_arm64.S \
+ igzip/aarch64/igzip_isal_adler32_neon.S \
+ igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c \
+ igzip/aarch64/igzip_deflate_body_aarch64.S \
+ igzip/aarch64/igzip_deflate_finish_aarch64.S \
+ igzip/aarch64/isal_deflate_icf_body_hash_hist.S \
+ igzip/aarch64/isal_deflate_icf_finish_hash_hist.S \
+ igzip/aarch64/igzip_set_long_icf_fg.S \
+ igzip/aarch64/encode_df.S \
+ igzip/aarch64/isal_update_histogram.S \
+ igzip/aarch64/gen_icf_map.S \
+ igzip/aarch64/igzip_deflate_hash_aarch64.S \
+ igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S \
+ igzip/proc_heap_base.c
+
+lsrc_x86_64 += igzip/igzip_body.asm \
+ igzip/igzip_finish.asm \
+ igzip/igzip_icf_body_h1_gr_bt.asm \
+ igzip/igzip_icf_finish.asm \
+ igzip/rfc1951_lookup.asm \
+ igzip/adler32_sse.asm \
+ igzip/adler32_avx2_4.asm \
+ igzip/igzip_multibinary.asm \
+ igzip/igzip_update_histogram_01.asm \
+ igzip/igzip_update_histogram_04.asm \
+ igzip/igzip_decode_block_stateless_01.asm \
+ igzip/igzip_decode_block_stateless_04.asm \
+ igzip/igzip_inflate_multibinary.asm \
+ igzip/encode_df_04.asm \
+ igzip/encode_df_06.asm \
+ igzip/proc_heap.asm \
+ igzip/igzip_deflate_hash.asm \
+ igzip/igzip_gen_icf_map_lh1_06.asm \
+ igzip/igzip_gen_icf_map_lh1_04.asm \
+ igzip/igzip_set_long_icf_fg_04.asm \
+ igzip/igzip_set_long_icf_fg_06.asm
+
+src_include += -I $(srcdir)/igzip
+extern_hdrs += include/igzip_lib.h
+
+check_tests += igzip/igzip_rand_test
+check_tests += igzip/igzip_wrapper_hdr_test
+check_tests += igzip/checksum32_funcs_test
+
+other_tests += igzip/igzip_file_perf igzip/igzip_hist_perf
+other_tests += igzip/igzip_perf
+other_tests += igzip/igzip_semi_dyn_file_perf
+other_tests += igzip/igzip_build_hash_table_perf
+
+other_src += igzip/bitbuf2.asm \
+ igzip/data_struct2.asm \
+ igzip/inflate_data_structs.asm \
+ igzip/igzip_body.asm \
+ igzip/igzip_finish.asm \
+ igzip/lz0a_const.asm \
+ igzip/options.asm \
+ igzip/stdmac.asm \
+ igzip/igzip_compare_types.asm \
+ igzip/bitbuf2.h \
+ igzip/repeated_char_result.h \
+ igzip/igzip_update_histogram.asm \
+ igzip/huffman.asm \
+ include/reg_sizes.asm \
+ include/multibinary.asm \
+ include/test.h \
+ include/unaligned.h \
+ igzip/huffman.h \
+ igzip/igzip_level_buf_structs.h \
+ igzip/igzip_decode_block_stateless.asm \
+ igzip/inflate_std_vects.h \
+ igzip/flatten_ll.h \
+ igzip/encode_df.h \
+ igzip/heap_macros.asm \
+ igzip/igzip_wrapper.h \
+ igzip/static_inflate.h \
+ igzip/igzip_checksums.h
+
+perf_tests += igzip/adler32_perf
+
+examples += igzip/igzip_example igzip/igzip_sync_flush_example
+
+igzip_igzip_rand_test_LDADD = libisal.la
+
+# Include tools to make custom Huffman tables based on sample data
+other_tests += igzip/generate_custom_hufftables
+other_tests += igzip/generate_static_inflate
+other_src += igzip/huff_codes.h
+lsrc += igzip/huff_codes.c
+
+# Include tools and tests using the reference inflate
+other_tests += igzip/igzip_inflate_test
+lsrc += igzip/igzip_inflate.c
+other_src += igzip/checksum_test_ref.h
+
+igzip_perf: LDLIBS += -lz
+igzip_igzip_perf_LDADD = libisal.la
+igzip_igzip_perf_LDFLAGS = -lz
+igzip_inflate_test: LDLIBS += -lz
+igzip_igzip_inflate_test_LDADD = libisal.la
+igzip_igzip_inflate_test_LDFLAGS = -lz
+igzip_igzip_hist_perf_LDADD = libisal.la
diff --git a/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h b/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h
new file mode 100644
index 000000000..88eb18dfd
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h
@@ -0,0 +1,57 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __BITBUF2_AARCH64_H__
+#define __BITBUF2_AARCH64_H__
+#include "options_aarch64.h"
+
+#ifdef __ASSEMBLY__
+.macro update_bits stream:req,code:req,code_len:req,m_bits:req,m_bit_count:req \
+ m_out_buf:req
+
+ lsl x_\code,x_\code,x_\m_bit_count
+ orr x_\m_bits,x_\code,x_\m_bits
+ add x_\m_bit_count,x_\code_len,x_\m_bit_count
+
+ str x_\m_bits,[x_\m_out_buf]
+
+ and w_\code,w_\m_bit_count,-8
+ lsr w_\code_len,w_\m_bit_count,3
+ add x_\m_out_buf,x_\m_out_buf,w_\code_len,uxtw
+ sub w_\m_bit_count,w_\m_bit_count,w_\code
+ lsr x_\m_bits,x_\m_bits,x_\code
+
+ str x_\m_bits,[stream,_internal_state_bitbuf_m_bits]
+ str w_\m_bit_count,[stream,_internal_state_bitbuf_m_bit_count]
+ str x_\m_out_buf,[stream,_internal_state_bitbuf_m_out_buf]
+
+
+.endm
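+
+/* A rough C sketch of update_bits (hedged; field names follow the BitBuf2
+ * layout in data_struct_aarch64.h, and the speculative 8-byte store assumes
+ * the caller has already checked the buffer is not full):
+ *
+ *     bb->m_bits |= code << bb->m_bit_count;    // append the new code
+ *     bb->m_bit_count += code_len;
+ *     *(uint64_t *)bb->m_out_buf = bb->m_bits;  // flush up to 8 bytes
+ *     uint32_t bytes = bb->m_bit_count / 8;     // whole bytes now complete
+ *     bb->m_out_buf += bytes;
+ *     bb->m_bits >>= 8 * bytes;
+ *     bb->m_bit_count -= 8 * bytes;
+ *
+ * The macro then stores the three fields back through the stream pointer
+ * using the _internal_state_bitbuf_* offsets.
+ */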
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/data_struct_aarch64.h b/src/isa-l/igzip/aarch64/data_struct_aarch64.h
new file mode 100644
index 000000000..5f8676d34
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/data_struct_aarch64.h
@@ -0,0 +1,215 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#ifndef __AARCH64_DATA_STRUCT_H__
+#define __AARCH64_DATA_STRUCT_H__
+#ifdef __ASSEMBLY__
+
+.macro start_struct name:req
+ .set _FIELD_OFFSET,0
+ .set _STRUCT_ALIGN,0
+.endm
+.macro end_struct name:req
+ .set _\name\()_size,_FIELD_OFFSET
+ .set _\name\()_align,_STRUCT_ALIGN
+.endm
+.macro field name:req, size:req, align:req
+ .set _FIELD_OFFSET,(_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
+ .set \name,_FIELD_OFFSET
+ .set _FIELD_OFFSET,_FIELD_OFFSET + \size
+ .if \align > _STRUCT_ALIGN
+ .set _STRUCT_ALIGN, \align
+ .endif
+.endm
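+
+/* These three macros lay out a C struct at assemble time: `field` rounds
+ * the running offset up to `align`, binds `name` to that offset, and
+ * advances by `size`; `end_struct` records the final size and the largest
+ * alignment seen. For BitBuf2 below this yields _m_bits = 0,
+ * _m_bit_count = 8, _m_out_buf = 16, _m_out_end = 24, _m_out_start = 32,
+ * matching the C struct BitBuf2 in igzip_lib.h.
+ */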
+
+/// BitBuf2
+start_struct BitBuf2
+ /// name size align
+ field _m_bits, 8, 8
+ field _m_bit_count, 4, 4
+ field _m_out_buf, 8, 8
+ field _m_out_end, 8, 8
+ field _m_out_start, 8, 8
+end_struct BitBuf2
+
+/// isal_mod_hist
+#define HIST_ELEM_SIZE 4
+start_struct isal_mod_hist
+ /// name size align
+ field _d_hist, 30*HIST_ELEM_SIZE, HIST_ELEM_SIZE
+ field _ll_hist, 513*HIST_ELEM_SIZE, HIST_ELEM_SIZE
+end_struct isal_mod_hist
+
+/// hufftables_icf
+#define HUFF_CODE_SIZE 4
+start_struct hufftables_icf
+ /// name size align
+ field _dist_table, 31 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
+ field _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
+end_struct hufftables_icf
+
+/// hash8k_buf
+start_struct hash8k_buf
+ /// name size align
+ field _hash8k_table, 2 * IGZIP_HASH8K_HASH_SIZE, 2
+end_struct hash8k_buf
+
+/// hash_map_buf
+start_struct hash_map_buf
+ /// name size align
+ field _hash_table, 2 * IGZIP_HASH_MAP_HASH_SIZE, 2
+ field _matches_next, 8, 8
+ field _matches_end, 8, 8
+ field _matches, 4*4*1024, 4
+ field _overflow, 4*LA, 4
+end_struct hash_map_buf
+
+/// level_buf
+#define DEF_MAX_HDR_SIZE 328
+start_struct level_buf
+ /// name size align
+ field _encode_tables, _hufftables_icf_size, _hufftables_icf_align
+ field _hist, _isal_mod_hist_size, _isal_mod_hist_align
+ field _deflate_hdr_count, 4, 4
+ field _deflate_hdr_extra_bits,4, 4
+ field _deflate_hdr, DEF_MAX_HDR_SIZE, 1
+ field _icf_buf_next, 8, 8
+ field _icf_buf_avail_out, 8, 8
+ field _icf_buf_start, 8, 8
+ field _lvl_extra, _hash_map_buf_size, _hash_map_buf_align
+end_struct level_buf
+
+.set _hash8k_hash_table , _lvl_extra + _hash8k_table
+.set _hash_map_hash_table , _lvl_extra + _hash_table
+.set _hash_map_matches_next , _lvl_extra + _matches_next
+.set _hash_map_matches_end , _lvl_extra + _matches_end
+.set _hash_map_matches , _lvl_extra + _matches
+.set _hist_lit_len , _hist+_ll_hist
+.set _hist_dist , _hist+_d_hist
+
+/// isal_zstate
+start_struct isal_zstate
+ /// name size align
+ field _total_in_start,4, 4
+ field _block_next, 4, 4
+ field _block_end, 4, 4
+ field _dist_mask, 4, 4
+ field _hash_mask, 4, 4
+ field _state, 4, 4
+ field _bitbuf, _BitBuf2_size, _BitBuf2_align
+ field _crc, 4, 4
+ field _has_wrap_hdr, 1, 1
+ field _has_eob_hdr, 1, 1
+ field _has_eob, 1, 1
+ field _has_hist, 1, 1
+ field _has_level_buf_init, 2, 2
+ field _count, 4, 4
+ field _tmp_out_buff, 16, 1
+ field _tmp_out_start, 4, 4
+ field _tmp_out_end, 4, 4
+ field _b_bytes_valid, 4, 4
+ field _b_bytes_processed, 4, 4
+ field _buffer, BSIZE, 1
+ field _head, IGZIP_LVL0_HASH_SIZE*2, 2
+end_struct isal_zstate
+
+.set _bitbuf_m_bits , _bitbuf+_m_bits
+.set _bitbuf_m_bit_count , _bitbuf+_m_bit_count
+.set _bitbuf_m_out_buf , _bitbuf+_m_out_buf
+.set _bitbuf_m_out_end , _bitbuf+_m_out_end
+.set _bitbuf_m_out_start , _bitbuf+_m_out_start
+
+/// isal_zstream
+start_struct isal_zstream
+ /// name size align
+ field _next_in, 8, 8
+ field _avail_in, 4, 4
+ field _total_in, 4, 4
+ field _next_out, 8, 8
+ field _avail_out, 4, 4
+ field _total_out, 4, 4
+ field _hufftables, 8, 8
+ field _level, 4, 4
+ field _level_buf_size, 4, 4
+ field _level_buf, 8, 8
+ field _end_of_stream, 2, 2
+ field _flush, 2, 2
+ field _gzip_flag, 2, 2
+ field _hist_bits, 2, 2
+ field _internal_state, _isal_zstate_size, _isal_zstate_align
+end_struct isal_zstream
+
+.set _internal_state_total_in_start , _internal_state+_total_in_start
+.set _internal_state_block_next , _internal_state+_block_next
+.set _internal_state_block_end , _internal_state+_block_end
+.set _internal_state_b_bytes_valid , _internal_state+_b_bytes_valid
+.set _internal_state_b_bytes_processed , _internal_state+_b_bytes_processed
+.set _internal_state_crc , _internal_state+_crc
+.set _internal_state_dist_mask , _internal_state+_dist_mask
+.set _internal_state_hash_mask , _internal_state+_hash_mask
+.set _internal_state_bitbuf , _internal_state+_bitbuf
+.set _internal_state_state , _internal_state+_state
+.set _internal_state_count , _internal_state+_count
+.set _internal_state_tmp_out_buff , _internal_state+_tmp_out_buff
+.set _internal_state_tmp_out_start , _internal_state+_tmp_out_start
+.set _internal_state_tmp_out_end , _internal_state+_tmp_out_end
+.set _internal_state_has_wrap_hdr , _internal_state+_has_wrap_hdr
+.set _internal_state_has_eob , _internal_state+_has_eob
+.set _internal_state_has_eob_hdr , _internal_state+_has_eob_hdr
+.set _internal_state_has_hist , _internal_state+_has_hist
+.set _internal_state_has_level_buf_init , _internal_state+_has_level_buf_init
+.set _internal_state_buffer , _internal_state+_buffer
+.set _internal_state_head , _internal_state+_head
+.set _internal_state_bitbuf_m_bits , _internal_state+_bitbuf_m_bits
+.set _internal_state_bitbuf_m_bit_count , _internal_state+_bitbuf_m_bit_count
+.set _internal_state_bitbuf_m_out_buf , _internal_state+_bitbuf_m_out_buf
+.set _internal_state_bitbuf_m_out_end , _internal_state+_bitbuf_m_out_end
+.set _internal_state_bitbuf_m_out_start , _internal_state+_bitbuf_m_out_start
+
+/// Internal States
+.set ZSTATE_NEW_HDR , 0
+.set ZSTATE_HDR , (ZSTATE_NEW_HDR + 1)
+.set ZSTATE_CREATE_HDR , (ZSTATE_HDR + 1)
+.set ZSTATE_BODY , (ZSTATE_CREATE_HDR + 1)
+.set ZSTATE_FLUSH_READ_BUFFER , (ZSTATE_BODY + 1)
+.set ZSTATE_FLUSH_ICF_BUFFER , (ZSTATE_FLUSH_READ_BUFFER + 1)
+.set ZSTATE_TYPE0_HDR , (ZSTATE_FLUSH_ICF_BUFFER + 1)
+.set ZSTATE_TYPE0_BODY , (ZSTATE_TYPE0_HDR + 1)
+.set ZSTATE_SYNC_FLUSH , (ZSTATE_TYPE0_BODY + 1)
+.set ZSTATE_FLUSH_WRITE_BUFFER , (ZSTATE_SYNC_FLUSH + 1)
+.set ZSTATE_TRL , (ZSTATE_FLUSH_WRITE_BUFFER + 1)
+
+.set _NO_FLUSH , 0
+.set _SYNC_FLUSH , 1
+.set _FULL_FLUSH , 2
+.set _STORED_BLK , 0
+.set IGZIP_NO_HIST , 0
+.set IGZIP_HIST , 1
+.set IGZIP_DICT_HIST , 2
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/encode_df.S b/src/isa-l/igzip/aarch64/encode_df.S
new file mode 100644
index 000000000..6dddddf0a
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/encode_df.S
@@ -0,0 +1,159 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a+crc
+ .text
+ .align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/* declare helper macros */
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+ .global encode_deflate_icf_aarch64
+ .type encode_deflate_icf_aarch64, %function
+
+/*
+	C prototype of the base implementation this routine mirrors:
+
+	struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
+						struct deflate_icf *end_in, struct BitBuf2 *bb,
+						struct hufftables_icf *hufftables)
+*/
+
+ // parameters
+ declare_generic_reg next_in, 0,x
+ declare_generic_reg end_in, 1,x
+ declare_generic_reg bb, 2,x
+ declare_generic_reg hufftables, 3,x
+
+ // local variable
+ declare_generic_reg bb_out_end, 4,x
+ declare_generic_reg bb_bit_count, 5,w
+ declare_generic_reg dist_extra, 6,x
+ declare_generic_reg dist_lit_table, 7,x
+ declare_generic_reg code_and_extra, 8,x
+ declare_generic_reg bb_out_buf, 9,x
+ declare_generic_reg bb_bits, 10,x
+ declare_generic_reg d_length, 11,x
+ declare_generic_reg l_length, 12,x
+ declare_generic_reg d_extra_bit_count, 13,x
+
+ declare_generic_reg code_sum, 4,x
+ declare_generic_reg count_sum, 7,x
+
+ declare_generic_reg tmp0, 14,x
+ declare_generic_reg tmp1, 15,x
+
+// bit buffer offset
+.equ offset_m_bits, 0
+.equ offset_m_bit_count, 8
+.equ offset_m_out_buf, 16
+.equ offset_m_out_end, 24
+
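+/* Each 4-byte deflate_icf record decoded by this loop packs three fields
+ * (a hedged sketch; the authoritative layout is in igzip/encode_df.h):
+ *
+ *     struct deflate_icf {
+ *         uint32_t lit_len    : 10;  // literal byte or length symbol
+ *         uint32_t lit_dist   : 9;   // distance symbol (0x1e marks a literal)
+ *         uint32_t dist_extra : 13;  // extra distance bits
+ *     };
+ *
+ * The length symbol indexes hufftables->lit_len_table, which sits after
+ * the 31-entry dist_table (hence the `add ..., 31` below); the distance
+ * symbol indexes dist_table, and code + extra bits are appended to the
+ * bit buffer as in update_bits.
+ */
+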
+encode_deflate_icf_aarch64:
+ cmp next_in, end_in
+ bcs .done
+
+ ldp bb_out_buf, bb_out_end, [bb, offset_m_out_buf]
+ cmp bb_out_end, bb_out_buf
+ bcc .done
+
+ ldr bb_bit_count, [bb, offset_m_bit_count]
+ ldr bb_bits, [bb, offset_m_bits]
+ b .loop_start
+
+ .align 3
+.loop:
+ ldr bb_out_end, [bb, offset_m_out_end]
+ cmp bb_out_end, bb_out_buf
+ bcc .done
+
+.loop_start:
+ ldrh w_code_and_extra, [next_in]
+ add next_in, next_in, 4
+ ldr w_dist_lit_table, [next_in, -4]
+ and code_and_extra, code_and_extra, 1023
+
+ ldrh w_dist_extra, [next_in, -2]
+ add code_and_extra, code_and_extra, 31
+ ubfx x_dist_lit_table, x_dist_lit_table, 10, 9
+ add x_tmp0, hufftables, code_and_extra, lsl 2
+ ubfx x_dist_extra, x_dist_extra, 3, 13
+ lsl x_dist_lit_table, x_dist_lit_table, 2
+
+ ldr w_code_and_extra, [hufftables, code_and_extra, lsl 2]
+ add x_d_extra_bit_count, hufftables, x_dist_lit_table
+ ldrb w_l_length, [x_tmp0, 3]
+ and code_and_extra, code_and_extra, 0xffffff
+ ldrh w_code_sum, [hufftables, x_dist_lit_table]
+ ldrb w_d_length, [x_d_extra_bit_count, 3]
+ add w_l_length, w_l_length, bb_bit_count
+ ldrb w_d_extra_bit_count, [x_d_extra_bit_count, 2]
+
+ lsl x_tmp0, code_and_extra, x_bb_bit_count
+ add bb_bit_count, w_d_length, w_l_length
+ lsl x_code_sum, x_code_sum, x_l_length
+ orr x_code_sum, x_code_sum, x_tmp0
+ add w_count_sum, w_d_extra_bit_count, bb_bit_count
+ lsl x_bb_bit_count, x_dist_extra, x_bb_bit_count
+
+ orr x_bb_bit_count, x_bb_bit_count, bb_bits
+ orr x_tmp0, x_code_sum, x_bb_bit_count // me->m_bits => x_tmp0
+ str x_tmp0, [bb, offset_m_bits] // me->m_bits => x_tmp0
+ str w_count_sum, [bb, offset_m_bit_count]
+
+ str x_tmp0, [bb_out_buf] // me->m_bits => x_tmp0
+ ldr bb_bit_count, [bb, offset_m_bit_count]
+ ldr bb_bits, [bb, offset_m_bits]
+ and w_tmp0, bb_bit_count, -8 // bits => w_tmp0
+ ldr bb_out_buf, [bb, offset_m_out_buf]
+ lsr w_tmp1, bb_bit_count, 3 // bits/8 => w_tmp1
+ lsr bb_bits, bb_bits, x_tmp0 // bits => x_tmp0
+ sub bb_bit_count, bb_bit_count, w_tmp0 // bits => w_tmp0
+ add bb_out_buf, bb_out_buf, x_tmp1 // bits/8 => x_tmp1
+ str bb_bits, [bb,offset_m_bits]
+ str bb_bit_count, [bb, offset_m_bit_count]
+ str bb_out_buf, [bb, offset_m_out_buf]
+
+ cmp end_in, next_in
+ bhi .loop
+
+.done:
+ ret
+ .size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
diff --git a/src/isa-l/igzip/aarch64/gen_icf_map.S b/src/isa-l/igzip/aarch64/gen_icf_map.S
new file mode 100644
index 000000000..fe04ee4c3
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/gen_icf_map.S
@@ -0,0 +1,266 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc+crypto
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/* declare helper macros */
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+.macro tzbytecnt param0:req,param1:req
+ rbit x_\param1, x_\param0
+ cmp x_\param0, 0
+ clz x_\param1, x_\param1
+ mov w_\param0, 8
+ lsr w_\param1, w_\param1, 3
+ csel w_\param0, w_\param1, w_\param0, ne
+.endm
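+
+/* tzbytecnt counts how many low-order bytes of a 64-bit value are zero,
+ * i.e. roughly `x ? __builtin_ctzll(x) / 8 : 8` in C; applied to the XOR
+ * of two 8-byte loads it gives the number of matching bytes.
+ */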
+
+.macro write_deflate_icf param0:req,param1:req,param2:req,param3:req
+ orr w_\param1, w_\param1, w_\param3, lsl 19
+ orr w_\param1, w_\param1, w_\param2, lsl 10
+ str w_\param1, [x_\param0]
+.endm
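+
+/* write_deflate_icf packs one 32-bit deflate_icf record:
+ *     *(uint32_t *)p = lit_len | lit_dist << 10 | dist_extra << 19;
+ */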
+
+ .align 2
+ .global gen_icf_map_h1_aarch64
+ .type gen_icf_map_h1_aarch64, %function
+
+ /* arguments */
+ declare_generic_reg stream_param, 0,x
+ declare_generic_reg matches_icf_lookup_param, 1,x
+ declare_generic_reg input_size_param, 2,x
+
+ declare_generic_reg param0, 0,x
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+ declare_generic_reg param3, 3,x
+
+ /* return */
+ declare_generic_reg ret_val, 0,x
+
+ /* variables */
+ declare_generic_reg input_size, 3,x
+ declare_generic_reg next_in, 4,x
+ declare_generic_reg matches_icf_lookup, 6,x
+ declare_generic_reg hash_table, 7,x
+ declare_generic_reg end_in, 8,x
+ declare_generic_reg file_start, 9,x
+ declare_generic_reg hash_mask, 10,w
+ declare_generic_reg hist_size, 11,w
+ declare_generic_reg stream_saved, 12,x
+ declare_generic_reg literal_32, 13,w
+ declare_generic_reg literal_1, 14,w
+ declare_generic_reg dist, 15,w
+
+ declare_generic_reg tmp_has_hist, 0,w
+ declare_generic_reg tmp_offset_hash_table, 1,x
+ declare_generic_reg tmp0, 0,x
+ declare_generic_reg tmp1, 1,x
+ declare_generic_reg tmp2, 2,x
+ declare_generic_reg tmp3, 3,x
+ declare_generic_reg tmp5, 5,x
+
+/* constant */
+.equ ISAL_LOOK_AHEAD, 288
+.equ SHORTEST_MATCH, 4
+.equ LEN_OFFSET, 254
+
+/* mask */
+.equ mask_10bit, 1023
+.equ mask_lit_dist, 0x7800
+
+/* offset of struct isal_zstream */
+.equ offset_next_in, 0
+.equ offset_avail_in, 8
+.equ offset_total_in, 12
+.equ offset_next_out, 16
+.equ offset_avail_out, 24
+.equ offset_total_out, 28
+.equ offset_hufftables, 32
+.equ offset_level, 40
+.equ offset_level_buf_size, 44
+.equ offset_level_buf, 48
+.equ offset_end_of_stream, 56
+.equ offset_flush, 58
+.equ offset_gzip_flag, 60
+.equ offset_hist_bits, 62
+.equ offset_state, 64
+.equ offset_state_block_end, 72
+.equ offset_state_dist_mask, 76
+.equ offset_state_has_hist, 135
+
+/* offset of struct level_buf */
+.equ offset_hash_map_hash_table, 4712
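+
+/* Note: the offset_* constants above are hand-maintained mirrors of the
+ * isal_zstream and level_buf layouts and must stay in sync with the field
+ * definitions in data_struct_aarch64.h (and the C structs in igzip_lib.h).
+ */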
+
+/*
+	C prototype of the base implementation this routine mirrors:
+
+	uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
+			struct deflate_icf *matches_icf_lookup, uint64_t input_size)
+*/
+
+gen_icf_map_h1_aarch64:
+ cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287
+ bls .fast_exit
+ stp x29, x30, [sp, -16]!
+
+ mov stream_saved, stream_param
+ mov matches_icf_lookup, matches_icf_lookup_param
+ mov x29, sp
+
+ ldrb tmp_has_hist, [stream_saved, offset_state_has_hist]
+ mov tmp_offset_hash_table, offset_hash_map_hash_table
+ ldr end_in, [stream_saved, offset_next_in]
+ mov input_size, input_size_param
+ ldr hash_table, [stream_saved, offset_level_buf]
+ ldr w_file_start, [stream_saved, offset_total_in]
+ ldp hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
+ add hash_table, hash_table, tmp_offset_hash_table
+ sub file_start, end_in, file_start
+ cbz tmp_has_hist, .igzip_no_hist
+ b .while_check1
+
+ .align 3
+.igzip_no_hist:
+ ldrb w_tmp1, [end_in]
+ add next_in, end_in, 1
+ ldrh w_tmp0, [matches_icf_lookup]
+ bfi w_tmp0, w_tmp1, 0, 10
+ strh w_tmp0, [matches_icf_lookup]
+ ldr w_tmp0, [matches_icf_lookup]
+ and w_tmp0, w_tmp0, mask_10bit
+ orr w_tmp0, w_tmp0, mask_lit_dist
+ str w_tmp0, [matches_icf_lookup], 4
+ ldr w_tmp0, [end_in]
+ crc32cw w_tmp0, wzr, w_tmp0
+
+ and w_tmp5, w_tmp0, hash_mask
+ sub x_tmp1, end_in, file_start
+ mov w_tmp2, 1
+ mov x_tmp0, 1
+ strh w_tmp1, [hash_table, x_tmp5, lsl 1]
+ strb w_tmp2, [stream_saved, offset_state_has_hist]
+ b .while_check2
+
+.while_check1:
+ mov next_in, end_in
+ mov x_tmp0, 0
+
+.while_check2:
+ sub input_size, input_size, #288
+ add end_in, end_in, input_size
+ cmp next_in, end_in
+ bcs .exit
+ mov literal_32, 32
+ mov literal_1, 1
+ b .while_loop
+
+ .align 3
+.new_match_found:
+ clz w_tmp5, w_tmp2
+ add w_tmp1, w_tmp0, LEN_OFFSET
+ sub w_tmp5, literal_32, w_tmp5
+ cmp dist, 2
+ sub w_tmp5, w_tmp5, #2
+ bls .skip_compute_dist_icf_code
+
+ lsl w_tmp3, literal_1, w_tmp5
+ sub w_tmp3, w_tmp3, #1
+ lsr w_tmp0, w_tmp2, w_tmp5
+ and w_tmp3, w_tmp3, w_tmp2
+ add w_tmp2, w_tmp0, w_tmp5, lsl 1
+
+.skip_compute_dist_icf_code:
+ mov param0, matches_icf_lookup
+ write_deflate_icf param0,param1,param2,param3
+
+ add next_in, next_in, 1
+ add matches_icf_lookup, matches_icf_lookup, 4
+ cmp next_in, end_in
+ beq .save_with_exit
+
+.while_loop:
+ ldr w_tmp0, [next_in]
+ crc32cw w_tmp0, wzr, w_tmp0
+
+ and w_tmp0, w_tmp0, hash_mask
+ sub x_tmp1, next_in, file_start
+ lsl x_tmp0, x_tmp0, 1
+ sub w_tmp2, w_tmp1, #1
+ ldrh w_tmp3, [hash_table, x_tmp0]
+ strh w_tmp1, [hash_table, x_tmp0]
+ sub w_tmp2, w_tmp2, w_tmp3
+ and w_tmp2, w_tmp2, hist_size
+ add dist, w_tmp2, 1
+ ldr x_tmp0, [next_in]
+ sub x_tmp1, next_in, w_dist, uxtw
+ ldr x_tmp1, [x_tmp1]
+ eor x_tmp0, x_tmp1, x_tmp0
+ tzbytecnt param0,param1
+
+ cmp w_tmp0, (SHORTEST_MATCH-1)
+ mov w_tmp3, 0
+ bhi .new_match_found
+
+ ldrb w_param1, [next_in]
+ mov x_param0, matches_icf_lookup
+ mov w_param3, 0
+ mov w_param2, 0x1e
+ write_deflate_icf param0,param1,param2,param3
+
+ add next_in, next_in, 1
+ add matches_icf_lookup, matches_icf_lookup, 4
+ cmp next_in, end_in
+ bne .while_loop
+
+.save_with_exit:
+ ldr ret_val, [stream_saved, offset_next_in]
+ sub ret_val, next_in, ret_val
+
+.exit:
+ ldp x29, x30, [sp], 16
+ ret
+
+ .align 3
+.fast_exit:
+ mov ret_val, 0
+ ret
+ .size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
diff --git a/src/isa-l/igzip/aarch64/huffman_aarch64.h b/src/isa-l/igzip/aarch64/huffman_aarch64.h
new file mode 100644
index 000000000..4ceae23f4
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/huffman_aarch64.h
@@ -0,0 +1,173 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __HUFFMAN_AARCH64_H__
+#define __HUFFMAN_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+#ifdef LONGER_HUFFTABLE
+ #if (D > 8192)
+ #error History D is larger than 8K
+ #else
+ #define DIST_TABLE_SIZE 8192
+ #define DECODE_OFFSET 26
+ #endif
+#else
+ #define DIST_TABLE_SIZE 2
+ #define DECODE_OFFSET 0
+#endif
+
+#define LEN_TABLE_SIZE 256
+#define LIT_TABLE_SIZE 257
+
+#define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) //328+8
+#define DIST_TABLE_OFFSET	(DIST_TABLE_START - 4 * 1)	//336-4
+#define LEN_TABLE_OFFSET	(DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) //336 + 2*4 - 4*3 = 332
+#define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
+#define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
+#define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
+#define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
+
+#define IGZIP_DECODE_OFFSET 0
+#define IGZIP_DIST_TABLE_SIZE 2
+
+.macro get_len_code hufftables:req,length:req,code:req,code_len:req,tmp0:req
+ add x_\tmp0,\hufftables,LEN_TABLE_OFFSET
+ ldr w_\code_len,[x_\tmp0,x_\length,lsl 2]
+ lsr w_\code, w_\code_len , 5
+ and x_\code_len,x_\code_len,0x1f
+.endm
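+
+/* The length/distance table entries read above pack a Huffman code and its
+ * bit length into one 32-bit word; in C terms (a hedged sketch):
+ *
+ *     uint32_t ent = table[sym];   // 4-byte entries
+ *     code     = ent >> 5;         // upper bits: the code itself
+ *     code_len = ent & 0x1f;       // low 5 bits: its length
+ */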
+
+.macro get_lit_code hufftables:req,lit:req,code:req,code_len:req
+ add x_\code,\hufftables,LIT_TABLE_OFFSET
+ ldrh w_\code,[x_\code,x_\lit,lsl 1]
+ add x_\code_len,\hufftables,LIT_TABLE_SIZES_OFFSET
+ ldrb w_\code_len,[x_\code_len,x_\lit]
+.endm
+
+.macro get_dist_code hufftables:req,dist:req,code:req,code_len:req,tmp0:req,tmp1:req,tmp2:req
+ cmp dist,DIST_TABLE_SIZE
+ bhi _compute_dist_code
+ add x_\tmp0,\hufftables,DIST_TABLE_OFFSET
+ ldr w_\code_len,[x_\tmp0,x_\dist,lsl 2]
+ lsr w_\code, w_\code_len , 5
+ and x_\code_len,x_\code_len,0x1f
+ b _end_get_dist_code
+_compute_dist_code:
+ and w_\dist,w_\dist,0xffff
+ sub w_\dist,w_\dist,1
+ clz w_\tmp0,w_\dist
+ mov w_\tmp1,30
+	sub	w_\tmp0,w_\tmp1,w_\tmp0   //tmp0 == num_extra_bits
+ mov w_\tmp1,1
+ lsl w_\tmp1,w_\tmp1,w_\tmp0
+ sub w_\tmp1,w_\tmp1,1
+ and w_\tmp1,w_\tmp1,w_\dist //tmp1=extra_bits
+ asr w_\dist,w_\dist,w_\tmp0
+ lsl w_\tmp2,w_\tmp0,1
+ add w_\tmp2,w_\dist,w_\tmp2 //tmp2=sym
+
+ add x_\code,\hufftables,DCODE_TABLE_OFFSET - IGZIP_DECODE_OFFSET*2
+ add x_\code_len,\hufftables,DCODE_TABLE_SIZE_OFFSET - IGZIP_DECODE_OFFSET
+ ldrh w_\code,[x_\code,x_\tmp2,lsl 1]
+ ldrb w_\code_len,[x_\code_len,x_\tmp2]
+ lsl w_\tmp1,w_\tmp1,w_\code_len
+ orr w_\code,w_\code,w_\tmp1
+ add w_\code_len,w_\code_len,w_\tmp0
+
+ //compute_dist_code
+_end_get_dist_code:
+.endm
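+
+/* For distances beyond the precomputed table, the slow path above derives
+ * the DEFLATE distance symbol directly:
+ *
+ *     num_extra_bits = bsr(dist - 1) - 1;
+ *     extra_bits = (dist - 1) & ((1 << num_extra_bits) - 1);
+ *     sym = ((dist - 1) >> num_extra_bits) + 2 * num_extra_bits;
+ *
+ * then appends extra_bits after the symbol's code.
+ */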
+
+
+.macro compare_258_bytes str0:req,str1:req,match_length:req,tmp0:req,tmp1:req
+ mov x_\match_length,0
+_compare_258_loop:
+ ldr x_\tmp0,[x_\str0,x_\match_length]
+ ldr x_\tmp1,[x_\str1,x_\match_length]
+ eor x_\tmp0,x_\tmp1,x_\tmp0
+ rbit x_\tmp0,x_\tmp0
+ clz x_\tmp0,x_\tmp0
+ lsr x_\tmp0,x_\tmp0,3
+ add x_\match_length,x_\match_length,x_\tmp0
+
+
+ cmp x_\match_length,257
+ ccmp x_\tmp0,8,0,ls
+ beq _compare_258_loop
+
+ cmp x_\match_length,258
+ mov x_\tmp1,258
+ csel x_\match_length,x_\match_length,x_\tmp1,ls
+.endm
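+
+/* compare_258_bytes matches the two strings 8 bytes at a time; eor plus
+ * rbit/clz/lsr #3 computes ctz(x)/8, the count of equal leading bytes.
+ * A hedged C sketch of the loop:
+ *
+ *     uint64_t len = 0;
+ *     for (;;) {
+ *         uint64_t x = load64(s0 + len) ^ load64(s1 + len);
+ *         uint64_t n = x ? __builtin_ctzll(x) / 8 : 8;
+ *         len += n;
+ *         if (n != 8 || len > 257)
+ *             break;
+ *     }
+ *     len = len > 258 ? 258 : len;  // clamp to the DEFLATE maximum
+ */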
+
+.macro compare_max_258_bytes str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
+ mov x_\match_length,0
+ mov x_\tmp0,258
+ cmp x_\max_length,x_\tmp0
+ csel x_\max_length,x_\max_length,x_\tmp0,ls
+_compare_258_loop:
+ ldr x_\tmp0,[x_\str0,x_\match_length]
+ ldr x_\tmp1,[x_\str1,x_\match_length]
+ eor x_\tmp0,x_\tmp1,x_\tmp0
+ rbit x_\tmp0,x_\tmp0
+ clz x_\tmp0,x_\tmp0
+ lsr x_\tmp0,x_\tmp0,3
+ add x_\match_length,x_\match_length,x_\tmp0
+
+
+ cmp x_\max_length,x_\match_length
+ ccmp x_\tmp0,8,0,hi
+ beq _compare_258_loop
+
+ cmp x_\match_length,x_\max_length
+ csel x_\match_length,x_\match_length,x_\max_length,ls
+.endm
+
+.macro compare_aarch64 str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
+ mov x_\match_length,0
+_compare_loop:
+ ldr x_\tmp0,[x_\str0,x_\match_length]
+ ldr x_\tmp1,[x_\str1,x_\match_length]
+ eor x_\tmp0,x_\tmp1,x_\tmp0
+ rbit x_\tmp0,x_\tmp0
+ clz x_\tmp0,x_\tmp0
+ lsr x_\tmp0,x_\tmp0,3
+ add x_\match_length,x_\match_length,x_\tmp0
+
+ cmp x_\max_length,x_\match_length
+ ccmp x_\tmp0,8,0,hi
+ beq _compare_loop
+
+ cmp x_\match_length,x_\max_length
+ csel x_\match_length,x_\match_length,x_\max_length,ls
+.endm
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S b/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S
new file mode 100644
index 000000000..3255ba4c7
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S
@@ -0,0 +1,689 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+ .text
+ .align 2
+#include "lz0a_const_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+#define ENABLE_TBL_INSTRUCTION 1
+
+.macro start_struct name:req
+ .set _FIELD_OFFSET,0
+ .set _STRUCT_ALIGN,0
+.endm
+.macro end_struct name:req
+ .set _\name\()_size,_FIELD_OFFSET
+ .set _\name\()_align,_STRUCT_ALIGN
+.endm
+.macro field name:req, size:req, align:req
+ .set _FIELD_OFFSET,(_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
+ .set \name,_FIELD_OFFSET
+ .set _FIELD_OFFSET,_FIELD_OFFSET + \size
+ .if \align > _STRUCT_ALIGN
+ .set _STRUCT_ALIGN, \align
+ .endif
+.endm
+
+#define ISAL_DECODE_LONG_BITS 12
+#define ISAL_DECODE_SHORT_BITS 10
+
+#define L_REM (21 - ISAL_DECODE_LONG_BITS)
+#define S_REM (15 - ISAL_DECODE_SHORT_BITS)
+#define L_DUP ((1 << L_REM) - (L_REM + 1))
+#define S_DUP ((1 << S_REM) - (S_REM + 1))
+#define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1)
+#define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1)
+#define L_SIZE (286 + L_DUP + L_UNUSED)
+#define S_SIZE (30 + S_DUP + S_UNUSED)
+#define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf))
+#define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf))
+#define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf))
+#define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf))
+#define LARGE_SHORT_CODE_SIZE 4
+#define LARGE_LONG_CODE_SIZE 2
+#define SMALL_SHORT_CODE_SIZE 2
+#define SMALL_LONG_CODE_SIZE 2
+
+
+// inflate_huff_code (large, for literal/length codes)
+start_struct inflate_huff_code_large
+ // name size align
+ field _short_code_lookup_large, LARGE_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_LONG_BITS)), LARGE_LONG_CODE_SIZE
+ field _long_code_lookup_large, LARGE_LONG_CODE_SIZE*MAX_LONG_CODE_LARGE, LARGE_SHORT_CODE_SIZE
+end_struct inflate_huff_code_large
+
+// inflate_huff_code (small, for distance codes)
+start_struct inflate_huff_code_small
+ // name size align
+ field _short_code_lookup_small, SMALL_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_SHORT_BITS)), SMALL_LONG_CODE_SIZE
+ field _long_code_lookup_small, SMALL_LONG_CODE_SIZE*MAX_LONG_CODE_SMALL, SMALL_SHORT_CODE_SIZE
+end_struct inflate_huff_code_small
+
+// inflate_state
+start_struct inflate_state
+ // name size align
+ field _next_out, 8, 8
+ field _avail_out, 4, 4
+ field _total_out, 4, 4
+ field _next_in, 8, 8
+ field _read_in, 8, 8
+ field _avail_in, 4, 4
+ field _read_in_length, 4, 4
+ field _lit_huff_code, _inflate_huff_code_large_size, _inflate_huff_code_large_align
+ field _dist_huff_code, _inflate_huff_code_small_size, _inflate_huff_code_small_align
+ field _block_state, 4, 4
+ field _dict_length, 4, 4
+ field _bfinal, 4, 4
+ field _crc_flag, 4, 4
+ field _crc, 4, 4
+ field _hist_bits, 4, 4
+ field _type0_block_len, 4, 4
+ field _write_overflow_lits, 4, 4
+ field _write_overflow_len, 4, 4
+ field _copy_overflow_len, 4, 4
+ field _copy_overflow_dist, 4, 4
+end_struct inflate_state
+
+.set _lit_huff_code_short_code_lookup , _lit_huff_code+_short_code_lookup_large
+.set _lit_huff_code_long_code_lookup , _lit_huff_code+_long_code_lookup_large
+.set _dist_huff_code_short_code_lookup , _dist_huff_code+_short_code_lookup_small
+.set _dist_huff_code_long_code_lookup , _dist_huff_code+_long_code_lookup_small
+.set ISAL_BLOCK_NEW_HDR , 0
+.set ISAL_BLOCK_HDR , 1
+.set ISAL_BLOCK_TYPE0 , 2
+.set ISAL_BLOCK_CODED , 3
+.set ISAL_BLOCK_INPUT_DONE , 4
+.set ISAL_BLOCK_FINISH , 5
+
+/* Inflate Return values */
+#define ISAL_DECOMP_OK 0 /* No errors encountered while decompressing */
+#define ISAL_END_INPUT 1 /* End of input reached */
+#define ISAL_OUT_OVERFLOW 2 /* End of output reached */
+#define ISAL_NAME_OVERFLOW 3 /* End of gzip name buffer reached */
+#define ISAL_COMMENT_OVERFLOW 4	/* End of gzip comment buffer reached */
+#define ISAL_EXTRA_OVERFLOW 5 /* End of extra buffer reached */
+#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */
+#define ISAL_INVALID_BLOCK -1 /* Invalid deflate block found */
+#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */
+#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */
+#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */
+#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */
+#define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */
+
+
+#define ISAL_DEF_MAX_CODE_LEN 15
+#define LARGE_SHORT_SYM_LEN 25
+#define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
+#define LARGE_LONG_SYM_LEN 10
+#define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
+#define LARGE_SHORT_CODE_LEN_OFFSET 28
+#define LARGE_LONG_CODE_LEN_OFFSET 10
+#define LARGE_FLAG_BIT_OFFSET 25
+#define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
+#define LARGE_SYM_COUNT_OFFSET 26
+#define LARGE_SYM_COUNT_LEN 2
+#define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
+#define LARGE_SHORT_MAX_LEN_OFFSET 26
+
+#define SMALL_SHORT_SYM_LEN 9
+#define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
+#define SMALL_LONG_SYM_LEN 9
+#define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
+#define SMALL_SHORT_CODE_LEN_OFFSET 11
+#define SMALL_LONG_CODE_LEN_OFFSET 10
+#define SMALL_FLAG_BIT_OFFSET 10
+#define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
+
+#define DIST_SYM_OFFSET 0
+#define DIST_SYM_LEN 5
+#define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
+#define DIST_SYM_EXTRA_OFFSET 5
+#define DIST_SYM_EXTRA_LEN 4
+#define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
+
+#define MAX_LIT_LEN_CODE_LEN 21
+#define MAX_LIT_LEN_COUNT (MAX_LIT_LEN_CODE_LEN + 2)
+#define MAX_LIT_LEN_SYM 512
+#define LIT_LEN_ELEMS 514
+
+#define INVALID_SYMBOL 0x1FFF
+#define INVALID_CODE 0xFFFFFF
+
+#define MIN_DEF_MATCH 3
+
+#define TRIPLE_SYM_FLAG 0
+#define DOUBLE_SYM_FLAG (TRIPLE_SYM_FLAG + 1)
+#define SINGLE_SYM_FLAG (DOUBLE_SYM_FLAG + 1)
+#define DEFAULT_SYM_FLAG TRIPLE_SYM_FLAG
+
+#define SINGLE_SYM_THRESH (2 * 1024)
+#define DOUBLE_SYM_THRESH (4 * 1024)
+
+
+/* declare helper macros */
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+
+.macro inflate_in_load_read_byte
+ cmp read_in_length,56
+ bgt 1f
+ cbz avail_in,1f
+ ldrb w_temp,[next_in],1
+ sub avail_in,avail_in,1
+ lsl temp,temp,x_read_in_length
+ orr read_in,read_in,temp
+ add read_in_length,read_in_length,8
+ uxtw x_read_in_length,read_in_length
+
+.endm
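+
+/* Note: the `1f` branches above resolve to the `1:` label supplied by the
+ * enclosing inflate_in_load macro, so this helper is only valid inside it.
+ */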
+
+.macro inflate_in_load
+
+ cmp read_in_length, 63
+ bgt 1f
+
+ /*if (state->avail_in >= 8) */
+ cmp avail_in, 7
+ bhi 2f
+
+ // loop max 7 times
+ // while (state->read_in_length < 57 && state->avail_in > 0)
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ b 1f
+2:
+ add new_bytes,read_in_length,7
+ mov w_temp,8
+ lsr new_bytes,new_bytes,3
+ sub new_bytes,w_temp,new_bytes
+ ldr temp,[next_in]
+ lsl temp,temp,x_read_in_length
+ orr read_in,read_in,temp
+ add next_in,next_in,new_bytes,uxtb
+ add read_in_length,read_in_length,new_bytes,lsl 3
+ sub avail_in,avail_in,new_bytes
+
+1:
+.endm
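+
+/* inflate_in_load tops up the 64-bit read_in accumulator; a hedged C
+ * sketch of the two paths:
+ *
+ *     if (state->read_in_length <= 63) {
+ *         if (state->avail_in >= 8) {  // fast path: one 8-byte load
+ *             int n = 8 - (state->read_in_length + 7) / 8;
+ *             state->read_in |= load64(state->next_in) << state->read_in_length;
+ *             state->next_in += n;
+ *             state->read_in_length += 8 * n;
+ *             state->avail_in -= n;
+ *         } else {                     // slow path, at most 7 rounds
+ *             while (state->read_in_length < 57 && state->avail_in > 0)
+ *                 shift the next input byte into read_in;
+ *         }
+ *     }
+ */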
+
+.macro copy_word
+ sub repeat_length,repeat_length,#4
+ ldr w_arg0, [arg1],4
+ cmp repeat_length, 3
+ str w_arg0, [next_out],4
+ bls load_byte_less_than_4
+.endm
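+
+/* copy_word moves one 4-byte chunk of the repeat and drops out of the
+ * unrolled loop once fewer than 4 bytes remain.
+ */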
+
+
+ .global decode_huffman_code_block_stateless_aarch64
+ .type decode_huffman_code_block_stateless_aarch64, %function
+/*
+	int decode_huffman_code_block_stateless_aarch64(
+			struct inflate_state *state,
+			uint8_t * start_out)
+
+	returns an ISAL_* status code in w0
+*/
+ declare_generic_reg arg0, 0, x
+ declare_generic_reg arg1, 1, x
+ declare_generic_reg arg2, 2, x
+
+ declare_generic_reg state, 11,x
+ declare_generic_reg start_out, 18,x
+
+ declare_generic_reg read_in, 3,x
+ declare_generic_reg read_in_length, 4,w
+ declare_generic_reg sym_count, 5,w
+ declare_generic_reg next_bits, 6,w
+ declare_generic_reg next_lits, 6,w
+ declare_generic_reg avail_in, 20,w
+ declare_generic_reg next_in, 23,x
+
+ declare_generic_reg temp, 16,x //local temp variable
+ declare_generic_reg new_bytes, 7,w //temp variable
+ declare_generic_reg copy_overflow_length, 28,w
+
+
+
+ declare_generic_reg block_state, 8,w
+ declare_generic_reg block_state_adr,9,x
+ declare_generic_reg look_back_dist, 10,w
+ declare_generic_reg bfinal, 22,x
+
+ declare_generic_reg next_out, 12,x
+ declare_generic_reg avail_out, 13,w
+ declare_generic_reg total_out, 14,w
+
+ declare_generic_reg rfc_table, 15,x
+ declare_generic_reg next_sym, 17,w
+ declare_generic_reg next_dist, 17,w
+ declare_generic_reg bit_count, 19,w
+
+ declare_generic_reg bit_mask, 21,w
+ declare_generic_reg next_lit, 24,w
+ declare_generic_reg write_overflow_len,25,w
+ declare_generic_reg write_overflow_lits,26,w
+ declare_generic_reg repeat_length,27,w
+
+decode_huffman_code_block_stateless_aarch64:
+ //save registers
+ push_stack
+
+ //load variables
+ mov state,arg0
+ mov block_state,_block_state
+ mov start_out,arg1
+ add block_state_adr,state,block_state,uxtw
+ ldr block_state, [block_state_adr]
+ ldr bfinal, [block_state_adr,_bfinal-_block_state]
+
+ ldr next_out, [state]
+ ldp avail_out,total_out,[state,_avail_out]
+ ldp next_in, read_in, [state,_next_in]
+ ldp avail_in, read_in_length, [state,_avail_in]
+ ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+
+ //init rfc_table
+ adrp rfc_table,rfc_lookup_table
+ add rfc_table,rfc_table,:lo12:rfc_lookup_table
+#if ENABLE_TBL_INSTRUCTION
+ ld1 {v1.16b,v2.16b,v3.16b},[rfc_table]
+ add rfc_table,rfc_table,48
+ ld1 {v4.16b-v7.16b},[rfc_table]
+#endif
+
+ /*
+ state->copy_overflow_length = 0;
+ state->copy_overflow_distance = 0;
+ */
+ mov x_copy_overflow_length,xzr
+ str xzr,[block_state_adr,_copy_overflow_len-_block_state]
+
+ /* while (state->block_state == ISAL_BLOCK_CODED) */
+block_state_loop:
+ cmp block_state ,ISAL_BLOCK_CODED
+ bne exit_func_success
+
+ inflate_in_load
+
+ /* save state here */
+ str next_out, [state]
+ stp avail_out,total_out,[state,_avail_out]
+ stp next_in, read_in, [state,_next_in]
+ stp avail_in, read_in_length, [state,_avail_in]
+ stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+
+ /*
+ decode_next_lit_len(&next_lits, &sym_count,
+ state, &state->lit_huff_code,
+ &temp_dat, &temp_bytes);
+ */
+ cmp read_in_length,ISAL_DEF_MAX_CODE_LEN
+ ble inflate_in_load_decode
+decode_next_lit_len_start:
+ and x_next_bits,read_in,((1 << ISAL_DECODE_LONG_BITS) - 1)
+ /*next_sym = huff_code->short_code_lookup[next_bits];*/
+ add next_bits,next_bits,_lit_huff_code>>2
+ ldr next_sym,[state,x_next_bits,lsl 2]
+ /*if ((next_sym & LARGE_FLAG_BIT) == 0) {*/
+ tbnz next_sym,LARGE_FLAG_BIT_OFFSET,long_code_lookup_routine
+ lsr bit_count,next_sym,LARGE_SHORT_CODE_LEN_OFFSET
+ sub read_in_length,read_in_length,bit_count
+ lsr read_in,read_in,x_bit_count
+ mov temp,0x1fff
+ cmp bit_count,0
+ csel next_sym,next_sym,w_temp,ne
+ ubfx sym_count,next_sym,LARGE_SYM_COUNT_OFFSET,LARGE_SYM_COUNT_LEN
+ and next_lits,next_sym,LARGE_SHORT_SYM_MASK
+ b decode_next_lit_len_end
+long_code_lookup_routine:
+ lsr bit_mask,next_sym,LARGE_SHORT_MAX_LEN_OFFSET
+ mov sym_count,1
+ and next_sym,next_sym,LARGE_SHORT_SYM_MASK
+ mov temp,1023
+ lsl bit_mask,sym_count,bit_mask
+ sub bit_mask,bit_mask,1
+ and x_next_bits,read_in,x_bit_mask
+ add next_bits,next_sym,next_bits,lsr ISAL_DECODE_LONG_BITS
+ mov next_sym,(_lit_huff_code+_long_code_lookup_large)>>1
+ add next_bits,next_bits,next_sym
+ ldrh next_sym,[state,x_next_bits,lsl 1]
+ lsr bit_count,next_sym,10
+ sub read_in_length,read_in_length,bit_count
+ and next_lits,next_sym,w_temp
+ lsr read_in,read_in,x_bit_count
+ cmp bit_count,0
+ csel next_lits,next_lits,w_temp,ne
+decode_next_lit_len_end:
+
+ /* if (sym_count == 0) */
+ cbz sym_count,invalid_symbol
+ tbnz read_in_length,31, end_input
+
+ /* while (sym_count > 0) start */
+sym_count_loop:
+ and next_lit,next_lits , 0xffff
+
+ /*if (next_lit < 256 || sym_count > 1) {*/
+ cmp next_lit,255
+ ccmp sym_count,1,0,hi
+ beq next_lit_256
+
+ /* if (state->avail_out < 1) { */
+ cbnz avail_out,sym_count_adjust
+
+ mov write_overflow_len,sym_count
+ lsl sym_count,sym_count,3
+ mov write_overflow_lits,next_lits
+ sub sym_count,sym_count,8
+ lsr next_lits,next_lits,sym_count
+ mov sym_count,1
+ cmp next_lits,255
+ bls isal_out_overflow
+ cmp next_lits,256
+ sub write_overflow_len,write_overflow_len,1
+ beq isal_out_overflow_1
+ b sym_count_loop
+
+sym_count_adjust:
+ /*
+ while (sym_count > 0) end
+ next_lits >>= 8;
+ sym_count--;
+ */
+ subs sym_count,sym_count,1
+ lsr next_lits,next_lits,8
+ strb next_lit,[next_out],1
+ sub avail_out,avail_out,1
+ add total_out,total_out,1
+ bne sym_count_loop
+ b block_state_loop
+
+next_lit_256:
+ /* if (next_lit == 256) { */
+ cmp next_lit,256
+ beq next_lit_eq_256
+
+
+ /*
+ if (next_lit <= MAX_LIT_LEN_SYM)
+ sym_count must be 1
+ */
+ cmp next_lit,MAX_LIT_LEN_SYM
+ bhi invalid_symbol
+ sub repeat_length,next_lit,254
+ /*
+ next_dist =
+ decode_next_dist(state, &state->dist_huff_code, &temp_dat,
+ &temp_bytes);
+ */
+ cmp read_in_length,ISAL_DEF_MAX_CODE_LEN
+ ble inflate_in_load_decode_next_dist
+decode_next_dist_start:
+ and x_next_bits,read_in,((1 << ISAL_DECODE_SHORT_BITS) - 1)
+ mov next_sym,_dist_huff_code>>1
+ add next_bits,next_bits,next_sym
+ ldrh next_sym, [state,x_next_bits,lsl 1]
+ tbz next_sym,SMALL_FLAG_BIT_OFFSET,decode_next_dist_flag
+ sub bit_mask,next_sym,SMALL_FLAG_BIT
+ mov temp,1
+ asr bit_mask,bit_mask,SMALL_SHORT_CODE_LEN_OFFSET
+ and next_sym,next_sym,SMALL_SHORT_SYM_MASK
+ lsl bit_mask,w_temp,bit_mask
+ sub bit_mask,bit_mask,1
+ and x_next_bits,read_in,x_bit_mask
+ add next_bits,next_sym,next_bits,lsr ISAL_DECODE_SHORT_BITS
+ mov next_sym,(_dist_huff_code + _long_code_lookup_small)>>1
+ add next_bits,next_bits,next_sym
+ ldrh next_sym,[state,x_next_bits,lsl 1]
+ lsr bit_count,next_sym,SMALL_LONG_CODE_LEN_OFFSET
+ b decode_next_dist_adjust
+decode_next_dist_flag:
+ lsr bit_count,next_sym,SMALL_SHORT_CODE_LEN_OFFSET
+decode_next_dist_adjust:
+ sub read_in_length,read_in_length,bit_count
+ lsr read_in,read_in,x_bit_count
+ cbnz bit_count,decode_next_dist_end
+ sub read_in_length,read_in_length,next_sym
+ mov next_sym,INVALID_SYMBOL
+decode_next_dist_end:
+ and next_sym,next_sym,DIST_SYM_MASK
+
+ tbnz read_in_length,31,end_input_1
+ cmp next_dist,29
+ bhi invalid_symbol
+
+
+#if ENABLE_TBL_INSTRUCTION
+ ins v0.b[0],next_dist
+ tbl v0.8b,{v2.16b,v3.16b},v0.8b
+ umov bit_count,v0.b[0]
+#else
+ ldrb bit_count,[rfc_table,next_dist,sxtw]
+#endif
+
+ /*inflate_in_read_bits(state,
+ dist_extra_bit_count, &temp_dat,
+ &temp_bytes);
+ */
+ inflate_in_load
+ mov temp,1
+ lsl temp,temp,x_bit_count
+ sub read_in_length,read_in_length,bit_count
+ sub temp,temp,1
+ and x_look_back_dist,temp,read_in
+ lsr read_in,read_in,x_bit_count
+#if ENABLE_TBL_INSTRUCTION
+ dup v0.8b,next_dist
+ add v0.8b,v1.8b,v0.8b
+ tbl v0.8b,{v4.16b-v7.16b},v0.8b
+ umov next_dist,v0.h[0]
+#else
+ add next_dist,next_dist,16
+ ldrh next_dist,[rfc_table,x_next_dist,lsl 1]
+#endif
+ add look_back_dist,look_back_dist,next_dist
+
+ /*
+ if (state->read_in_length < 0) {
+ */
+ tbnz read_in_length,31,end_input_1
+
+ /*
+ if (state->next_out - look_back_dist < start_out) {
+ */
+ sub temp,next_out,x_look_back_dist
+ cmp temp,start_out
+ bcc isal_invalid_lookback
+ /*
+ if (state->avail_out < repeat_length) {
+ */
+ cmp avail_out , repeat_length
+ bcs decompress_data_start
+ sub copy_overflow_length,repeat_length,avail_out
+ stp copy_overflow_length,look_back_dist,[block_state_adr,_copy_overflow_len-_block_state]
+ mov repeat_length,avail_out
+
+decompress_data_start:
+ add total_out,total_out,repeat_length
+ sub avail_out,avail_out,repeat_length
+ sub arg1,next_out,x_look_back_dist
+ #if 1
+ cmp look_back_dist,repeat_length
+ bls byte_copy_start
+ #else
+ b byte_copy_start
+ #endif
+
+
+ cbz repeat_length,decompress_data_end
+ cmp repeat_length, 3
+	bls	load_byte_less_than_4	// rarely taken (~0.5% of cases)
+load_byte_4:
+ sub repeat_length, repeat_length, #4
+ ldr w_arg0, [arg1],4
+ cmp repeat_length, 3
+ str w_arg0, [next_out],4
+ bls load_byte_less_than_4
+ .rept 62
+ copy_word
+ .endr
+ sub repeat_length, repeat_length, #4
+ ldr w_arg0, [arg1],4
+ cmp repeat_length, 4
+ str w_arg0, [next_out],4
+ bge load_byte_4
+load_byte_less_than_4:
+ tbz repeat_length,0,load_byte_2
+ ldrb w_arg0, [arg1],1
+ sub repeat_length, repeat_length, #1
+ strb w_arg0, [next_out],1
+load_byte_2:
+ tbz repeat_length,1,decompress_data_end
+ ldrh w_arg0, [arg1],2
+ strh w_arg0, [next_out],2
+decompress_data_end:
+
+
+
+ /*
+ if (state->copy_overflow_length > 0)
+ */
+ cmp copy_overflow_length,0
+ bgt isal_out_overflow
+ b block_state_loop
+next_lit_eq_256:
+ /*
+ state->block_state = state->bfinal ?
+ ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
+ */
+ mov block_state, ISAL_BLOCK_INPUT_DONE
+ cmp w_bfinal,0
+ csel block_state, block_state, w_bfinal, ne
+ str block_state, [block_state_adr]
+
+ b block_state_loop
+exit_func_success:
+ mov w0 , 0
+exit_func:
+ str next_out, [state]
+ stp avail_out,total_out,[state,_avail_out]
+ stp next_in, read_in, [state,_next_in]
+ stp avail_in, read_in_length, [state,_avail_in]
+ stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+
+ pop_stack
+ ret
+end_input_1:
+end_input:
+ mov w0,ISAL_END_INPUT
+ pop_stack
+ ret
+
+invalid_symbol:
+	/*
+		the stream state was modified above; write it back before returning
+	*/
+ str next_out, [state]
+ stp avail_out,total_out,[state,_avail_out]
+ stp next_in, read_in, [state,_next_in]
+ stp avail_in, read_in_length, [state,_avail_in]
+ stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+ mov w0, ISAL_INVALID_SYMBOL
+ b exit_func
+isal_out_overflow_1:
+
+ cmp bfinal,0
+ mov block_state, ISAL_BLOCK_INPUT_DONE
+ csel block_state, block_state, wzr, ne
+ str block_state, [block_state_adr]
+isal_out_overflow:
+ mov w0, ISAL_OUT_OVERFLOW
+
+ b exit_func
+isal_invalid_lookback:
+ mov w0, ISAL_INVALID_LOOKBACK
+ b exit_func
+inflate_in_load_decode:
+ inflate_in_load
+ b decode_next_lit_len_start
+inflate_in_load_decode_next_dist:
+ inflate_in_load
+ b decode_next_dist_start
+byte_copy_start:
+ add arg2,next_out,x_repeat_length
+ cmp arg2, next_out
+ beq decompress_data_end
+ sub arg2,arg2,1
+byte_copy_loop:
+ ldrb w_arg0, [arg1] , 1
+ cmp arg2, next_out
+ strb w_arg0, [next_out],1
+ bne byte_copy_loop
+ b decompress_data_end
+ .size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
+ .type rfc_lookup_table, %object
+
+rfc_lookup_table:
+#if ENABLE_TBL_INSTRUCTION
+ .byte 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ .zero 8
+#endif
+ //dist_extra_bit_count
+ .byte 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02
+ .byte 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06
+ .byte 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a
+ .byte 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00
+ //dist_start
+#if ENABLE_TBL_INSTRUCTION
+ .byte 0x01,0x02,0x03,0x04,0x05,0x07,0x09,0x0d,0x11,0x19,0x21,0x31,0x41,0x61,0x81,0xc1
+ .byte 0x01,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00
+ .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+ .byte 0x01,0x01,0x02,0x03,0x04,0x06,0x08,0x0c,0x10,0x18,0x20,0x30,0x40,0x60,0x00,0x00
+#else
+ .short 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
+ .short 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+ .short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+ .short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
+#endif
+ .size rfc_lookup_table, . - rfc_lookup_table
diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S
new file mode 100644
index 000000000..254f74c61
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S
@@ -0,0 +1,261 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+/* declare helper macros */
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+ m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+
+ //m_out_buf=bytes_written
+ sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+ cmp next_in,start_in
+ bls skip_has_hist
+ mov w_\tmp0,1
+ strb w_\tmp0,[x_\stream,_internal_state_has_hist]
+skip_has_hist:
+ ldr w_\tmp0,[\stream,_total_in]
+ ldr x_\m_out_start,[\stream,_next_out] //m_out_start = next_out
+
+ str x_\next_in,[\stream,_next_in]
+ sub x_\start_in,x_\next_in,x_\start_in
+ sub x_\end_in,x_\end_in,x_\next_in
+ add w_\tmp0,w_\tmp0,w_\start_in
+ stp w_\end_in,w_\tmp0,[\stream,_avail_in]
+ //next_in=avail_out,start_in=total_out
+ ldp w_\next_in,w_\start_in,[\stream,_avail_out]
+ add x_\m_out_start,x_\m_out_start,x_\m_out_buf
+ str x_\m_out_start,[\stream,_next_out]
+ add w_\start_in,w_\start_in,w_\m_out_buf
+ sub w_\next_in,w_\next_in,w_\m_out_buf
+ stp w_\next_in,w_\start_in,[\stream,_avail_out]
+.endm
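+
+/* update_state writes the loop-local cursors back into the zstream:
+ * next_in/avail_in/total_in on the input side, and next_out/avail_out/
+ * total_out advanced by bytes_written = m_out_buf - m_out_start; has_hist
+ * is set once at least one input byte has been consumed.
+ */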
+
+
+ .global isal_deflate_body_aarch64
+ .type isal_deflate_body_aarch64, %function
+/*
+ void isal_deflate_body_aarch64(struct isal_zstream *stream)
+*/
+ declare_generic_reg stream, 0,x //struct isal_zstream *stream
+ declare_generic_reg state, 8,x //&stream->state
+ declare_generic_reg avail_in, 9,w
+ declare_generic_reg end_of_stream, 10,w //can be used in loop
+
+ declare_generic_reg hash_mask, 11,w
+ declare_generic_reg match_length, 12,w
+ declare_generic_reg hufftables, 13,x
+
+ declare_generic_reg m_out_buf, 14,x
+ declare_generic_reg m_out_start, 15,x
+ declare_generic_reg m_out_end, 16,x
+ declare_generic_reg m_bits, 17,x
+ declare_generic_reg m_bit_count, 18,w
+
+ declare_generic_reg start_in, 19,x
+ declare_generic_reg end_in, 20,x
+ declare_generic_reg next_in, 21,x
+ declare_generic_reg loop_end_cnt, 22,x
+
+ declare_generic_reg literal, 23,w
+ declare_generic_reg hash, 24,w
+ declare_generic_reg dist, 25,w
+
+ declare_generic_reg last_seen, 26,x
+ declare_generic_reg file_start, 27,x
+ declare_generic_reg hist_size, 28,w
+
+ declare_generic_reg tmp0, 5 ,w
+ declare_generic_reg tmp1, 6 ,w
+ declare_generic_reg tmp2, 7 ,w
+
+ declare_generic_reg code, 3,x
+ declare_generic_reg code_len, 24,x
+ declare_generic_reg code2, 10,x
+ declare_generic_reg code_len2, 4,x
+
+
+isal_deflate_body_aarch64:
+ //save registers
+ push_stack
+ ldr avail_in, [stream, _avail_in]
+ cbz avail_in, exit_save_state
+
+ // set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+ ldr w_m_out_end,[stream,_avail_out]
+ ldr m_out_buf,[stream,_next_out]
+ add m_out_end,m_out_buf,w_m_out_end,uxtw
+ sub m_out_end,m_out_end , 8
+ mov m_out_start,m_out_buf
+ stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
+ str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
+ ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
+ ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
+
+
+ //init variables
+	//last_seen = &stream->internal_state.head (_internal_state+_head);
+	//split into two adds because the offset exceeds the add immediate range
+	add	last_seen,stream,65536
+	add	last_seen,last_seen,_internal_state+_head -65536
+
+
+ //start_in=stream->next_in;next_in=start_in
+ ldr start_in,[stream,_next_in]
+ mov next_in,start_in
+ add end_in,start_in,avail_in,uxtw //avail_in reg is free now
+ sub loop_end_cnt,end_in,289 //loop end
+ cmp next_in,loop_end_cnt
+
+
+ //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ ldr w_file_start,[stream,_total_in]
+ sub file_start,next_in,w_file_start,uxtw
+
+ //uint32_t hist_size = state->dist_mask;
+ ldr hist_size,[stream,_internal_state + _dist_mask]
+
+ //uint32_t hash_mask = state->hash_mask;
+ ldr hash_mask,[stream,_internal_state + _hash_mask]
+
+ ldr hufftables,[stream,_hufftables]
+
+ bhi main_loop_end
+main_loop_start:
+ //is_full(&state->bitbuf)
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+
+ ldr literal,[next_in]
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+
+ ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
+ sub x_dist,next_in,file_start
+ //last_seen[hash] = (uint64_t) (next_in - file_start);
+ strh dist,[last_seen,x_hash,lsl 1]
+ sub dist,dist,w_tmp0
+ and dist,dist,0xffff
+
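+	//unsigned (dist - 1) >= hist_size rejects both dist == 0 and out-of-window matches with a single compare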
+ sub w_tmp0,dist,1
+ cmp hist_size,w_tmp0
+ bls get_lit_code
+
+ ///match_length = compare258(next_in - dist, next_in, 258);
+ sub x_tmp2,next_in,x_dist
+ compare_258_bytes tmp2,next_in,match_length,tmp0,tmp1
+ cmp match_length,3
+ bls get_lit_code
+
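+	//a match will be emitted: also record hash entries for the next two input positions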
+ sub x_tmp0,next_in,file_start
+ ldr literal,[next_in,1]
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+ add tmp0,tmp0,1
+ strh tmp0,[last_seen,x_hash,lsl 1]
+ //call_print_b hash,dist,last_seen
+
+ ldr literal,[next_in,2]
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+ add tmp0,tmp0,1
+ strh tmp0,[last_seen,x_hash,lsl 1]
+
+ //get_len_code(stream->hufftables, match_length, &code,
+ // &code_len);
+ get_len_code hufftables,match_length,code,code_len,tmp0
+
+ //get_dist_code(stream->hufftables, dist, &code2, &code_len2);
+ get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
+
+ //code |= code2 << code_len;
+ //code_len += code_len2;
+ lsl code2,code2,code_len
+ orr code,code,code2
+ add code_len,code_len,code_len2
+
+ //next_in += match_length;
+ add next_in,next_in,match_length,uxtw
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+
+
+
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+ b main_loop_end
+get_lit_code:
+ //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+ and literal,literal,0xff
+ get_lit_code hufftables,literal,code,code_len
+
+ //next_in++;
+ add next_in,next_in,1
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+
+main_loop_end:
+ //update state here
+
+ //load end_of_stream and flush together
+ ldr w_end_of_stream, [stream, _end_of_stream]
+ //(stream->end_of_stream || stream->flush != 0)
+ cbz w_end_of_stream, update_state_exit
+	mov	w_tmp0, ZSTATE_FLUSH_READ_BUFFER
+ str w_tmp0, [stream, _internal_state+_state]
+update_state_exit:
+ update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
+exit_ret:
+ pop_stack
+ ret
+exit_save_state:
+ ldr w_end_of_stream, [stream, _end_of_stream]
+ cbz w_end_of_stream, exit_ret //(stream->end_of_stream || stream->flush != 0)
+	mov	w_tmp0, ZSTATE_FLUSH_READ_BUFFER
+ str w_tmp0, [stream, _internal_state+_state]
+ b exit_ret
+ .size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S
new file mode 100644
index 000000000..e5842b5bc
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S
@@ -0,0 +1,264 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+ m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+
+ //m_out_buf=bytes_written
+ sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+ cmp next_in,start_in
+ bls skip_has_hist
+ mov w_\tmp0,1
+ strb w_\tmp0,[x_\stream,_internal_state_has_hist]
+skip_has_hist:
+ ldr w_\tmp0,[\stream,_total_in]
+ ldr x_\m_out_start,[\stream,_next_out] //m_out_start = next_out
+
+ str x_\next_in,[\stream,_next_in]
+ sub x_\start_in,x_\next_in,x_\start_in
+ sub x_\end_in,x_\end_in,x_\next_in
+ add w_\tmp0,w_\tmp0,w_\start_in
+ stp w_\end_in,w_\tmp0,[\stream,_avail_in]
+	//register reuse: next_in now holds avail_out, start_in holds total_out
+ ldp w_\next_in,w_\start_in,[\stream,_avail_out]
+ add x_\m_out_start,x_\m_out_start,x_\m_out_buf
+ str x_\m_out_start,[\stream,_next_out]
+ add w_\start_in,w_\start_in,w_\m_out_buf
+ sub w_\next_in,w_\next_in,w_\m_out_buf
+ stp w_\next_in,w_\start_in,[\stream,_avail_out]
+.endm
+ .global isal_deflate_finish_aarch64
+ .arch armv8-a+crc
+ .type isal_deflate_finish_aarch64, %function
+/*
+ void isal_deflate_finish_aarch64(struct isal_zstream *stream)
+*/
+ declare_generic_reg stream, 0,x //struct isal_zstream *stream
+ declare_generic_reg state, 8,x //&stream->state
+ declare_generic_reg avail_in, 9,w
+	declare_generic_reg	end_of_stream, 10,w	//register may be reused inside the main loop
+
+ declare_generic_reg hash_mask, 11,w
+ declare_generic_reg match_length, 12,w
+ declare_generic_reg hufftables, 13,x
+
+ declare_generic_reg m_out_buf, 14,x
+ declare_generic_reg m_out_start, 15,x
+ declare_generic_reg m_out_end, 16,x
+ declare_generic_reg m_bits, 17,x
+ declare_generic_reg m_bit_count, 18,w
+
+ declare_generic_reg start_in, 19,x
+ declare_generic_reg end_in, 20,x
+ declare_generic_reg next_in, 21,x
+ declare_generic_reg loop_end_cnt, 22,x
+
+ declare_generic_reg literal, 23,w
+ declare_generic_reg hash, 24,w
+ declare_generic_reg dist, 25,w
+
+ declare_generic_reg last_seen, 26,x
+ declare_generic_reg file_start, 27,x
+ declare_generic_reg hist_size, 28,w
+
+	declare_generic_reg	tmp0, 5,w
+	declare_generic_reg	tmp1, 6,w
+	declare_generic_reg	tmp2, 7,w
+
+ declare_generic_reg code, 3,x
+ declare_generic_reg code_len, 24,x
+ declare_generic_reg code2, 10,x
+ declare_generic_reg code_len2, 4,x
+
+
+isal_deflate_finish_aarch64:
+ //save registers
+ push_stack
+
+ // set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+ ldr w_m_out_end,[stream,_avail_out]
+ ldr m_out_buf,[stream,_next_out]
+ add m_out_end,m_out_buf,w_m_out_end,uxtw
+	sub	m_out_end,m_out_end,8
+ mov m_out_start,m_out_buf
+ stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
+ str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
+	ldr	m_bit_count,[stream,_internal_state_bitbuf_m_bit_count]
+	ldr	m_bits,[stream,_internal_state_bitbuf_m_bits]
+
+ //init variables
+	//last_seen = &stream->internal_state.head (byte offset _internal_state + _head)
+ add last_seen,stream,65536
+ add last_seen,last_seen,_internal_state+_head -65536
+
+
+ //start_in=stream->next_in;next_in=start_in
+ ldr avail_in, [stream, _avail_in]
+ ldr start_in,[stream,_next_in]
+ mov next_in,start_in
+ add end_in,start_in,avail_in,uxtw //avail_in reg is free now
+ ldr hufftables,[stream,_hufftables]
+ cbz avail_in, update_not_full
+
+
+ sub loop_end_cnt,end_in,4 //loop end
+ cmp next_in,loop_end_cnt
+
+
+ //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ ldr w_file_start,[stream,_total_in]
+ sub file_start, next_in, w_file_start, uxtw
+
+ //uint32_t hist_size = state->dist_mask;
+ ldr hist_size,[stream,_internal_state + _dist_mask]
+
+ //uint32_t hash_mask = state->hash_mask;
+ ldr hash_mask,[stream,_internal_state + _hash_mask]
+
+ bhi main_loop_end
+main_loop_start:
+ //is_full(&state->bitbuf)
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+
+ ldr literal,[next_in]
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+
+ ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
+ sub x_dist,next_in,file_start
+ //last_seen[hash] = (uint64_t) (next_in - file_start);
+ strh dist,[last_seen,x_hash,lsl 1]
+ sub dist,dist,w_tmp0
+ and dist,dist,0xffff
+
+ sub w_tmp0,dist,1
+ cmp hist_size,w_tmp0
+ bls get_lit_code
+
+ /// match_length = compare258(next_in - dist, next_in, 258);
+ sub x_tmp2,next_in,x_dist
+ sub x_hash,end_in,next_in
+ compare_max_258_bytes tmp2,next_in,hash,match_length,tmp0,tmp1
+ cmp match_length,3
+ bls get_lit_code
+
+ get_len_code hufftables,match_length,code,code_len,tmp0
+ get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
+
+ //code |= code2 << code_len;
+ //code_len += code_len2;
+ lsl code2,code2,code_len
+ orr code,code,code2
+ add code_len,code_len,code_len2
+
+ //next_in += match_length;
+ add next_in,next_in,match_length,uxtw
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+ b main_loop_end
+get_lit_code:
+ //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+ and literal,literal,0xff
+ get_lit_code hufftables,literal,code,code_len
+
+ //next_in++;
+ add next_in,next_in,1
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+main_loop_end:
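+	//fewer than 4 bytes remain, too short to hash or match: emit the tail as literals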
+ sub loop_end_cnt,end_in,1
+ cmp next_in,loop_end_cnt
+ bhi update_not_full
+second_loop_start:
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+ ldr literal,[next_in]
+ and literal,literal,0xff
+ get_lit_code hufftables,literal,code,code_len
+ //next_in++;
+ add next_in,next_in,1
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+ cmp next_in,loop_end_cnt
+ bls second_loop_start
+
+update_not_full:
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+
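+	//symbol 256 is the deflate end-of-block marker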
+ mov literal,256
+ get_lit_code hufftables,literal,code,code_len
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
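+	//mark the EOB as written; end_of_stream == 1 selects ZSTATE_TRL (write trailer), otherwise ZSTATE_SYNC_FLUSH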
+ ldrh w_end_of_stream, [stream, _end_of_stream]
+ mov w_tmp0,1
+ strb w_tmp0,[stream,_internal_state_has_eob]
+ cmp w_end_of_stream,w_tmp0
+ mov w_tmp0, ZSTATE_TRL
+ mov w_tmp1, ZSTATE_SYNC_FLUSH
+ csel w_tmp0,w_tmp0,w_tmp1,eq
+ str w_tmp0, [stream, _internal_state+_state]
+
+update_state_exit:
+ update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
+ pop_stack
+ ret
+
+ .size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S
new file mode 100644
index 000000000..40251dab4
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S
@@ -0,0 +1,95 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+
+
+ .global isal_deflate_hash_aarch64
+ .type isal_deflate_hash_aarch64, %function
+/*
+ void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+*/
+ declare_generic_reg hash_table, 0,x
+ declare_generic_reg hash_mask, 1,w
+ declare_generic_reg current_index, 2,w
+ declare_generic_reg dict, 3,x
+ declare_generic_reg dict_len, 4,w
+
+	declare_generic_reg	next_in, 3,x
+	declare_generic_reg	end_in, 6,x
+	declare_generic_reg	ind, 5,w
+	declare_generic_reg	hash, 2,w
+	declare_generic_reg	literal, 2,w
+#define SHORTEST_MATCH #4
+
+isal_deflate_hash_aarch64:
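+	//ind is the 16-bit position of the first dictionary byte, i.e. current_index - dict_len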
+ sub ind, current_index, dict_len
+ and ind,ind,0xffff
+
+
+ uxtw x_dict_len, dict_len
+ sub x_dict_len, x_dict_len, SHORTEST_MATCH
+ add end_in, dict, x_dict_len
+
+
+
+ cmp next_in, end_in
+ bcs exit_func
+
+ mov w7, 0
+loop_start:
+ ldr literal, [next_in]
+ add next_in, next_in, 1
+ cmp next_in, end_in
+ crc32cw hash, w7, literal
+ and hash, hash, hash_mask
+ strh ind, [hash_table, x_hash, lsl 1]
+ add ind,ind,1
+ bne loop_start
+exit_func:
+
+ ret
+ .size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
diff --git a/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S b/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S
new file mode 100644
index 000000000..4f2fe22aa
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S
@@ -0,0 +1,32 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+mbin_interface decode_huffman_code_block_stateless
diff --git a/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S b/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S
new file mode 100644
index 000000000..78d23940d
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S
@@ -0,0 +1,178 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crypto
+ .text
+ .align 3
+
+/*
+Macros
+*/
+
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_q .req q\reg
+ \name\()_v .req v\reg
+ \name\()_s .req s\reg
+ \name\()_d .req d\reg
+.endm
+
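+/*
+mod_adler: reduce \dest modulo 65521 (the Adler-32 prime) without a divide:
+q = (\dest * 0x80078071) >> 47 equals floor(\dest / 65521) for 32-bit inputs,
+since 0x80078071 = ceil(2^47 / 65521); msub then subtracts q * 65521.
+*/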
+.macro mod_adler dest:req,tmp:req
+ umull \tmp\()_x,\dest,const_div1
+ lsr \tmp\()_x,\tmp\()_x,47
+ msub \dest,\tmp,const_div2,\dest
+.endm
+
+/*
+ uint32_t adler32_neon(uint32_t adler32, uint8_t * start, uint32_t length);
+*/
+/*
+Arguments list
+*/
+ adler32 .req w0
+ start .req x1
+ length .req x2
+ .global adler32_neon
+ .type adler32_neon, %function
+adler32_neon:
+/*
+local variables
+*/
+ declare_var_vector_reg factor0 , 6
+ declare_var_vector_reg factor1 , 7
+ declare_var_vector_reg d0 , 4
+ declare_var_vector_reg d1 , 5
+ declare_var_vector_reg adacc , 2
+ declare_var_vector_reg s2acc , 3
+ declare_var_vector_reg zero , 16
+ declare_var_vector_reg adler , 17
+ declare_var_vector_reg back_d0 , 18
+ declare_var_vector_reg back_d1 , 19
+ declare_var_vector_reg sum2 , 20
+ declare_var_vector_reg tmp2 , 20
+
+ adler0 .req w4
+ adler1 .req w5
+ adler0_x .req x4
+ adler1_x .req x5
+ end .req x0
+ tmp .req w8
+ tmp_x .req x8
+ tmp1_x .req x9
+ loop_cnt .req x10
+ loop_const .req x11
+ const_div1 .req w6
+ const_div2 .req w7
+ mov const_div1, 32881
+ movk const_div1, 0x8007, lsl 16
+ mov const_div2, 65521
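+	//split the incoming checksum: adler0 = A (low 16 bits), adler1 = B (high 16 bits)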
+ and adler0, adler32, 0xffff
+ lsr adler1, adler32, 16
+
+ lsr loop_cnt,length,5
+ adrp x3,factors
+ add x3,x3,:lo12:factors
+ ld1 {factor0_v.16b-factor1_v.16b},[x3]
+
+ add end,start,length
+ cbz loop_cnt,final_accum32
+ ld1 {back_d0_v.16b-back_d1_v.16b},[start]
+ mov loop_const,173
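+	//at most 173 blocks of 32 bytes (5536 bytes) per round keeps the 32-bit lane sums below the Adler-32 overflow bound (NMAX = 5552)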
+
+ movi v16.4s,0
+
+
+
+
+great_than_32:
+ cmp loop_cnt,173
+ csel loop_const,loop_cnt,loop_const,le
+ mov adacc_v.16b,zero_v.16b
+ mov s2acc_v.16b,zero_v.16b
+ ins adacc_v.s[0],adler0
+ ins s2acc_v.s[0],adler1
+ add tmp_x,start,loop_const,lsl 5
+
+accum32_neon:
+ add start,start,32
+ mov d0_v.16b,back_d0_v.16b
+ mov d1_v.16b,back_d1_v.16b
+ ld1 {back_d0_v.16b-back_d1_v.16b},[start]
+
+ shl tmp2_v.4s,adacc_v.4s,5
+ add s2acc_v.4s,s2acc_v.4s,tmp2_v.4s
+
+ uaddlp adler_v.8h,d0_v.16b
+ uadalp adler_v.8h,d1_v.16b
+ uadalp adacc_v.4s,adler_v.8h
+
+ umull sum2_v.8h,factor0_v.8b ,d0_v.8b
+ umlal2 sum2_v.8h,factor0_v.16b,d0_v.16b
+ umlal sum2_v.8h,factor1_v.8b ,d1_v.8b
+ umlal2 sum2_v.8h,factor1_v.16b,d1_v.16b
+ uadalp s2acc_v.4s,sum2_v.8h
+
+ cmp start,tmp_x
+ bne accum32_neon
+
+ uaddlv adacc_d,adacc_v.4s
+ uaddlv s2acc_d,s2acc_v.4s
+ fmov adler0_x,adacc_d
+ fmov adler1_x,s2acc_d
+
+ mod_adler adler0,tmp
+ mod_adler adler1,tmp
+ sub loop_cnt,loop_cnt,loop_const
+ cbnz loop_cnt,great_than_32
+
+final_accum32:
+ and length,length,31
+ cbz length,end_func
+
+accum32_body:
+ cmp start,end
+ beq end_func
+ ldrb tmp,[start],1
+ add adler0,adler0,tmp
+ add adler1,adler1,adler0
+ b accum32_body
+
+end_func:
+ mod_adler adler0,tmp
+ mod_adler adler1,tmp
+ orr w0,adler0,adler1,lsl 16
+ ret
+
+ .size adler32_neon, .-adler32_neon
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
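+/* per-byte weights 32..1 (little-endian order) used to fold each 32-byte block into sum2 */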
+factors:
+ .quad 0x191a1b1c1d1e1f20
+ .quad 0x1112131415161718
+ .quad 0x090a0b0c0d0e0f10
+ .quad 0x0102030405060708
+
diff --git a/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c b/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c
new file mode 100644
index 000000000..183010c22
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c
@@ -0,0 +1,188 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(isal_adler32)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(adler32_neon);
+
+ return PROVIDER_BASIC(adler32);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_body)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_body_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_body);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_finish_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_finish);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
+
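+	/* note: both branches currently select icf_body_hash1_fillgreedy_lazy */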
+ return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
+
+ return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
+}
+
+DEFINE_INTERFACE_DISPATCHER(set_long_icf_fg)
+{
+ return PROVIDER_INFO(set_long_icf_fg_aarch64);
+}
+
+DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf)
+{
+ return PROVIDER_INFO(encode_deflate_icf_aarch64);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_update_histogram_aarch64);
+
+ return PROVIDER_BASIC(isal_update_histogram);
+}
+
+DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32) {
+ return PROVIDER_INFO(gen_icf_map_h1_aarch64);
+ }
+
+ return PROVIDER_BASIC(gen_icf_map_h1);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_hash);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_hash);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_hash);
+}
+
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+
+ return PROVIDER_BASIC(isal_deflate_hash);
+}
+
+DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_CRC32)
+ return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
+
+ return PROVIDER_BASIC(decode_huffman_code_block_stateless);
+}
diff --git a/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S b/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S
new file mode 100644
index 000000000..57d5230a5
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S
@@ -0,0 +1,50 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+
+mbin_interface isal_deflate_icf_body_lvl1
+mbin_interface isal_deflate_icf_body_lvl2
+mbin_interface isal_deflate_icf_body_lvl3
+mbin_interface isal_deflate_icf_finish_lvl1
+mbin_interface isal_deflate_icf_finish_lvl2
+mbin_interface isal_deflate_icf_finish_lvl3
+mbin_interface isal_update_histogram
+mbin_interface encode_deflate_icf
+mbin_interface set_long_icf_fg
+mbin_interface gen_icf_map_lh1
+mbin_interface isal_deflate_hash_lvl0
+mbin_interface isal_deflate_hash_lvl1
+mbin_interface isal_deflate_hash_lvl2
+mbin_interface isal_deflate_hash_lvl3
+
+mbin_interface isal_deflate_body
+mbin_interface isal_deflate_finish
+mbin_interface isal_adler32
diff --git a/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
new file mode 100644
index 000000000..13f9b087d
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
@@ -0,0 +1,194 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+ .text
+ .align 2
+ .global set_long_icf_fg_aarch64
+ .type set_long_icf_fg_aarch64, %function
+
+/*
+void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
+ struct deflate_icf *match_lookup)
+*/
+
+ /* arguments */
+ declare_generic_reg next_in_param, 0,x
+ declare_generic_reg processed_param, 1,x
+ declare_generic_reg input_size_param, 2,x
+ declare_generic_reg match_lookup_param, 3,x
+
+ declare_generic_reg param0, 0,x
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+
+ /* local variable */
+ declare_generic_reg len, 7,w
+ declare_generic_reg dist_code, 8,w
+	declare_generic_reg	shortest_match_len, 9,w
+ declare_generic_reg len_max, 10,w
+ declare_generic_reg dist_extra, 11,w
+ declare_generic_reg const_8, 13,x
+ declare_generic_reg next_in, 20,x
+ declare_generic_reg dist_start, 21,x
+ declare_generic_reg end_processed, 22,x
+ declare_generic_reg end_in, 23,x
+ declare_generic_reg match_lookup, 19,x
+
+ declare_generic_reg match_length, 4,w
+ declare_generic_reg tmp0, 5,w
+ declare_generic_reg tmp1, 6,w
+
+/* constant */
+.equ DIST_START_SIZE, 128
+.equ ISAL_LOOK_AHEAD, 288
+.equ LEN_OFFSET, 254
+.equ SHORTEST_MATCH, 4
+.equ LEN_MAX_CONST, 512
+
+set_long_icf_fg_aarch64:
+ stp x29, x30, [sp, -192]!
+ add x29, sp, 0
+ stp x21, x22, [sp, 32]
+ add x21, x29, 64
+ stp x19, x20, [sp, 16]
+ str x23, [sp, 48]
+
+ add end_processed, next_in_param, processed_param
+ mov next_in, next_in_param
+ add end_in, next_in_param, input_size_param
+ mov match_lookup, match_lookup_param
+
+ adrp x1, .data_dist_start
+ mov x2, DIST_START_SIZE // 128
+ add x1, x1, :lo12:.data_dist_start
+ mov x0, dist_start
+ bl memcpy
+
+ add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
+ cmp end_in, x_tmp0
+ csel end_in, end_in, x_tmp0, cc
+ cmp next_in, end_processed
+ bcs .done
+
+ mov const_8, 8
+ mov len_max, LEN_MAX_CONST // 512
+ mov shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
+ b .while_outer_loop
+
+ .align 2
+.while_outer_check:
+ add next_in, next_in, 1
+ add match_lookup, match_lookup, 4
+ cmp end_processed, next_in
+ bls .done
+
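+	//walk the match_lookup table: entries whose stored length implies a full
+	//8-byte match are re-compared past the first 8 bytes; when the match is
+	//longer, the inner loop propagates decreasing lengths to later entries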
+.while_outer_loop:
+ ldrh len, [match_lookup]
+ and len, len, LIT_LEN_MASK // 1023
+ cmp len, (LEN_OFFSET + 8 - 1) // 261
+ bls .while_outer_check
+
+ ldr dist_code, [match_lookup]
+ add x1, next_in, 8
+ ldrh dist_extra, [match_lookup, 2]
+ sub w2, w_end_in, w1
+ ubfx x_dist_code, x_dist_code, 10, 9
+ ubfx x_dist_extra, x_dist_extra, 3, 13
+ uxtw x0, dist_code
+ ldr w0, [dist_start, x0, lsl 2]
+ add w0, dist_extra, w0
+ sub x0, const_8, x0
+ add x0, next_in, x0
+
+ compare_aarch64 param0,param1,param2,match_length,tmp0,tmp1
+ mov w0, w_match_length
+
+ add w0, w0, (LEN_OFFSET + 8) // 262
+ cmp w0, len
+ bls .while_outer_check
+
+ lsl w2, dist_extra, 19
+ orr w2, w2, dist_code, lsl 10
+
+ .align 3
+.while_inner_loop:
+ cmp w0, LEN_MAX_CONST // 512
+ add next_in, next_in, 1
+ csel w1, w0, len_max, ls
+ sub w0, w0, #1
+ orr w1, w1, w2
+ str w1, [match_lookup]
+ ldrh w1, [match_lookup, 4]!
+
+ and w1, w1, LIT_LEN_MASK // 1023
+ cmp w1, (LEN_OFFSET + SHORTEST_MATCH - 1) // 257
+ csel w1, w1, shortest_match_len, cs
+ cmp w1, w0
+ bcc .while_inner_loop
+
+ add next_in, next_in, 1
+ add match_lookup, match_lookup, 4
+ cmp end_processed, next_in
+ bhi .while_outer_loop
+
+.done:
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldr x23, [sp, 48]
+ ldp x29, x30, [sp], 192
+ ret
+ .size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
+
+ .section .rodata
+ .align 3
+ .set .data_dist_start,. + 0
+.real_data_dist_start:
+ .word 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
+ .word 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+ .word 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+ .word 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
diff --git a/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S b/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S
new file mode 100644
index 000000000..3daaa1ba3
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S
@@ -0,0 +1,364 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+ .global isal_deflate_icf_body_hash_hist_aarch64
+ .type isal_deflate_icf_body_hash_hist_aarch64, %function
+/*
+void isal_deflate_icf_body_hash_hist_aarch64(struct isal_zstream *stream);
+*/
+
+/* constant */
+
+/* offset of struct isal_zstream */
+.equ offset_next_in, 0
+.equ offset_avail_in, 8
+.equ offset_total_in, 12
+.equ offset_next_out, 16
+.equ offset_avail_out, 24
+.equ offset_total_out, 28
+.equ offset_hufftables, 32
+.equ offset_level, 40
+.equ offset_level_buf_size, 44
+.equ offset_level_buf, 48
+.equ offset_end_of_stream, 56
+.equ offset_flush, 58
+.equ offset_gzip_flag, 60
+.equ offset_hist_bits, 62
+.equ offset_state, 64
+.equ offset_state_block_end, 72
+.equ offset_state_has_hist, 135
+
+/* offset of struct level_buf */
+.equ offset_encode_tables, 0
+.equ offset_hist, 2176
+.equ offset_hist_d_hist, 2176
+.equ offset_hist_ll_hist, 2296
+.equ offset_deflate_hdr_count, 4348
+.equ offset_deflate_hdr_extra_bits, 4352
+.equ offset_deflate_hdr, 4356
+.equ offset_icf_buf_next, 4688
+.equ offset_icf_buf_avail_out, 4696
+.equ offset_icf_buf_start, 4704
+.equ offset_hash8k, 4712
+.equ offset_hash_hist, 4712
+
+/* offset of struct isal_zstate */
+.equ offset_dist_mask, 12
+.equ offset_hash_mask, 16
+
+/* macros */
+.equ ISAL_LOOK_AHEAD, 288
+
+ /* arguments */
+ declare_generic_reg stream, 0,x
+ declare_generic_reg stream_saved, 11,x
+
+ declare_generic_reg param0, 0,x
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+
+	/* local variable */
+ declare_generic_reg level_buf, 18,x
+ declare_generic_reg avail_in, 13,w
+ declare_generic_reg end_in, 13,x
+ declare_generic_reg start_in, 19,x
+ declare_generic_reg next_in, 9,x
+ declare_generic_reg next_in_iter, 14,x
+ declare_generic_reg state, 24,x
+ declare_generic_reg hist_size, 22,w
+ declare_generic_reg hash_mask, 21,w
+ declare_generic_reg start_out, 12,x
+ declare_generic_reg end_out, 12,x
+ declare_generic_reg next_out, 8,x
+ declare_generic_reg file_start, 20,x
+ declare_generic_reg last_seen, 15,x
+ declare_generic_reg total_in, 25,x
+ declare_generic_reg NULL_DIST_SYM, 23,w
+ declare_generic_reg match_length, 3,x
+ declare_generic_reg dist, 7,x
+ declare_generic_reg dist_inc, 26,w // dist - 1
+ declare_generic_reg literal, 10,x
+
+ declare_generic_reg tmp0, 4,x
+ declare_generic_reg tmp1, 5,x
+
+isal_deflate_icf_body_hash_hist_aarch64:
+ stp x29, x30, [sp, -80]!
+ add x29, sp, 0
+ str x24, [sp, 56]
+
+ ldr avail_in, [stream, offset_avail_in]
+ cbnz avail_in, .stream_available
+
+	ldr w1, [stream, offset_end_of_stream] // one 32-bit load picks up both end_of_stream and flush (adjacent 16-bit fields)
+ cbz w1, .done
+
+ add state, stream, offset_state
+ b .state_flush_read_buffer
+
+ .align 2
+.stream_available:
+ stp x19, x20, [x29, 16]
+ stp x21, x22, [x29, 32]
+ str x23, [x29, 48]
+ stp x25, x26, [x29, 64]
+
+ ldr level_buf, [stream, offset_level_buf]
+ add state, stream, offset_state // 64
+ mov stream_saved, stream
+ ldr start_in, [stream, offset_next_in] // 0
+ ldr w_total_in, [stream, offset_total_in]
+
+ mov x0, offset_hash_hist
+ add last_seen, level_buf, x0
+
+ ldr x0, [level_buf, offset_icf_buf_avail_out] // 4696
+ ldr start_out, [level_buf, offset_icf_buf_next] // 4688
+
+ mov next_in, start_in
+ and x0, x0, -4
+ ldp hist_size, hash_mask, [state, offset_dist_mask] // 12
+ add end_in, start_in, avail_in, uxtw
+ mov next_out, start_out
+ add end_out, start_out, x0
+
+ add x0, next_in, ISAL_LOOK_AHEAD // 288
+ sub file_start, start_in, w_total_in, uxtw
+ mov NULL_DIST_SYM, 30
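+	//30 is the null distance symbol written alongside plain literals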
+ add next_in_iter, next_in, 1
+ cmp end_in, x0
+ bls .while_loop_end
+
+ .align 3
+.while_loop:
+ cmp next_out, end_out
+ bcs .state_create_hdr
+
+ ldr w_literal, [next_in]
+ mov w0, w_literal
+ crc32cw w0, wzr, w0
+
+ and w0, w0, hash_mask
+ sub x1, next_in, file_start
+ lsl x0, x0, 1
+
+ ldrh w_dist, [last_seen, x0]
+ strh w1, [last_seen, x0]
+ sub w1, w1, w_dist
+ and w_dist, w1, 65535
+
+ sub dist_inc, w_dist, #1
+ cmp dist_inc, hist_size
+ bcc .dist_vs_hist_size
+
+.while_latter_part:
+ and w_literal, w_literal, 255
+ mov next_in, next_in_iter
+ add next_out, next_out, 4
+ add x1, level_buf, w_literal, uxtb 2
+	ldr	w0, [x1, offset_hist_ll_hist]	// 2296, ll_hist
+	add	w0, w0, 1
+	str	w0, [x1, offset_hist_ll_hist]	// 2296, ll_hist
+ ldrh w0, [next_out, -4]
+ bfi w0, w_literal, 0, 10
+ strh w0, [next_out, -4]
+ ldr w0, [next_out, -4]
+ bfi w0, NULL_DIST_SYM, 10, 9
+ str w0, [next_out, -4]
+ ubfx x0, x0, 16, 3
+ strh w0, [next_out, -2]
+
+.while_loop_check:
+ add x0, next_in, ISAL_LOOK_AHEAD // 288
+ add next_in_iter, next_in, 1
+ cmp end_in, x0
+ bhi .while_loop
+ b .while_loop_end
+
+ .align 2
+.dist_vs_hist_size:
+ mov x1, next_in
+ mov w2, 258
+ sub x0, next_in, w_dist, uxth
+ compare_258_bytes param0,param1,match_length,tmp0,tmp1
+
+ and w1, w_match_length, 65535 // 0xffff
+ cmp w1, 3
+ bls .while_latter_part
+
+ ldr w0, [next_in, 1]
+ mov x4, next_in
+ add next_in, next_in, w1, uxth
+ crc32cw w0, wzr, w0
+
+ and w0, hash_mask, w0
+ sub next_in_iter, next_in_iter, file_start
+ strh w_next_in_iter, [last_seen, x0, lsl 1]
+ ldr w0, [x4, 2]!
+ crc32cw w0, wzr, w0
+
+ and w0, hash_mask, w0
+ and w_match_length, w_match_length, 65535 // 0xffff
+ sub x4, x4, file_start
+
+ // get_len_icf_code
+ add w_match_length, w_match_length, 254
+ // get_dist_icf_code, first part
+ mov w1, 0 // w1 => dist_extra
+ strh w4, [last_seen, x0, lsl 1]
+ cmp w_dist, 2
+ ubfiz x0, match_length, 2, 17
+ add x0, level_buf, x0
+ bhi .compute_dist_icf_code
+
+.match_length_end:
+ // handle level_buf->hist
+ ldr w2, [x0, offset_hist_ll_hist] // 2296, ll_hist
+ add x4, level_buf, dist_inc, uxtw 2 // d_hist
+ add next_out, next_out, 4
+ add w2, w2, 1 // ll_hist
+ str w2, [x0, offset_hist_ll_hist] // 2296, ll_hist
+ ldr w0, [x4, offset_hist_d_hist] // 2176, d_hist
+ add w0, w0, 1 // d_hist
+ str w0, [x4, offset_hist_d_hist] // 2176, d_hist
+
+ // write_deflate_icf
+ ldrh w0, [next_out, -4]
+ bfi w0, w3, 0, 10
+ strh w0, [next_out, -4]
+ ldr w0, [next_out, -4]
+ bfi w0, dist_inc, 10, 9
+ str w0, [next_out, -4]
+ lsr w0, w0, 16
+ bfi w0, w1, 3, 13 // w1 => dist_extra
+ strh w0, [next_out, -2]
+ b .while_loop_check
+
+ .align 2
+// get_dist_icf_code, 2nd part
+.compute_dist_icf_code:
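+	//num_extra = 30 - clz(dist_inc); dist_extra = dist_inc & ((1 << num_extra) - 1)
+	//dist_sym  = (dist_inc >> num_extra) + 2 * num_extra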
+ clz w1, dist_inc
+ mov w2, 30
+ sub w2, w2, w1
+ mov w1, 1
+ lsl w1, w1, w2
+ sub w1, w1, #1
+ and w1, w1, dist_inc
+ lsr dist_inc, dist_inc, w2
+ add dist_inc, dist_inc, w2, lsl 1
+ and w1, w1, 8191
+ b .match_length_end
+
+.while_loop_end:
+ sub x19, next_in, x19
+ cmp x19, 0
+ ble .skip_igzip_hist2
+
+ mov w0, 1
+ strb w0, [stream_saved, offset_state_has_hist] // 135
+
+.skip_igzip_hist2:
+ add w19, w_total_in, w19
+ ldr w0, [stream_saved, offset_end_of_stream] // 56
+ sub x12, end_out, next_out
+ asr x12, x12, 2 // x12 => end_out - next_out
+ str next_in, [stream_saved]
+ str w19, [stream_saved, offset_total_in] // 12
+ sub next_in, end_in, next_in
+ str w19, [stream_saved, offset_state_block_end] // 72
+
+ ldp x25, x26, [x29, 64]
+ ldr x23, [x29, 48]
+ ldp x21, x22, [x29, 32]
+ ldp x19, x20, [x29, 16]
+
+ str w9, [stream_saved, offset_avail_in] // 8
+ str next_out, [level_buf, offset_icf_buf_next] // 4688
+ str x12, [level_buf, offset_icf_buf_avail_out] // 4696, x12 => end_out - next_out
+ cbnz w0, .state_flush_read_buffer
+ b .done
+
+ .align 2
+.state_create_hdr:
+ mov w0, 2
+ str w0, [x24, 20]
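+	//x24 = state; store the create-hdr state value (offset 20 is the state field of isal_zstate)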
+ sub start_in, next_in, start_in
+ cmp start_in, 0
+ ble .skip_igzip_hist
+
+ mov w0, 1
+ strb w0, [stream_saved, offset_state_has_hist] // 135
+
+.skip_igzip_hist:
+ add w_total_in, w_total_in, w19
+ sub x12, end_out, next_out
+ asr x12, x12, 2 // x12 => end_out - next_out
+ str next_in, [stream_saved]
+ sub next_in, end_in, next_in
+ str w_total_in, [stream_saved, offset_total_in] // 12
+ str w_total_in, [stream_saved, offset_state_block_end] // 72
+
+ ldp x25, x26, [x29, 64]
+ ldr x23, [x29, 48]
+ ldp x21, x22, [x29, 32]
+ ldp x19, x20, [x29, 16]
+
+ str w9, [stream_saved, offset_avail_in] // 8
+ str next_out, [level_buf, offset_icf_buf_next] // 4688
+ str x12, [level_buf, offset_icf_buf_avail_out] // 4696, x12 => end_out - next_out
+ b .done
+
+.state_flush_read_buffer:
+ mov w0, 4
+ str w0, [x24, 20]
+
+.done:
+ ldr x24, [sp, 56]
+ ldp x29, x30, [sp], 80
+ ret
+
+ .size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
diff --git a/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
new file mode 100644
index 000000000..bb2baa22f
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
@@ -0,0 +1,397 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+/*
+void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
+*/
+
+/* constant */
+
+/* offset of struct isal_zstream */
+.equ offset_next_in, 0
+.equ offset_avail_in, 8
+.equ offset_total_in, 12
+.equ offset_next_out, 16
+.equ offset_avail_out, 24
+.equ offset_total_out, 28
+.equ offset_hufftables, 32
+.equ offset_level, 40
+.equ offset_level_buf_size, 44
+.equ offset_level_buf, 48
+.equ offset_end_of_stream, 56
+.equ offset_flush, 58
+.equ offset_gzip_flag, 60
+.equ offset_hist_bits, 62
+.equ offset_state, 64
+.equ offset_state_block_end, 72
+.equ offset_state_state, 84
+.equ offset_state_has_hist, 135
+
+/* offset of struct level_buf */
+.equ offset_encode_tables, 0
+.equ offset_hist, 2176
+.equ offset_hist_d_hist, 2176
+.equ offset_hist_ll_hist, 2296
+.equ offset_deflate_hdr_count, 4348
+.equ offset_deflate_hdr_extra_bits, 4352
+.equ offset_deflate_hdr, 4356
+.equ offset_icf_buf_next, 4688
+.equ offset_icf_buf_avail_out, 4696
+.equ offset_icf_buf_start, 4704
+.equ offset_hash8k, 4712
+.equ offset_hash_hist, 4712
+
+/* offset of struct isal_zstate */
+.equ offset_dist_mask, 12
+.equ offset_hash_mask, 16
+.equ offset_state_of_zstate, 20
+
+/* macros */
+.equ ISAL_LOOK_AHEAD, 288
+
+ /* arguments */
+ declare_generic_reg stream, 0,x
+
+ declare_generic_reg param0, 0,x
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+ declare_generic_reg param3, 3,x
+ declare_generic_reg param4, 4,x
+ declare_generic_reg param5, 5,x
+ declare_generic_reg param6, 6,x
+
+ /* local variable */
+ declare_generic_reg stream_saved, 15,x
+ declare_generic_reg level_buf, 13,x
+ declare_generic_reg start_in, 21,x
+ declare_generic_reg start_out, 22,x
+ declare_generic_reg state, 23,x
+ declare_generic_reg end_out, 12,x
+ declare_generic_reg end_in, 11,x
+ declare_generic_reg next_in, 8,x
+ declare_generic_reg next_out, 10,x
+ declare_generic_reg next_out_iter, 5,x
+ declare_generic_reg file_start, 18,x
+ declare_generic_reg last_seen, 14,x
+
+ declare_generic_reg literal_code, 9,w
+ declare_generic_reg hash_mask, 19,w
+ declare_generic_reg hist_size, 20,w
+ declare_generic_reg dist, 7,w
+ declare_generic_reg dist_inc, 24,w
+
+ declare_generic_reg tmp0, 25,x
+ declare_generic_reg tmp1, 26,x
+ declare_generic_reg tmp2, 27,x
+ declare_generic_reg tmp3, 28,x
+
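+/*
+write_deflate_icf_constprop: emit a literal-only deflate_icf record;
+lit_len goes in bits 0-9, the null distance symbol (30) in bits 10-18,
+and a zero dist_extra in bits 19-31
+*/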
+ .align 2
+ .type write_deflate_icf_constprop, %function
+write_deflate_icf_constprop:
+ ldrh w2, [x0]
+ mov w3, 30
+ bfi w2, w1, 0, 10
+ strh w2, [x0]
+ ldr w1, [x0]
+ bfi w1, w3, 10, 9
+ str w1, [x0]
+ ubfx x1, x1, 16, 3
+ strh w1, [x0, 2]
+ ret
+ .size write_deflate_icf_constprop, .-write_deflate_icf_constprop
+
+ .align 2
+ .type write_deflate_icf, %function
+write_deflate_icf:
+ ldrh w4, [x0]
+ bfi w4, w1, 0, 10
+ strh w4, [x0]
+ ldr w1, [x0]
+ bfi w1, w2, 10, 9
+ str w1, [x0]
+ lsr w1, w1, 16
+ bfi w1, w3, 3, 13
+ strh w1, [x0, 2]
+ ret
+ .size write_deflate_icf, .-write_deflate_icf
+
+ .align 2
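+/* update_state: bare offsets (48 = level_buf, 135 = has_hist, 4688/4696 = icf_buf fields) match the .equ table above */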
+ .type update_state, %function
+update_state:
+ sub x7, x2, x1
+ ldr x4, [x0, 48]
+ cmp x7, 0
+ ble .L48
+ mov w1, 1
+ strb w1, [x0, 135]
+.L48:
+ ldr w1, [x0, 12]
+ sub x6, x6, x5
+ str x2, [x0]
+ sub x3, x3, x2
+ add w1, w1, w7
+ stp w3, w1, [x0, 8]
+ str w1, [x0, 72]
+ asr x6, x6, 2
+ str x5, [x4, 4688]
+ str x6, [x4, 4696]
+ ret
+ .size update_state, .-update_state
+
+ .align 2
+ .global isal_deflate_icf_finish_hash_hist_aarch64
+ .type isal_deflate_icf_finish_hash_hist_aarch64, %function
+isal_deflate_icf_finish_hash_hist_aarch64:
+ ldr w_end_in, [stream, 8] // stream->avail_in
+ cbz w_end_in, .stream_not_available
+
+ stp x29, x30, [sp, -96]!
+ add x29, sp, 0
+ stp x19, x20, [sp, 16]
+ stp x21, x22, [sp, 32]
+ stp x23, x24, [sp, 48]
+ stp x25, x26, [sp, 64]
+ stp x27, x28, [sp, 80]
+
+ mov stream_saved, stream
+ ldr level_buf, [stream, offset_level_buf] // 48
+ ldr start_in, [stream, offset_next_in] // 0
+ ldr start_out, [level_buf, offset_icf_buf_next] // 4688
+ add state, stream, offset_state // 64
+ ldr end_out, [level_buf, offset_icf_buf_avail_out] // 4696
+ mov next_in, start_in
+ ldr w_file_start, [stream, offset_total_in] // 12
+ mov tmp0, offset_hash_hist // 4712
+ add last_seen, level_buf, tmp0
+ add end_in, start_in, w_end_in, uxtw
+ and end_out, end_out, -4
+ mov next_out, start_out
+ ldp hist_size, hash_mask, [state, offset_dist_mask] // 12
+ sub file_start, start_in, file_start
+ add end_out, start_out, end_out
+ mov next_out_iter, next_out
+
+ add x0, next_in, 3
+	cmp end_in, x0 // end_in <= next_in + 3: fewer than 4 bytes remain
+ bls .while_first_end
+
+ .p2align 3
+.while_first:
+ cmp next_out, end_out
+ bcs .save_and_update_state
+ ldr literal_code, [next_in]
+ mov w0, literal_code
+ crc32cw w0, wzr, w0
+ and w0, w0, hash_mask
+ sub x2, next_in, file_start
+ lsl x0, x0, 1
+ ldrh dist, [last_seen, x0]
+ strh w2, [last_seen, x0]
+ sub w2, w2, dist
+ and w_dist, w2, 65535
+ sub dist_inc, dist, #1
+ cmp dist_inc, hist_size
+ bcs .skip_compare258
+
+ mov x2, 0
+ sub w2, w_end_in, w8
+ mov x1, next_in
+ sub x0, next_in, w_dist, uxth
+
+ compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1
+ mov w0, w_tmp2
+ and w2, w0, 65535
+
+ cmp w2, 3
+ bhi .while_first_match_length
+
+.skip_compare258:
+ and literal_code, literal_code, 255 // get_lit_icf_code
+ add next_in, next_in, 1
+ mov w1, literal_code
+ mov x0, next_out
+ add x_literal_code, level_buf, literal_code, uxtb 2 // level_buf->hist.ll_hist
+
+ ldr w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296
+
+ bl write_deflate_icf_constprop // write_deflate_icf
+
+ add next_out, next_out, 4
+.while_first_check:
+ add x0, next_in, 3
+ mov next_out_iter, next_out
+ cmp end_in, x0
+ bhi .while_first
+
+.while_first_end:
+ cmp next_in, end_in
+ bcs .while_2nd_end
+
+ cmp next_out, end_out
+ bcc .while_2nd_handle
+ b .save_and_update_state_2nd
+
+ .p2align 2
+.while_2nd:
+ cmp end_out, next_out_iter
+ bls .save_and_update_state_2nd
+
+.while_2nd_handle:
+ ldrb w2, [next_in], 1
+ mov x0, next_out_iter
+ add next_out_iter, next_out_iter, 4
+ mov w1, w2
+ add x2, level_buf, w2, uxtb 2
+
+ ldr w_tmp0, [x2, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x2, offset_hist_ll_hist] // 2296
+
+ bl write_deflate_icf_constprop
+ cmp end_in, next_in
+ bne .while_2nd
+
+ mov next_in, end_in
+ b .end_of_stream_check_and_exit
+
+ .p2align 2
+.while_first_match_length:
+ and w0, w0, 65535
+ mov w3, 0
+ add w1, w0, 254 // get_len_icf_code
+ cmp dist, 2
+ bhi .compute_dist_icf_code
+
+.while_first_match_length_end:
+ ubfiz x_tmp2, x1, 2, 17
+ add x_tmp1, level_buf, dist_inc, uxtw 2
+ add x_tmp2, level_buf, x_tmp2
+
+ add next_in, next_in, w2, uxth
+ mov w2, dist_inc
+
+ ldr w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296
+
+ mov x0, next_out
+ ldr w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176
+
+ bl write_deflate_icf
+ add next_out, next_out, 4
+ b .while_first_check
+
+// compute_dist_icf_code
+ .p2align 2
+.compute_dist_icf_code:
+ clz w3, dist_inc
+ mov w0, 30
+ sub w0, w0, w3
+
+ mov w3, 1
+ lsl w3, w3, w0
+ sub w3, w3, #1
+ and w3, w3, dist_inc
+ lsl w4, w0, 1
+ lsr dist_inc, dist_inc, w0
+ add dist_inc, dist_inc, w4
+ b .while_first_match_length_end
+
+.while_2nd_end:
+ beq .end_of_stream_check_and_exit
+ mov param6, end_out
+ b .update_state
+
+.end_of_stream_check_and_exit:
+ ldr w_tmp0, [stream_saved, offset_end_of_stream] // 56
+ cbz w_tmp0, .update_state_2nd
+ b .save_and_update_state_2nd
+
+ .p2align 3
+.save_and_update_state_2nd:
+ mov w_tmp0, 2
+ str w_tmp0, [state, offset_state_of_zstate] // 20
+.update_state_2nd:
+ mov param6, end_out
+ b .update_state
+
+ .p2align 2
+.save_and_update_state:
+ mov param6, end_out
+ mov param5, next_out
+ mov w_tmp0, 2
+ str w_tmp0, [state, offset_state_of_zstate] // 20
+.update_state:
+ mov param4, start_out
+ mov param1, start_in
+ mov param3, end_in
+ mov param2, next_in
+ mov param0, stream_saved
+
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldp x23, x24, [sp, 48]
+ ldp x25, x26, [sp, 64]
+ ldp x27, x28, [sp, 80]
+ ldp x29, x30, [sp], 96
+
+ b update_state
+
+ .p2align 2
+.stream_not_available:
+ ldr w1, [stream, offset_end_of_stream] // 56
+ cbz w1, .done
+
+ mov w1, 2
+ str w1, [stream, offset_state_state] // 84
+.done:
+ ret
+
+ .size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
diff --git a/src/isa-l/igzip/aarch64/isal_update_histogram.S b/src/isa-l/igzip/aarch64/isal_update_histogram.S
new file mode 100644
index 000000000..abcec0f14
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_update_histogram.S
@@ -0,0 +1,311 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+Macro declarations
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+.macro convert_dist_to_dist_sym dist:req,tmp0:req,tmp1:req
+ mov w_\tmp0, w_\dist
+ mov w_\dist, -1
+ cmp w_\tmp0, 32768
+ bhi .dist2code_done
+ sub w_\dist, w_\tmp0, #1
+ cmp w_\tmp0, 4
+ bls .dist2code_done
+ clz w_\tmp1, w_\dist
+ mov w_\tmp0, 30
+ sub w_\tmp0, w_\tmp0, w_\tmp1
+ lsr w_\dist, w_\dist, w_\tmp0
+ add w_\dist, w_\dist, w_\tmp0, lsl 1
+.dist2code_done:
+.endm
+
+.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req
+ adrp x_\tmp0, .len_to_code_tab_lanchor
+ add x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
+ ldr w_\length_out, [x_\tmp0, w_\length, uxtw 2]
+ add w_\length_out, w_\length_out, 256
+.endm
+
+ .section .rodata
+ .align 4
+.len_to_code_tab_lanchor = . + 0
+ .type len_to_code_tab, %object
+ .size len_to_code_tab, 1056
+len_to_code_tab:
+ .word 0x00, 0x00, 0x00
+ .word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
+ .word 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
+ .word 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
+ .word 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
+ .word 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
+ .word 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
+ .word 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
+ .word 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
+ .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+ .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+ .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+ .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+ .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+ .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+ .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+ .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
+ .word 0x00, 0x00, 0x00, 0x00, 0x00
+
+ .text
+ .global isal_update_histogram_aarch64
+ .arch armv8-a+crc
+ .type isal_update_histogram_aarch64, %function
+
+/*
+void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
+ struct isal_huff_histogram *histogram);
+*/
+
+ /* arguments */
+ declare_generic_reg start_stream, 0,x
+ declare_generic_reg length, 1,x
+ declare_generic_reg histogram, 2,x
+
+ declare_generic_reg param0, 0,x
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+
+ /* local variable */
+ declare_generic_reg start_stream_saved, 10,x
+ declare_generic_reg histogram_saved, 23,x
+ declare_generic_reg current, 19,x
+ declare_generic_reg last_seen, 20,x
+ declare_generic_reg end_stream, 21,x
+ declare_generic_reg loop_end_iter, 22,x
+ declare_generic_reg dist_histogram, 12,x
+ declare_generic_reg lit_len_histogram, 23,x
+ declare_generic_reg literal, 8,x
+ declare_generic_reg next_hash, 9,x
+ declare_generic_reg end, 4,x
+ declare_generic_reg dist, 7,x
+ declare_generic_reg D, 11,w
+ declare_generic_reg match_length, 3,w
+
+ declare_generic_reg tmp0, 5,w
+ declare_generic_reg tmp1, 6,w
+
+/* constant */
+.equ LIT_LEN, 286
+.equ DIST_LEN, 30
+
+.equ lit_len_offset, 0
+.equ dist_offset, (8*LIT_LEN) // 2288
+.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528
+.equ hash_table_size, (8*1024*2) // 16384
+
+isal_update_histogram_aarch64:
+ cmp w_length, 0
+ ble .done
+
+ stp x29, x30, [sp, -64]!
+ add x29, sp, 0
+ stp x19, x20, [sp, 16]
+ stp x21, x22, [sp, 32]
+ str x23, [sp, 48]
+
+ add last_seen, histogram, hash_offset
+ add end_stream, start_stream, w_length, sxtw
+ mov current, start_stream
+ sub loop_end_iter, end_stream, #3
+ mov histogram_saved, histogram
+
+ mov x0, last_seen
+ mov w1, 0
+ mov x2, hash_table_size
+ bl memset
+
+ cmp current, loop_end_iter
+ bcs .loop_end
+
+ mov start_stream_saved, current
+ add dist_histogram, histogram_saved, dist_offset
+ mov D, 32766
+ b .loop
+
+ .align 2
+.loop_2nd_stream:
+ and literal, literal, 0xff
+ mov current, next_hash
+ cmp loop_end_iter, current
+
+ ldr x0, [lit_len_histogram, literal, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, literal, lsl 3]
+ bls .loop_end
+
+.loop:
+ ldr w_literal, [current]
+ add next_hash, current, 1
+
+ mov w0, w_literal
+ crc32cw w0, wzr, w0
+
+ ubfiz x0, x0, 1, 13
+ sub x2, current, start_stream_saved
+ ldrh w_dist, [last_seen, x0]
+ strh w2, [last_seen, x0]
+ sub w2, w2, w_dist
+ and w_dist, w2, 65535
+
+ sub w0, w_dist, #1
+ cmp w0, D
+ bhi .loop_2nd_stream
+
+ sub w2, w_end_stream, w_current
+ mov x1, current
+ sub x0, current, w_dist, uxth
+ compare_max_258_bytes param0,param1,param2,match_length,tmp0,tmp1
+
+ cmp match_length, 3
+ bls .loop_2nd_stream
+
+ add end, current, 3
+ cmp end, loop_end_iter
+ csel end, end, loop_end_iter, ls
+ cmp end, next_hash
+ bls .skip_inner_loop
+
+ .align 3
+.inner_loop:
+ ldr w0, [next_hash]
+ crc32cw w0, wzr, w0
+
+ ubfiz x0, x0, 1, 13
+ sub x1, next_hash, start_stream_saved
+ add next_hash, next_hash, 1
+ cmp next_hash, end
+ strh w1, [last_seen, x0]
+ bne .inner_loop
+
+.skip_inner_loop:
+ convert_dist_to_dist_sym dist, tmp0, tmp1
+ uxtw x2, w_dist
+ ldr x1, [dist_histogram, x2, lsl 3]
+ add x1, x1, 1
+ str x1, [dist_histogram, x2, lsl 3]
+
+ convert_length_to_len_sym match_length,tmp1,tmp0
+ uxtw x0, w_tmp1
+ ldr x1, [lit_len_histogram, x0, lsl 3]
+ add x1, x1, 1
+ str x1, [lit_len_histogram, x0, lsl 3]
+
+ sub match_length, match_length, #1
+ add x3, x3, 1
+ add current, current, x3
+ cmp loop_end_iter, current
+ bhi .loop
+
+ .align 3
+// unrolled handling of the last few literal bytes (the trailing for loop)
+.loop_end:
+ cmp end_stream, current
+ bls .loop_fold_end
+
+ mov x0, current
+ ldrb w1, [x0], 1
+ cmp end_stream, x0
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+ bls .loop_fold_end
+
+ ldrb w1, [current, 1]
+ add x0, current, 2
+ cmp end_stream, x0
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+ bls .loop_fold_end
+
+ ldrb w1, [current, 2]
+ add x0, current, 3
+ cmp end_stream, x0
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+ bls .loop_fold_end
+
+ ldrb w1, [current, 3]
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+
+.loop_fold_end:
+ ldr x0, [lit_len_histogram, (256*8)]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, (256*8)]
+
+ ldr x23, [sp, 48]
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldp x29, x30, [sp], 64
+ ret
+ .align 2
+.done:
+ ret
+ .size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
diff --git a/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h b/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h
new file mode 100644
index 000000000..d55ec09dc
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h
@@ -0,0 +1,72 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __LZ0A_CONST_AARCH64_H__
+#define __LZ0A_CONST_AARCH64_H__
+#include "options_aarch64.h"
+
+#ifdef __ASSEMBLY__
+.set K , 1024
+.set D , IGZIP_HIST_SIZE // Amount of history
+.set LA , 18 * 16 // Max look-ahead, rounded up to 32 byte boundary
+.set BSIZE , 2*IGZIP_HIST_SIZE + LA // Nominal buffer size
+
+/// Constants for stateless compression
+#define LAST_BYTES_COUNT 3 // Bytes to prevent reading out of array bounds
+#define LA_STATELESS 258 // No round up since no data is copied to a buffer
+
+.set IGZIP_LVL0_HASH_SIZE , (8 * K)
+.set IGZIP_HASH8K_HASH_SIZE , (8 * K)
+.set IGZIP_HASH_HIST_HASH_SIZE , IGZIP_HIST_SIZE
+.set IGZIP_HASH_MAP_HASH_SIZE , IGZIP_HIST_SIZE
+
+#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
+#define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1)
+#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_HASH_SIZE - 1)
+#define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1)
+
+.set MIN_DEF_MATCH , 3 // Minimum length of a match in deflate
+.set SHORTEST_MATCH , 4
+
+.set SLOP , 8
+
+#define ICF_CODE_BYTES 4
+#define LIT_LEN_BIT_COUNT 10
+#define DIST_LIT_BIT_COUNT 9
+
+#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
+#define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
+
+#define DIST_OFFSET LIT_LEN_BIT_COUNT
+#define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT)
+#define LIT (0x1E << DIST_OFFSET)
+
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/options_aarch64.h b/src/isa-l/igzip/aarch64/options_aarch64.h
new file mode 100644
index 000000000..32db918f3
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/options_aarch64.h
@@ -0,0 +1,71 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __OPTIONS_AARCH64_H__
+#define __OPTIONS_AARCH64_H__
+
+
+#ifdef __ASSEMBLY__
+
+/// Options:
+/// m - reschedule mem reads
+/// e b - bitbuff style
+/// t s x - compare style
+/// h - limit hash updates
+/// l - use longer huffman table
+/// f - fix cache read
+
+#ifndef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+#if (IGZIP_HIST_SIZE > (32 * 1024))
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+#ifdef LONGER_HUFFTABLE
+#if (IGZIP_HIST_SIZE > 8 * 1024)
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (8 * 1024)
+#endif
+#endif
+
+/// (h) limit hash update
+#define LIMIT_HASH_UPDATE
+
+/// (f) fix cache read problem
+#define FIX_CACHE_READ
+
+#define ISAL_DEF_MAX_HDR_SIZE 328
+
+
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/stdmac_aarch64.h b/src/isa-l/igzip/aarch64/stdmac_aarch64.h
new file mode 100644
index 000000000..39afbc640
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/stdmac_aarch64.h
@@ -0,0 +1,57 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __STDMAC_AARCH64_H__
+#define __STDMAC_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+
+#define DEBUG_STACK 144
+
+.macro push_stack
+ stp x29, x30,[sp,0-DEBUG_STACK]!
+ mov x29, sp
+ stp x19, x20, [sp, 16]
+ stp x21, x22, [sp, 32]
+ stp x23, x24, [sp, 48]
+ stp x25, x26, [sp, 64]
+ stp x27, x28, [sp, 80]
+.endm
+.macro pop_stack
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldp x23, x24, [sp, 48]
+ ldp x25, x26, [sp, 64]
+ ldp x27, x28, [sp, 80]
+
+ ldp x29, x30, [sp], DEBUG_STACK
+.endm
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/adler32_avx2_4.asm b/src/isa-l/igzip/adler32_avx2_4.asm
new file mode 100644
index 000000000..798310fd0
--- /dev/null
+++ b/src/isa-l/igzip/adler32_avx2_4.asm
@@ -0,0 +1,295 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; uint32_t adler32_avx2_4(uint32_t init, const unsigned char *buf, uint64_t len)
+
+%define LIMIT 5552
+%define BASE 0xFFF1 ; 65521
+
+%define CHUNKSIZE 16
+%define CHUNKSIZE_M1 (CHUNKSIZE-1)
+
+%include "reg_sizes.asm"
+
+default rel
+[bits 64]
+
+; need to keep free: eax, ecx, edx
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg1 rdi
+ %define arg2 rsi
+ %define arg3 rdx
+
+ %define init_d edi
+ %define data r9
+ %define size r10
+ %define s r11
+ %define a_d r12d
+ %define b_d r8d
+ %define end r13
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg1 rcx
+ %define arg2 rdx
+ %define arg3 r8
+
+ %define init_d r12d
+ %define data r9
+ %define size r10
+ %define s r11
+ %define a_d esi
+ %define b_d edi
+ %define end r13
+
+ %define stack_size 2*16 + 5*8 ; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ save_reg rdi, 2*16 + 0*8
+ save_reg rsi, 2*16 + 1*8
+ save_reg r12, 2*16 + 2*8
+ save_reg r13, 2*16 + 3*8
+ end_prolog
+ mov init_d, ecx ; initialize init_d from arg1 to keep ecx free
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ mov rdi, [rsp + 2*16 + 0*8]
+ mov rsi, [rsp + 2*16 + 1*8]
+ mov r12, [rsp + 2*16 + 2*8]
+ mov r13, [rsp + 2*16 + 3*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define ya ymm0
+%define yb ymm1
+%define ydata0 ymm2
+%define ydata1 ymm3
+%define ysa ymm4
+%define ydata ysa
+%define ytmp0 ydata0
+%define ytmp1 ydata1
+%define ytmp2 ymm5
+%define xa xmm0
+%define xb xmm1
+%define xtmp0 xmm2
+%define xtmp1 xmm3
+%define xsa xmm4
+%define xtmp2 xmm5
+%define yshuf0 ymm6
+%define yshuf1 ymm7
+
+[bits 64]
+default rel
+section .text
+
+mk_global adler32_avx2_4, function
+func(adler32_avx2_4)
+ FUNC_SAVE
+
+ vmovdqa yshuf0, [SHUF0]
+ vmovdqa yshuf1, [SHUF1]
+
+ mov data, arg2
+ mov size, arg3
+
+ mov b_d, init_d
+ shr b_d, 16
+ and init_d, 0xFFFF
+ cmp size, 32
+ jb .lt64
+ vmovd xa, init_d
+ vpxor yb, yb, yb
+.sloop1:
+ mov s, LIMIT
+ cmp s, size
+ cmova s, size ; s = min(size, LIMIT)
+ lea end, [data + s - CHUNKSIZE_M1]
+ cmp data, end
+ jae .skip_loop_1a
+align 32
+.sloop1a:
+ ; do CHUNKSIZE adds
+ vbroadcastf128 ydata, [data]
+ add data, CHUNKSIZE
+ vpshufb ydata0, ydata, yshuf0
+ vpaddd ya, ya, ydata0
+ vpaddd yb, yb, ya
+ vpshufb ydata1, ydata, yshuf1
+ vpaddd ya, ya, ydata1
+ vpaddd yb, yb, ya
+ cmp data, end
+ jb .sloop1a
+
+.skip_loop_1a:
+ add end, CHUNKSIZE_M1
+
+ test s, CHUNKSIZE_M1
+ jnz .do_final
+
+ ; either we're done, or we just did LIMIT
+ sub size, s
+
+ ; reduce
+ vpslld yb, 3 ; b is scaled by 8
+ vpmulld ysa, ya, [A_SCALE] ; scaled a
+
+ ; compute horizontal sums of ya, yb, ysa
+ vextracti128 xtmp0, ya, 1
+ vextracti128 xtmp1, yb, 1
+ vextracti128 xtmp2, ysa, 1
+ vpaddd xa, xa, xtmp0
+ vpaddd xb, xb, xtmp1
+ vpaddd xsa, xsa, xtmp2
+ vphaddd xa, xa, xa
+ vphaddd xb, xb, xb
+ vphaddd xsa, xsa, xsa
+ vphaddd xa, xa, xa
+ vphaddd xb, xb, xb
+ vphaddd xsa, xsa, xsa
+
+ vmovd eax, xa
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ mov a_d, edx
+
+ vpsubd xb, xb, xsa
+ vmovd eax, xb
+ add eax, b_d
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ mov b_d, edx
+
+ test size, size
+ jz .finish
+
+ ; continue loop
+ vmovd xa, a_d
+ vpxor yb, yb
+ jmp .sloop1
+
+.finish:
+ mov eax, b_d
+ shl eax, 16
+ or eax, a_d
+ jmp .end
+
+.lt64:
+ mov a_d, init_d
+ lea end, [data + size]
+ test size, size
+ jnz .final_loop
+ jmp .zero_size
+
+ ; handle remaining 1...15 bytes
+.do_final:
+ ; reduce
+ vpslld yb, 3 ; b is scaled by 8
+ vpmulld ysa, ya, [A_SCALE] ; scaled a
+
+ vextracti128 xtmp0, ya, 1
+ vextracti128 xtmp1, yb, 1
+ vextracti128 xtmp2, ysa, 1
+ vpaddd xa, xa, xtmp0
+ vpaddd xb, xb, xtmp1
+ vpaddd xsa, xsa, xtmp2
+ vphaddd xa, xa, xa
+ vphaddd xb, xb, xb
+ vphaddd xsa, xsa, xsa
+ vphaddd xa, xa, xa
+ vphaddd xb, xb, xb
+ vphaddd xsa, xsa, xsa
+ vpsubd xb, xb, xsa
+
+ vmovd a_d, xa
+ vmovd eax, xb
+ add b_d, eax
+
+align 32
+.final_loop:
+ movzx eax, byte[data]
+ add a_d, eax
+ inc data
+ add b_d, a_d
+ cmp data, end
+ jb .final_loop
+
+.zero_size:
+ mov eax, a_d
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ mov a_d, edx
+
+ mov eax, b_d
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ shl edx, 16
+ or edx, a_d
+ mov eax, edx
+
+.end:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+align 32
+A_SCALE:
+ dq 0x0000000100000000, 0x0000000300000002
+ dq 0x0000000500000004, 0x0000000700000006
+SHUF0:
+ dq 0xFFFFFF01FFFFFF00, 0xFFFFFF03FFFFFF02
+ dq 0xFFFFFF05FFFFFF04, 0xFFFFFF07FFFFFF06
+SHUF1:
+ dq 0xFFFFFF09FFFFFF08, 0xFFFFFF0BFFFFFF0A
+ dq 0xFFFFFF0DFFFFFF0C, 0xFFFFFF0FFFFFFF0E
+
diff --git a/src/isa-l/igzip/adler32_base.c b/src/isa-l/igzip/adler32_base.c
new file mode 100644
index 000000000..034b71a41
--- /dev/null
+++ b/src/isa-l/igzip/adler32_base.c
@@ -0,0 +1,63 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include "igzip_checksums.h"
+
+uint32_t adler32_base(uint32_t adler32, uint8_t * start, uint32_t length)
+{
+ uint8_t *end, *next = start;
+ uint64_t A, B;
+
+ A = adler32 & 0xffff;
+ B = adler32 >> 16;
+
+ while (length > MAX_ADLER_BUF) {
+ end = next + MAX_ADLER_BUF;
+ for (; next < end; next++) {
+ A += *next;
+ B += A;
+ }
+
+ A = A % ADLER_MOD;
+ B = B % ADLER_MOD;
+ length -= MAX_ADLER_BUF;
+ }
+
+ end = next + length;
+ for (; next < end; next++) {
+ A += *next;
+ B += A;
+ }
+
+ A = A % ADLER_MOD;
+ B = B % ADLER_MOD;
+
+ return B << 16 | A;
+}
diff --git a/src/isa-l/igzip/adler32_perf.c b/src/isa-l/igzip/adler32_perf.c
new file mode 100644
index 000000000..055e0725f
--- /dev/null
+++ b/src/isa-l/igzip/adler32_perf.c
@@ -0,0 +1,72 @@
+/**********************************************************************
+ Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "igzip_lib.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+#define TEST_LEN 8*1024
+#define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+#define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+#define TEST_LEN (2 * GT_L3_CACHE)
+#define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+#define TEST_SEED 0x1234
+#endif
+
+int main(int argc, char *argv[])
+{
+ void *buf;
+ uint32_t checksum = 0;
+ struct perf start;
+
+ printf("adler32_perf:\n");
+
+ if (posix_memalign(&buf, 1024, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ memset(buf, 0, TEST_LEN);
+
+ BENCHMARK(&start, BENCHMARK_TIME, checksum |= isal_adler32(TEST_SEED, buf, TEST_LEN));
+ printf("adler32" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ return 0;
+}
diff --git a/src/isa-l/igzip/adler32_sse.asm b/src/isa-l/igzip/adler32_sse.asm
new file mode 100644
index 000000000..fc986cb31
--- /dev/null
+++ b/src/isa-l/igzip/adler32_sse.asm
@@ -0,0 +1,253 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; uint32_t adler32_sse(uint32_t init, const unsigned char *buf, uint64_t len)
+
+%define LIMIT 5552
+%define BASE 0xFFF1 ; 65521
+
+%include "reg_sizes.asm"
+
+default rel
+[bits 64]
+
+; need to keep free: eax, ecx, edx
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg1 rdi
+ %define arg2 rsi
+ %define arg3 rdx
+
+ %define init_d edi
+ %define data r9
+ %define size r10
+ %define s r11
+ %define a_d r12d
+ %define b_d r8d
+ %define end r13
+
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ %endmacro
+%endif
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg1 rcx
+ %define arg2 rdx
+ %define arg3 r8
+
+ %define init_d r12d
+ %define data r9
+ %define size r10
+ %define s r11
+ %define a_d esi
+ %define b_d edi
+ %define end r13
+
+ %define stack_size 5*8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_reg rdi, 0*8
+ save_reg rsi, 1*8
+ save_reg r12, 2*8
+ save_reg r13, 3*8
+ end_prolog
+ mov init_d, ecx ; initialize init_d from arg1 to keep ecx free
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ mov rdi, [rsp + 0*8]
+ mov rsi, [rsp + 1*8]
+ mov r12, [rsp + 2*8]
+ mov r13, [rsp + 3*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define xa xmm0
+%define xb xmm1
+%define xdata0 xmm2
+%define xdata1 xmm3
+%define xsa xmm4
+
+[bits 64]
+default rel
+section .text
+
+mk_global adler32_sse, function
+func(adler32_sse)
+ FUNC_SAVE
+
+ mov data, arg2
+ mov size, arg3
+
+ mov b_d, init_d
+ shr b_d, 16
+ and init_d, 0xFFFF
+ cmp size, 32
+ jb .lt64
+ movd xa, init_d
+ pxor xb, xb
+.sloop1:
+ mov s, LIMIT
+ cmp s, size
+ cmova s, size ; s = min(size, LIMIT)
+ lea end, [data + s - 7]
+ cmp data, end
+ jae .skip_loop_1a
+align 32
+.sloop1a:
+ ; do 8 adds
+ pmovzxbd xdata0, [data]
+ pmovzxbd xdata1, [data + 4]
+ add data, 8
+ paddd xa, xdata0
+ paddd xb, xa
+ paddd xa, xdata1
+ paddd xb, xa
+ cmp data, end
+ jb .sloop1a
+
+.skip_loop_1a:
+ add end, 7
+
+ test s, 7
+ jnz .do_final
+
+ ; either we're done, or we just did LIMIT
+ sub size, s
+
+ ; reduce
+ pslld xb, 2 ; b is scaled by 4
+ movdqa xsa, xa ; scaled a
+ pmulld xsa, [A_SCALE]
+
+ phaddd xa, xa
+ phaddd xb, xb
+ phaddd xsa, xsa
+ phaddd xa, xa
+ phaddd xb, xb
+ phaddd xsa, xsa
+
+ movd eax, xa
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ mov a_d, edx
+
+ psubd xb, xsa
+ movd eax, xb
+ add eax, b_d
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ mov b_d, edx
+
+ test size, size
+ jz .finish
+
+ ; continue loop
+ movd xa, a_d
+ pxor xb, xb
+ jmp .sloop1
+
+.finish:
+ mov eax, b_d
+ shl eax, 16
+ or eax, a_d
+ jmp .end
+
+.lt64:
+ mov a_d, init_d
+ lea end, [data + size]
+ test size, size
+ jnz .final_loop
+ jmp .zero_size
+
+ ; handle remaining 1...7 bytes
+.do_final:
+ ; reduce
+ pslld xb, 2 ; b is scaled by 4
+ movdqa xsa, xa ; scaled a
+ pmulld xsa, [A_SCALE]
+
+ phaddd xa, xa
+ phaddd xb, xb
+ phaddd xsa, xsa
+ phaddd xa, xa
+ phaddd xb, xb
+ phaddd xsa, xsa
+ psubd xb, xsa
+
+ movd a_d, xa
+ movd eax, xb
+ add b_d, eax
+
+align 32
+.final_loop:
+ movzx eax, byte[data]
+ add a_d, eax
+ inc data
+ add b_d, a_d
+ cmp data, end
+ jb .final_loop
+
+.zero_size:
+ mov eax, a_d
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ mov a_d, edx
+
+ mov eax, b_d
+ xor edx, edx
+ mov ecx, BASE
+ div ecx ; divide edx:eax by ecx, quot->eax, rem->edx
+ shl edx, 16
+ or edx, a_d
+ mov eax, edx
+
+.end:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+align 32
+A_SCALE:
+ dq 0x0000000100000000, 0x0000000300000002
diff --git a/src/isa-l/igzip/bitbuf2.asm b/src/isa-l/igzip/bitbuf2.asm
new file mode 100644
index 000000000..71493825e
--- /dev/null
+++ b/src/isa-l/igzip/bitbuf2.asm
@@ -0,0 +1,64 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "stdmac.asm"
+
+; Assumes m_out_buf is a register
+; Clobbers RCX
+; code is clobbered
+; write_bits m_bits, m_bit_count, code, count, m_out_buf
+%macro write_bits 5
+%define %%m_bits %1
+%define %%m_bit_count %2
+%define %%code %3
+%define %%count %4
+%define %%m_out_buf %5
+
+ SHLX %%code, %%code, %%m_bit_count
+
+ or %%m_bits, %%code
+ add %%m_bit_count, %%count
+
+ mov [%%m_out_buf], %%m_bits
+ mov rcx, %%m_bit_count
+ shr rcx, 3 ; rcx = bytes
+ add %%m_out_buf, rcx
+ shl rcx, 3 ; rcx = bits
+ and %%m_bit_count, 0x7
+
+ SHRX %%m_bits, %%m_bits, rcx
+%endm
+
+%macro write_dword 2
+%define %%data %1d
+%define %%addr %2
+ mov [%%addr], %%data
+ add %%addr, 4
+%endm
diff --git a/src/isa-l/igzip/bitbuf2.h b/src/isa-l/igzip/bitbuf2.h
new file mode 100644
index 000000000..51bd752d0
--- /dev/null
+++ b/src/isa-l/igzip/bitbuf2.h
@@ -0,0 +1,130 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#ifndef BITBUF2_H
+#define BITBUF2_H
+
+#include "igzip_lib.h"
+#include "unaligned.h"
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+
+/* MAX_BITBUF_BIT_WRITE is the maximum number of bits that can be safely written
+ * by consecutive calls of write_bits. Note this assumes the bitbuf is in a
+ * state that is possible at the exit of write_bits */
+#define MAX_BITBUF_BIT_WRITE 56
+
+static inline void init(struct BitBuf2 *me)
+{
+ me->m_bits = 0;
+ me->m_bit_count = 0;
+}
+
+static inline void set_buf(struct BitBuf2 *me, unsigned char *buf, unsigned int len)
+{
+ unsigned int slop = 8;
+ me->m_out_buf = me->m_out_start = buf;
+ me->m_out_end = buf + len - slop;
+}
+
+static inline int is_full(struct BitBuf2 *me)
+{
+ return (me->m_out_buf > me->m_out_end);
+}
+
+static inline uint8_t * buffer_ptr(struct BitBuf2 *me)
+{
+ return me->m_out_buf;
+}
+
+static inline uint32_t buffer_used(struct BitBuf2 *me)
+{
+ return (uint32_t)(me->m_out_buf - me->m_out_start);
+}
+
+static inline uint32_t buffer_bits_used(struct BitBuf2 *me)
+{
+ return (8 * (uint32_t)(me->m_out_buf - me->m_out_start) + me->m_bit_count);
+}
+
+static inline void flush_bits(struct BitBuf2 *me)
+{
+ uint32_t bits;
+ store_u64(me->m_out_buf, me->m_bits);
+ bits = me->m_bit_count & ~7;
+ me->m_bit_count -= bits;
+ me->m_out_buf += bits/8;
+ me->m_bits >>= bits;
+}
+
+/* Can write up to 8 bytes to output buffer */
+static inline void flush(struct BitBuf2 *me)
+{
+ uint32_t bytes;
+ if (me->m_bit_count) {
+ store_u64(me->m_out_buf, me->m_bits);
+ bytes = (me->m_bit_count + 7) / 8;
+ me->m_out_buf += bytes;
+ }
+ me->m_bits = 0;
+ me->m_bit_count = 0;
+}
+
+static inline void check_space(struct BitBuf2 *me, uint32_t num_bits)
+{
+ /* Checks if bitbuf has num_bits extra space and flushes the bytes in
+ * the bitbuf if it doesn't. */
+ if (63 - me->m_bit_count < num_bits)
+ flush_bits(me);
+}
+
+static inline void write_bits_unsafe(struct BitBuf2 *me, uint64_t code, uint32_t count)
+{
+ me->m_bits |= code << me->m_bit_count;
+ me->m_bit_count += count;
+}
+
+static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count)
+{ /* Assumes there is space to fit code into m_bits. */
+ me->m_bits |= code << me->m_bit_count;
+ me->m_bit_count += count;
+ flush_bits(me);
+}
+
+static inline void write_bits_flush(struct BitBuf2 *me, uint64_t code, uint32_t count)
+{ /* Assumes there is space to fit code into m_bits. */
+ me->m_bits |= code << me->m_bit_count;
+ me->m_bit_count += count;
+ flush(me);
+}
+
+#endif //BITBUF2_H
diff --git a/src/isa-l/igzip/checksum32_funcs_test.c b/src/isa-l/igzip/checksum32_funcs_test.c
new file mode 100644
index 000000000..cbb5d1bf5
--- /dev/null
+++ b/src/isa-l/igzip/checksum32_funcs_test.c
@@ -0,0 +1,308 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "igzip_checksums.h"
+#include "checksum_test_ref.h"
+#include "types.h"
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define MAX_BUF 512
+#define TEST_SIZE 20
+
+typedef uint32_t(*checksum32_func_t) (uint32_t, const unsigned char *, uint64_t);
+
+typedef struct func_case {
+ char *note;
+ checksum32_func_t checksum32_func_call;
+ checksum32_func_t checksum32_ref_call;
+} func_case_t;
+
+func_case_t test_funcs[] = {
+ {"checksum32_adler", isal_adler32, adler_ref},
+};
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+// Test cases
+int zeros_test(func_case_t * test_func);
+int simple_pattern_test(func_case_t * test_func);
+int seeds_sizes_test(func_case_t * test_func);
+int eob_test(func_case_t * test_func);
+int update_test(func_case_t * test_func);
+int update_over_mod_test(func_case_t * test_func);
+
+int verbose = 0;
+void *buf_alloc = NULL;
+
+int main(int argc, char *argv[])
+{
+ int fail = 0, fail_case;
+ int i, ret;
+ func_case_t *test_func;
+
+ verbose = argc - 1;
+
+ // Align to MAX_BUF boundary
+ ret = posix_memalign(&buf_alloc, MAX_BUF, MAX_BUF * TEST_SIZE);
+ if (ret) {
+ printf("alloc error: Fail");
+ return -1;
+ }
+ srand(TEST_SEED);
+ printf("CHECKSUM32 Tests seed=0x%x\n", TEST_SEED);
+
+ for (i = 0; i < sizeof(test_funcs) / sizeof(test_funcs[0]); i++) {
+ fail_case = 0;
+ test_func = &test_funcs[i];
+
+ printf("Test %s ", test_func->note);
+ fail_case += zeros_test(test_func);
+ fail_case += simple_pattern_test(test_func);
+ fail_case += seeds_sizes_test(test_func);
+ fail_case += eob_test(test_func);
+ fail_case += update_test(test_func);
+ fail_case += update_over_mod_test(test_func);
+ printf("Test %s done: %s\n", test_func->note, fail_case ? "Fail" : "Pass");
+
+ if (fail_case) {
+ printf("\n%s Failed %d tests\n", test_func->note, fail_case);
+ fail++;
+ }
+ }
+
+ printf("CHECKSUM32 Tests all done: %s\n", fail ? "Fail" : "Pass");
+
+ return fail;
+}
+
+// Test of all zeros
+int zeros_test(func_case_t * test_func)
+{
+ uint32_t c_dut, c_ref;
+ int fail = 0;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc;
+ memset(buf, 0, MAX_BUF * 10);
+ c_dut = test_func->checksum32_func_call(TEST_SEED, buf, MAX_BUF * 10);
+ c_ref = test_func->checksum32_ref_call(TEST_SEED, buf, MAX_BUF * 10);
+
+ if (c_dut != c_ref) {
+ fail++;
+ printf("\n opt ref\n");
+ printf(" ------ ------\n");
+ printf("checksum zero = 0x%8x 0x%8x \n", c_dut, c_ref);
+ } else
+ printf(".");
+
+ return fail;
+}
+
+// Another simple test pattern
+int simple_pattern_test(func_case_t * test_func)
+{
+ uint32_t c_dut, c_ref;
+ int fail = 0;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc;
+ memset(buf, 0x8a, MAX_BUF);
+ c_dut = test_func->checksum32_func_call(TEST_SEED, buf, MAX_BUF);
+ c_ref = test_func->checksum32_ref_call(TEST_SEED, buf, MAX_BUF);
+ if (c_dut != c_ref)
+ fail++;
+ if (verbose)
+ printf("checksum all 8a = 0x%8x 0x%8x\n", c_dut, c_ref);
+ else
+ printf(".");
+
+ return fail;
+}
+
+int seeds_sizes_test(func_case_t * test_func)
+{
+ uint32_t c_dut, c_ref;
+ int fail = 0;
+ int i;
+ uint32_t r, s;
+ unsigned char *buf = NULL;
+
+ // Do a few random tests
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+ rand_buffer(buf, MAX_BUF * TEST_SIZE);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ c_dut = test_func->checksum32_func_call(r, buf, MAX_BUF);
+ c_ref = test_func->checksum32_ref_call(r, buf, MAX_BUF);
+ if (c_dut != c_ref)
+ fail++;
+ if (verbose)
+ printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref);
+ else
+ printf(".");
+ buf += MAX_BUF;
+ }
+
+ // Do a few random sizes
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+
+ for (i = MAX_BUF; i >= 0; i--) {
+ c_dut = test_func->checksum32_func_call(r, buf, i);
+ c_ref = test_func->checksum32_ref_call(r, buf, i);
+ if (c_dut != c_ref) {
+ fail++;
+ printf("fail random size%i 0x%8x 0x%8x\n", i, c_dut, c_ref);
+ } else
+ printf(".");
+ }
+
+ // Try different seeds
+ for (s = 0; s < 20; s++) {
+ buf = (unsigned char *)buf_alloc; //reset buf
+
+ r = rand(); // just to get a new seed
+ rand_buffer(buf, MAX_BUF * TEST_SIZE); // new pseudo-rand data
+
+ if (verbose)
+ printf("seed = 0x%x\n", r);
+
+ for (i = 0; i < TEST_SIZE; i++) {
+ c_dut = test_func->checksum32_func_call(r, buf, MAX_BUF);
+ c_ref = test_func->checksum32_ref_call(r, buf, MAX_BUF);
+ if (c_dut != c_ref)
+ fail++;
+ if (verbose)
+ printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref);
+ else
+ printf(".");
+ buf += MAX_BUF;
+ }
+ }
+
+ return fail;
+}
+
+// Run tests at end of buffer
+int eob_test(func_case_t * test_func)
+{
+ uint32_t c_dut, c_ref;
+ int fail = 0;
+ int i;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc; //reset buf
+ buf = buf + ((MAX_BUF - 1) * TEST_SIZE); // line up TEST_SIZE bytes from the end
+ for (i = 0; i < TEST_SIZE; i++) {
+ c_dut = test_func->checksum32_func_call(TEST_SEED, buf + i, TEST_SIZE - i);
+ c_ref = test_func->checksum32_ref_call(TEST_SEED, buf + i, TEST_SIZE - i);
+ if (c_dut != c_ref)
+ fail++;
+ if (verbose)
+ printf("checksum eob rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref);
+ else
+ printf(".");
+ }
+
+ return fail;
+}
+
+int update_test(func_case_t * test_func)
+{
+ uint32_t c_dut, c_ref;
+ int fail = 0;
+ int i;
+ uint32_t r;
+ unsigned char *buf = NULL;
+
+ buf = (unsigned char *)buf_alloc; //reset buf
+ r = rand();
+ // Process the whole buf with reference func single call.
+ c_ref = test_func->checksum32_ref_call(r, buf, MAX_BUF * TEST_SIZE);
+ // Process buf with update method.
+ for (i = 0; i < TEST_SIZE; i++) {
+ c_dut = test_func->checksum32_func_call(r, buf, MAX_BUF);
+ // Update checksum seeds and buf pointer.
+ r = c_dut;
+ buf += MAX_BUF;
+ }
+
+ if (c_dut != c_ref)
+ fail++;
+ if (verbose)
+ printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref);
+ else
+ printf(".");
+
+ return fail;
+}
+
+int update_over_mod_test(func_case_t * test_func)
+{
+ uint32_t c_dut, c_ref;
+ int fail = 0;
+ int i;
+ unsigned char *buf = NULL;
+
+ buf = malloc(ADLER_MOD);
+ memset(buf, 0xff, ADLER_MOD);
+
+ c_ref = c_dut = rand();
+
+ // Process buf with update method.
+ for (i = 0; i < 20; i++) {
+ c_ref = test_func->checksum32_ref_call(c_ref, buf, ADLER_MOD - 64);
+ c_dut = test_func->checksum32_func_call(c_dut, buf, ADLER_MOD - 64);
+ }
+
+ if (c_dut != c_ref)
+ fail++;
+ if (verbose)
+ printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref);
+ else
+ printf(".");
+
+ free(buf);
+ return fail;
+}
diff --git a/src/isa-l/igzip/checksum_test_ref.h b/src/isa-l/igzip/checksum_test_ref.h
new file mode 100644
index 000000000..b561be975
--- /dev/null
+++ b/src/isa-l/igzip/checksum_test_ref.h
@@ -0,0 +1,102 @@
+/*
+ * Reference checksums used in compression tests
+ */
+
+#ifndef CHECKSUM_TEST_REF_H
+#define CHECKSUM_TEST_REF_H
+
+#include <stdint.h>
+
+uint32_t inflate_crc_table[256] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
+ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
+ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
+ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
+ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
+ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
+ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
+ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
+ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
+ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
+ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
+ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
+ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
+ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
+ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
+ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d};
+
+
+uint32_t crc32_gzip_refl_ref(uint32_t crc, const unsigned char *buf, uint64_t len)
+{
+ uint64_t i;
+ crc = ~crc;
+ for (i = 0; i < len; i++)
+ crc = (crc >> 8) ^ inflate_crc_table[(crc & 0xff) ^ buf[i]];
+ return ~crc;
+}
+
+#define ADLER_MOD 65521
+
+
+uint32_t adler_ref(uint32_t init, const unsigned char *buf, uint64_t len)
+{
+ uint64_t i;
+ uint32_t a = init & 0xffff;
+ uint32_t b = init >> 16;
+
+ for (i = 0; i < len; i++) {
+ a = (a + buf[i]) % ADLER_MOD;
+ b = (b + a) % ADLER_MOD;
+ }
+ return (b << 16) | a;
+}
+
+#endif /* CHECKSUM_TEST_REF_H */
diff --git a/src/isa-l/igzip/data_struct2.asm b/src/isa-l/igzip/data_struct2.asm
new file mode 100644
index 000000000..233e264d3
--- /dev/null
+++ b/src/isa-l/igzip/data_struct2.asm
@@ -0,0 +1,275 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; START_FIELDS
+%macro START_FIELDS 0
+%assign _FIELD_OFFSET 0
+%assign _STRUCT_ALIGN 0
+%endm
+
+;; FIELD name size align
+%macro FIELD 3
+%define %%name %1
+%define %%size %2
+%define %%align %3
+
+%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1))
+%%name equ _FIELD_OFFSET
+%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size)
+%if (%%align > _STRUCT_ALIGN)
+%assign _STRUCT_ALIGN %%align
+%endif
+%endm
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; BitBuf2
+
+;; name size align
+FIELD _m_bits, 8, 8
+FIELD _m_bit_count, 4, 4
+FIELD _m_out_buf, 8, 8
+FIELD _m_out_end, 8, 8
+FIELD _m_out_start, 8, 8
+
+%assign _BitBuf2_size _FIELD_OFFSET
+%assign _BitBuf2_align _STRUCT_ALIGN
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define HIST_ELEM_SIZE 4
+
+START_FIELDS ;; isal_mod_hist
+
+;; name size align
+FIELD _d_hist, 30*HIST_ELEM_SIZE, HIST_ELEM_SIZE
+FIELD _ll_hist, 513*HIST_ELEM_SIZE, HIST_ELEM_SIZE
+
+%assign _isal_mod_hist_size _FIELD_OFFSET
+%assign _isal_mod_hist_align _STRUCT_ALIGN
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define HUFF_CODE_SIZE 4
+
+START_FIELDS ;; hufftables_icf
+
+;; name size align
+FIELD _dist_table, 31 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
+FIELD _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
+
+%assign _hufftables_icf_size _FIELD_OFFSET
+%assign _hufftables_icf_align _STRUCT_ALIGN
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; hash8k_buf
+
+;; name size align
+FIELD _hash8k_table, 2 * IGZIP_HASH8K_HASH_SIZE, 2
+
+%assign _hash_buf1_size _FIELD_OFFSET
+%assign _hash_buf1_align _STRUCT_ALIGN
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; hash_map_buf
+
+;; name size align
+FIELD _hash_table, 2 * IGZIP_HASH_MAP_HASH_SIZE, 2
+FIELD _matches_next, 8, 8
+FIELD _matches_end, 8, 8
+FIELD _matches, 4*4*1024, 4
+FIELD _overflow, 4*LA, 4
+
+%assign _hash_map_buf_size _FIELD_OFFSET
+%assign _hash_map_buf_align _STRUCT_ALIGN
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define DEF_MAX_HDR_SIZE 328
+START_FIELDS ;; level_buf
+
+;; name size align
+FIELD _encode_tables, _hufftables_icf_size, _hufftables_icf_align
+FIELD _hist, _isal_mod_hist_size, _isal_mod_hist_align
+FIELD _deflate_hdr_count, 4, 4
+FIELD _deflate_hdr_extra_bits,4, 4
+FIELD _deflate_hdr, DEF_MAX_HDR_SIZE, 1
+FIELD _icf_buf_next, 8, 8
+FIELD _icf_buf_avail_out, 8, 8
+FIELD _icf_buf_start, 8, 8
+FIELD _lvl_extra, _hash_map_buf_size, _hash_map_buf_align
+
+%assign _level_buf_base_size _FIELD_OFFSET
+%assign _level_buf_base_align _STRUCT_ALIGN
+
+_hash8k_hash_table equ _lvl_extra + _hash8k_table
+_hash_map_hash_table equ _lvl_extra + _hash_table
+_hash_map_matches_next equ _lvl_extra + _matches_next
+_hash_map_matches_end equ _lvl_extra + _matches_end
+_hash_map_matches equ _lvl_extra + _matches
+_hist_lit_len equ _hist+_ll_hist
+_hist_dist equ _hist+_d_hist
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; isal_zstate
+
+;; name size align
+FIELD _total_in_start,4, 4
+FIELD _block_next, 4, 4
+FIELD _block_end, 4, 4
+FIELD _dist_mask, 4, 4
+FIELD _hash_mask, 4, 4
+FIELD _state, 4, 4
+FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align
+FIELD _crc, 4, 4
+FIELD _has_wrap_hdr, 1, 1
+FIELD _has_eob_hdr, 1, 1
+FIELD _has_eob, 1, 1
+FIELD _has_hist, 1, 1
+FIELD _has_level_buf_init, 2, 2
+FIELD _count, 4, 4
+FIELD _tmp_out_buff, 16, 1
+FIELD _tmp_out_start, 4, 4
+FIELD _tmp_out_end, 4, 4
+FIELD _b_bytes_valid, 4, 4
+FIELD _b_bytes_processed, 4, 4
+FIELD _buffer, BSIZE, 1
+FIELD _head, IGZIP_LVL0_HASH_SIZE*2, 2
+%assign _isal_zstate_size _FIELD_OFFSET
+%assign _isal_zstate_align _STRUCT_ALIGN
+
+_bitbuf_m_bits equ _bitbuf+_m_bits
+_bitbuf_m_bit_count equ _bitbuf+_m_bit_count
+_bitbuf_m_out_buf equ _bitbuf+_m_out_buf
+_bitbuf_m_out_end equ _bitbuf+_m_out_end
+_bitbuf_m_out_start equ _bitbuf+_m_out_start
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; isal_zstream
+
+;; name size align
+FIELD _next_in, 8, 8
+FIELD _avail_in, 4, 4
+FIELD _total_in, 4, 4
+FIELD _next_out, 8, 8
+FIELD _avail_out, 4, 4
+FIELD _total_out, 4, 4
+FIELD _hufftables, 8, 8
+FIELD _level, 4, 4
+FIELD _level_buf_size, 4, 4
+FIELD _level_buf, 8, 8
+FIELD _end_of_stream, 2, 2
+FIELD _flush, 2, 2
+FIELD _gzip_flag, 2, 2
+FIELD _hist_bits, 2, 2
+FIELD _internal_state, _isal_zstate_size, _isal_zstate_align
+
+%assign _isal_zstream_size _FIELD_OFFSET
+%assign _isal_zstream_align _STRUCT_ALIGN
+
+_internal_state_total_in_start equ _internal_state+_total_in_start
+_internal_state_block_next equ _internal_state+_block_next
+_internal_state_block_end equ _internal_state+_block_end
+_internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid
+_internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed
+_internal_state_crc equ _internal_state+_crc
+_internal_state_dist_mask equ _internal_state+_dist_mask
+_internal_state_hash_mask equ _internal_state+_hash_mask
+_internal_state_bitbuf equ _internal_state+_bitbuf
+_internal_state_state equ _internal_state+_state
+_internal_state_count equ _internal_state+_count
+_internal_state_tmp_out_buff equ _internal_state+_tmp_out_buff
+_internal_state_tmp_out_start equ _internal_state+_tmp_out_start
+_internal_state_tmp_out_end equ _internal_state+_tmp_out_end
+_internal_state_has_wrap_hdr equ _internal_state+_has_wrap_hdr
+_internal_state_has_eob equ _internal_state+_has_eob
+_internal_state_has_eob_hdr equ _internal_state+_has_eob_hdr
+_internal_state_has_hist equ _internal_state+_has_hist
+_internal_state_has_level_buf_init equ _internal_state+_has_level_buf_init
+_internal_state_buffer equ _internal_state+_buffer
+_internal_state_head equ _internal_state+_head
+_internal_state_bitbuf_m_bits equ _internal_state+_bitbuf_m_bits
+_internal_state_bitbuf_m_bit_count equ _internal_state+_bitbuf_m_bit_count
+_internal_state_bitbuf_m_out_buf equ _internal_state+_bitbuf_m_out_buf
+_internal_state_bitbuf_m_out_end equ _internal_state+_bitbuf_m_out_end
+_internal_state_bitbuf_m_out_start equ _internal_state+_bitbuf_m_out_start
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Internal States
+ZSTATE_NEW_HDR equ 0
+ZSTATE_HDR equ (ZSTATE_NEW_HDR + 1)
+ZSTATE_CREATE_HDR equ (ZSTATE_HDR + 1)
+ZSTATE_BODY equ (ZSTATE_CREATE_HDR + 1)
+ZSTATE_FLUSH_READ_BUFFER equ (ZSTATE_BODY + 1)
+ZSTATE_FLUSH_ICF_BUFFER equ (ZSTATE_FLUSH_READ_BUFFER + 1)
+ZSTATE_TYPE0_HDR equ (ZSTATE_FLUSH_ICF_BUFFER + 1)
+ZSTATE_TYPE0_BODY equ (ZSTATE_TYPE0_HDR + 1)
+ZSTATE_SYNC_FLUSH equ (ZSTATE_TYPE0_BODY + 1)
+ZSTATE_FLUSH_WRITE_BUFFER equ (ZSTATE_SYNC_FLUSH + 1)
+ZSTATE_TRL equ (ZSTATE_FLUSH_WRITE_BUFFER + 1)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+_NO_FLUSH equ 0
+_SYNC_FLUSH equ 1
+_FULL_FLUSH equ 2
+_STORED_BLK equ 0
+%assign _STORED_BLK_END 65535
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+IGZIP_NO_HIST equ 0
+IGZIP_HIST equ 1
+IGZIP_DICT_HIST equ 2
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
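
The START_FIELDS/FIELD machinery above mirrors the C compiler's struct layout
rules: each field offset is rounded up to the field's alignment with
(offset + align - 1) & ~(align - 1), and the struct's overall alignment is the
largest field alignment seen. The same computation in C (align_up is an
illustrative helper, not part of ISA-L):

    #include <assert.h>
    #include <stddef.h>

    /* Round offset up to a power-of-two alignment, as FIELD does. */
    static size_t align_up(size_t offset, size_t align)
    {
    	return (offset + align - 1) & ~(align - 1);
    }

    int main(void)
    {
    	/* Recompute the first three BitBuf2 offsets declared above. */
    	size_t off = 0, m_bits, m_bit_count, m_out_buf;

    	m_bits = off = align_up(off, 8); off += 8;
    	m_bit_count = off = align_up(off, 4); off += 4;
    	m_out_buf = off = align_up(off, 8); off += 8;

    	assert(m_bits == 0 && m_bit_count == 8 && m_out_buf == 16);
    	return 0;
    }
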
diff --git a/src/isa-l/igzip/encode_df.c b/src/isa-l/igzip/encode_df.c
new file mode 100644
index 000000000..d26d1c942
--- /dev/null
+++ b/src/isa-l/igzip/encode_df.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <memory.h>
+#include <assert.h>
+
+#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
+#ifdef _MSC_VER
+# include <intrin.h>
+#else
+# include <x86intrin.h>
+#endif
+#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86
+
+#include "encode_df.h"
+#include "bitbuf2.h"
+
+struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
+ struct deflate_icf *end_in, struct BitBuf2 *bb,
+ struct hufftables_icf *hufftables)
+{
+ struct huff_code lsym, dsym;
+
+ while (next_in < end_in && !is_full(bb)) {
+ lsym = hufftables->lit_len_table[next_in->lit_len];
+ dsym = hufftables->dist_lit_table[next_in->lit_dist];
+
+ // insert ll code, dist_code, and extra_bits
+ write_bits_unsafe(bb, lsym.code_and_extra, lsym.length);
+ write_bits_unsafe(bb, dsym.code, dsym.length);
+ write_bits_unsafe(bb, next_in->dist_extra, dsym.extra_bit_count);
+ flush_bits(bb);
+
+ next_in++;
+ }
+
+ return next_in;
+}
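
Each pass through the loop emits at most lsym.length + dsym.length +
dsym.extra_bit_count bits. Deflate caps Huffman code lengths at 15 bits
(RFC 1951), length codes carry at most 5 extra bits (pre-folded into
code_and_extra), and distance codes at most 13, so one token never exceeds 48
bits; with at most 7 bits carried in the accumulator after flush_bits(), the
three unsafe writes always fit a 64-bit buffer. A worst-case accounting sketch:

    #include <assert.h>

    int main(void)
    {
    	int max_lit_len_bits = 15 + 5;	/* code + length extra bits */
    	int max_dist_bits = 15 + 13;	/* code + distance extra bits */
    	int max_carry = 7;		/* bits left after flush_bits() */

    	/* One full token plus the carry fits the 64-bit accumulator. */
    	assert(max_carry + max_lit_len_bits + max_dist_bits <= 64);
    	return 0;
    }
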
diff --git a/src/isa-l/igzip/encode_df.h b/src/isa-l/igzip/encode_df.h
new file mode 100644
index 000000000..f3e4f754d
--- /dev/null
+++ b/src/isa-l/igzip/encode_df.h
@@ -0,0 +1,30 @@
+#ifndef ENCODE_DF_H
+#define ENCODE_DF_H
+
+#include <stdint.h>
+#include "igzip_lib.h"
+#include "huff_codes.h"
+
+/* Deflate Intermediate Compression Format */
+#define LIT_LEN_BIT_COUNT 10
+#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
+#define DIST_LIT_BIT_COUNT 9
+#define DIST_LIT_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
+#define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT
+#define NULL_DIST_SYM 30
+
+#define LEN_START ISAL_DEF_LIT_SYMBOLS
+#define LEN_OFFSET (LEN_START - ISAL_DEF_MIN_MATCH)
+#define LEN_MAX (LEN_OFFSET + ISAL_DEF_MAX_MATCH)
+#define LIT_START (NULL_DIST_SYM + 1)
+#define ICF_CODE_LEN 32
+
+struct deflate_icf {
+ uint32_t lit_len:LIT_LEN_BIT_COUNT;
+ uint32_t lit_dist:DIST_LIT_BIT_COUNT;
+ uint32_t dist_extra:ICF_CODE_LEN - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET;
+};
+
+struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in,
+ struct BitBuf2 *bb, struct hufftables_icf * hufftables);
+#endif
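
An ICF token is a single 32-bit word: bits 9:0 hold the literal/length value
(literals 0-256, match lengths stored as length + 254 in 257-512), bits 18:10
the distance symbol, and the top 13 bits the distance extra bits. A packing
sketch (the memcpy comparison assumes the common little-endian bitfield layout,
which is strictly implementation-defined):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define LIT_LEN_BIT_COUNT 10
    #define DIST_LIT_BIT_COUNT 9
    #define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT
    #define NULL_DIST_SYM 30

    struct deflate_icf {
    	uint32_t lit_len:LIT_LEN_BIT_COUNT;
    	uint32_t lit_dist:DIST_LIT_BIT_COUNT;
    	uint32_t dist_extra:32 - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET;
    };

    int main(void)
    {
    	/* A literal 'A' uses the null distance symbol and no extra bits. */
    	struct deflate_icf tok = { 'A', NULL_DIST_SYM, 0 };
    	uint32_t raw, expect = 'A' | (NULL_DIST_SYM << ICF_DIST_OFFSET);

    	memcpy(&raw, &tok, sizeof(raw));
    	assert(raw == expect);
    	return 0;
    }
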
diff --git a/src/isa-l/igzip/encode_df_04.asm b/src/isa-l/igzip/encode_df_04.asm
new file mode 100644
index 000000000..5b913aec4
--- /dev/null
+++ b/src/isa-l/igzip/encode_df_04.asm
@@ -0,0 +1,580 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "stdmac.asm"
+
+%define ARCH 04
+%define USE_HSWNI
+
+; tree entry is 4 bytes:
+; lit/len tree (513 entries)
+; | 3 | 2 | 1 | 0 |
+; | len | code |
+;
+; dist tree
+; | 3 | 2 | 1 | 0 |
+; |eblen:codlen| code |
+
+; token format:
+; DIST_OFFSET:0 : lit/len
+; 31:(DIST_OFFSET + 5) : dist Extra Bits
+; (DIST_OFFSET + 5):DIST_OFFSET : dist code
+; lit/len: 0-256 (literal)
+;          257-512 (length + 254)
+
+; returns final token pointer
+; equal to token_end if successful
+; uint32_t* encode_df(uint32_t *token_start, uint32_t *token_end,
+; BitBuf *out_buf, uint32_t *trees);
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define arg4 r9
+%define sym rsi
+%define dsym rdi
+%define hufftables r9
+%define ptr r11
+%else
+; Linux
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define arg4 rcx
+%define sym r9
+%define dsym r8
+%define hufftables r11
+%define ptr rdi
+%endif
+
+%define in_buf_end arg2
+%define bitbuf arg3
+%define out_buf bitbuf
+; bit_count is rcx
+%define bits rax
+%define data r12
+%define tmp rbx
+%define len dsym
+%define tmp2 r10
+%define end_ptr rbp
+
+%define LIT_MASK ((0x1 << LIT_LEN_BIT_COUNT) - 1)
+%define DIST_MASK ((0x1 << DIST_LIT_BIT_COUNT) - 1)
+
+%define codes1 ymm1
+%define code_lens1 ymm2
+%define codes2 ymm3
+%define code_lens2 ymm4
+%define codes3 ymm5
+%define code_lens3 ymm6
+%define codes4 ymm7
+%define syms ymm7
+
+%define code_lens4 ymm8
+%define dsyms ymm8
+
+%define ytmp ymm9
+%define codes_lookup1 ymm10
+%define codes_lookup2 ymm11
+%define datas ymm12
+%define ybits ymm13
+%define ybits_count ymm14
+%define yoffset_mask ymm15
+
+%define VECTOR_SIZE 0x20
+%define VECTOR_LOOP_PROCESSED (2 * VECTOR_SIZE)
+%define VECTOR_SLOP 0x20 - 8
+
+gpr_save_mem_offset equ 0
+gpr_save_mem_size equ 8 * 6
+xmm_save_mem_offset equ gpr_save_mem_offset + gpr_save_mem_size
+xmm_save_mem_size equ 10 * 16
+bitbuf_mem_offset equ xmm_save_mem_offset + xmm_save_mem_size
+bitbuf_mem_size equ 8
+stack_size equ gpr_save_mem_size + xmm_save_mem_size + bitbuf_mem_size
+
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rbp
+ mov [rsp + gpr_save_mem_offset + 2*8], r12
+
+%ifidn __OUTPUT_FORMAT__, win64
+ mov [rsp + gpr_save_mem_offset + 3*8], rsi
+ mov [rsp + gpr_save_mem_offset + 4*8], rdi
+
+ MOVDQU [rsp + xmm_save_mem_offset + 0*8], xmm6
+ MOVDQU [rsp + xmm_save_mem_offset + 1*8], xmm7
+ MOVDQU [rsp + xmm_save_mem_offset + 2*8], xmm8
+ MOVDQU [rsp + xmm_save_mem_offset + 3*8], xmm9
+ MOVDQU [rsp + xmm_save_mem_offset + 4*8], xmm10
+ MOVDQU [rsp + xmm_save_mem_offset + 5*8], xmm11
+ MOVDQU [rsp + xmm_save_mem_offset + 6*8], xmm12
+ MOVDQU [rsp + xmm_save_mem_offset + 7*8], xmm13
+ MOVDQU [rsp + xmm_save_mem_offset + 8*8], xmm14
+ MOVDQU [rsp + xmm_save_mem_offset + 9*8], xmm15
+%endif
+
+%endm
+
+%macro FUNC_RESTORE 0
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 1*8]
+ mov r12, [rsp + gpr_save_mem_offset + 2*8]
+
+%ifidn __OUTPUT_FORMAT__, win64
+ mov rsi, [rsp + gpr_save_mem_offset + 3*8]
+ mov rdi, [rsp + gpr_save_mem_offset + 4*8]
+
+ MOVDQU xmm6, [rsp + xmm_save_mem_offset + 0*8]
+ MOVDQU xmm7, [rsp + xmm_save_mem_offset + 1*8]
+ MOVDQU xmm8, [rsp + xmm_save_mem_offset + 2*8]
+ MOVDQU xmm9, [rsp + xmm_save_mem_offset + 3*8]
+ MOVDQU xmm10, [rsp + xmm_save_mem_offset + 4*8]
+ MOVDQU xmm11, [rsp + xmm_save_mem_offset + 5*8]
+ MOVDQU xmm12, [rsp + xmm_save_mem_offset + 6*8]
+ MOVDQU xmm13, [rsp + xmm_save_mem_offset + 7*8]
+ MOVDQU xmm14, [rsp + xmm_save_mem_offset + 8*8]
+ MOVDQU xmm15, [rsp + xmm_save_mem_offset + 9*8]
+%endif
+ add rsp, stack_size
+
+%endmacro
+
+default rel
+section .text
+
+global encode_deflate_icf_ %+ ARCH
+encode_deflate_icf_ %+ ARCH:
+ endbranch
+ FUNC_SAVE
+
+%ifnidn ptr, arg1
+ mov ptr, arg1
+%endif
+%ifnidn hufftables, arg4
+ mov hufftables, arg4
+%endif
+
+ mov [rsp + bitbuf_mem_offset], bitbuf
+ mov bits, [bitbuf + _m_bits]
+ mov ecx, [bitbuf + _m_bit_count]
+ mov end_ptr, [bitbuf + _m_out_end]
+ mov out_buf, [bitbuf + _m_out_buf] ; clobbers bitbuf
+
+ sub end_ptr, VECTOR_SLOP
+ sub in_buf_end, VECTOR_LOOP_PROCESSED
+ cmp ptr, in_buf_end
+ jge .finish
+
+ vpcmpeqq ytmp, ytmp, ytmp
+ vmovdqu datas, [ptr]
+ vpand syms, datas, [lit_mask]
+ vpgatherdd codes_lookup1, [hufftables + _lit_len_table + 4 * syms], ytmp
+
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpsrld dsyms, datas, DIST_OFFSET
+ vpand dsyms, dsyms, [dist_mask]
+ vpgatherdd codes_lookup2, [hufftables + _dist_table + 4 * dsyms], ytmp
+
+ vmovq ybits %+ x, bits
+ vmovq ybits_count %+ x, rcx
+ vmovdqa yoffset_mask, [offset_mask]
+
+.main_loop:
+	;; Sets codes1 to contain lit/len codes and code_lens1 the corresponding lengths
+ vpsrld code_lens1, codes_lookup1, 24
+ vpand codes1, codes_lookup1, [lit_icr_mask]
+
+ ;; Sets codes2 to contain dist codes, code_lens2 the corresponding lengths,
+ ;; and code_lens3 the extra bit counts
+ vpblendw codes2, ybits, codes_lookup2, 0x55 ;Bits 8 and above of ybits are 0
+ vpsrld code_lens2, codes_lookup2, 24
+ vpsrld code_lens3, codes_lookup2, 16
+ vpand code_lens3, [eb_icr_mask]
+
+ ;; Set codes3 to contain the extra bits
+ vpsrld codes3, datas, EXTRA_BITS_OFFSET
+
+ cmp out_buf, end_ptr
+ ja .main_loop_exit
+
+ ;; Start code lookups for next iteration
+ add ptr, VECTOR_SIZE
+ vpcmpeqq ytmp, ytmp, ytmp
+ vmovdqu datas, [ptr]
+ vpand syms, datas, [lit_mask]
+ vpgatherdd codes_lookup1, [hufftables + _lit_len_table + 4 * syms], ytmp
+
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpsrld dsyms, datas, DIST_OFFSET
+ vpand dsyms, dsyms, [dist_mask]
+ vpgatherdd codes_lookup2, [hufftables + _dist_table + 4 * dsyms], ytmp
+
+ ;; Merge dist code with extra bits
+ vpsllvd codes3, codes3, code_lens2
+ vpxor codes2, codes2, codes3
+ vpaddd code_lens2, code_lens2, code_lens3
+
+ ;; Check for long codes
+ vpaddd code_lens3, code_lens1, code_lens2
+ vpcmpgtd ytmp, code_lens3, [max_write_d]
+ vptest ytmp, ytmp
+ jnz .long_codes
+
+ ;; Merge dist and len codes
+ vpsllvd codes2, codes2, code_lens1
+ vpxor codes1, codes1, codes2
+
+ ;; Split buffer data into qwords, ytmp is 0 after last branch
+ vpblendd codes3, ytmp, codes1, 0x55
+ vpsrlq codes1, codes1, 32
+ vpsrlq code_lens1, code_lens3, 32
+ vpblendd code_lens3, ytmp, code_lens3, 0x55
+
+ ;; Merge bitbuf bits
+ vpsllvq codes3, codes3, ybits_count
+ vpxor codes3, codes3, ybits
+ vpaddq code_lens3, code_lens3, ybits_count
+
+ ;; Merge two symbols into qwords
+ vpsllvq codes1, codes1, code_lens3
+ vpxor codes1, codes1, codes3
+ vpaddq code_lens1, code_lens1, code_lens3
+
+ ;; Split buffer data into dqwords, ytmp is 0 after last branch
+ vpblendd codes2, ytmp, codes1, 0x33
+ vpblendd code_lens2, ytmp, code_lens1, 0x33
+ vpsrldq codes1, 8
+ vpsrldq code_lens1, 8
+
+ ;; Bit align dqwords
+ vpaddq code_lens1, code_lens1, code_lens2
+ vpand ybits_count, code_lens1, yoffset_mask ;Extra bits
+ vpermq ybits_count, ybits_count, 0xcf
+ vpaddq code_lens2, ybits_count
+ vpsllvq codes2, codes2, ybits_count
+
+ ;; Merge two qwords into dqwords
+ vmovdqa ytmp, [q_64]
+ vpsubq code_lens3, ytmp, code_lens2
+ vpsrlvq codes3, codes1, code_lens3
+ vpslldq codes3, codes3, 8
+
+ vpsllvq codes1, codes1, code_lens2
+
+ vpxor codes1, codes1, codes3
+ vpxor codes1, codes1, codes2
+
+ vmovq tmp, code_lens1 %+ x ;Number of bytes
+ shr tmp, 3
+
+ ;; Extract last bytes
+ vpaddq code_lens2, code_lens1, ybits_count
+ vpsrlq code_lens2, code_lens2, 3
+ vpshufb codes2, codes1, code_lens2
+ vpand codes2, codes2, [bytes_mask]
+ vextracti128 ybits %+ x, codes2, 1
+
+ ;; Check for short codes
+ vptest code_lens2, [min_write_mask]
+ jz .short_codes
+.short_codes_next:
+
+ vpermq codes2, codes2, 0x45
+ vpor codes1, codes1, codes2
+
+ ;; bit shift upper dqword combined bits to line up with lower dqword
+ vextracti128 code_lens2 %+ x, code_lens1, 1
+
+ ; Write out lower dqword of combined bits
+ vmovdqu [out_buf], codes1
+ vpaddq code_lens1, code_lens1, code_lens2
+
+ vmovq tmp2, code_lens1 %+ x ;Number of bytes
+ shr tmp2, 3
+ vpand ybits_count, code_lens1, yoffset_mask ;Extra bits
+
+ ; Write out upper dqword of combined bits
+ vextracti128 [out_buf + tmp], codes1, 1
+ add out_buf, tmp2
+
+ cmp ptr, in_buf_end
+ jbe .main_loop
+
+.main_loop_exit:
+ vmovq rcx, ybits_count %+ x
+ vmovq bits, ybits %+ x
+ jmp .finish
+
+.short_codes:
+ ;; Merge last bytes when the second dqword contains less than a byte
+ vpor ybits %+ x, codes2 %+ x
+ jmp .short_codes_next
+
+.long_codes:
+ add end_ptr, VECTOR_SLOP
+ sub ptr, VECTOR_SIZE
+
+ vpxor ytmp, ytmp, ytmp
+ vpblendd codes3, ytmp, codes1, 0x55
+ vpblendd code_lens3, ytmp, code_lens1, 0x55
+ vpblendd codes4, ytmp, codes2, 0x55
+
+ vpsllvq codes4, codes4, code_lens3
+ vpxor codes3, codes3, codes4
+ vpaddd code_lens3, code_lens1, code_lens2
+
+ vpsrlq codes1, codes1, 32
+ vpsrlq code_lens1, code_lens1, 32
+ vpsrlq codes2, codes2, 32
+
+ vpsllvq codes2, codes2, code_lens1
+ vpxor codes1, codes1, codes2
+
+ vpsrlq code_lens1, code_lens3, 32
+ vpblendd code_lens3, ytmp, code_lens3, 0x55
+
+ ;; Merge bitbuf bits
+ vpsllvq codes3, codes3, ybits_count
+ vpxor codes3, codes3, ybits
+ vpaddq code_lens3, code_lens3, ybits_count
+ vpaddq code_lens1, code_lens1, code_lens3
+
+ xor bits, bits
+ xor rcx, rcx
+ vpsubq code_lens1, code_lens1, code_lens3
+%rep 2
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vmovq sym, codes3 %+ x
+ vmovq tmp2, code_lens3 %+ x
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vmovq sym, codes1 %+ x
+ vmovq tmp2, code_lens1 %+ x
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vpextrq sym, codes3 %+ x, 1
+ vpextrq tmp2, code_lens3 %+ x, 1
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vpextrq sym, codes1 %+ x, 1
+ vpextrq tmp2, code_lens1 %+ x, 1
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+ vextracti128 codes3 %+ x, codes3, 1
+ vextracti128 code_lens3 %+ x, code_lens3, 1
+ vextracti128 codes1 %+ x, codes1, 1
+ vextracti128 code_lens1 %+ x, code_lens1, 1
+%endrep
+ sub end_ptr, VECTOR_SLOP
+
+ vmovq ybits %+ x, bits
+ vmovq ybits_count %+ x, rcx
+ cmp ptr, in_buf_end
+ jbe .main_loop
+
+.finish:
+ add in_buf_end, VECTOR_LOOP_PROCESSED
+ add end_ptr, VECTOR_SLOP
+
+ cmp ptr, in_buf_end
+ jge .overflow
+
+.finish_loop:
+ mov DWORD(data), [ptr]
+
+ cmp out_buf, end_ptr
+ ja .overflow
+
+ mov sym, data
+ and sym, LIT_MASK ; sym has ll_code
+ mov DWORD(sym), [hufftables + _lit_len_table + sym * 4]
+
+ ; look up dist sym
+ mov dsym, data
+ shr dsym, DIST_OFFSET
+ and dsym, DIST_MASK
+ mov DWORD(dsym), [hufftables + _dist_table + dsym * 4]
+
+ ; insert LL code
+ ; sym: 31:24 length; 23:0 code
+ mov tmp2, sym
+ and sym, 0xFFFFFF
+ SHLX sym, sym, rcx
+ shr tmp2, 24
+ or bits, sym
+ add rcx, tmp2
+
+ ; insert dist code
+ movzx tmp, WORD(dsym)
+ SHLX tmp, tmp, rcx
+ or bits, tmp
+ mov tmp, dsym
+ shr tmp, 24
+ add rcx, tmp
+
+ ; insert dist extra bits
+ shr data, EXTRA_BITS_OFFSET
+ add ptr, 4
+ SHLX data, data, rcx
+ or bits, data
+ shr dsym, 16
+ and dsym, 0xFF
+ add rcx, dsym
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+
+ cmp ptr, in_buf_end
+ jb .finish_loop
+
+.overflow:
+ mov tmp, [rsp + bitbuf_mem_offset]
+ mov [tmp + _m_bits], bits
+ mov [tmp + _m_bit_count], ecx
+ mov [tmp + _m_out_buf], out_buf
+
+ mov rax, ptr
+
+ FUNC_RESTORE
+
+ ret
+
+section .data
+ align 32
+max_write_d:
+ dd 0x1c, 0x1d, 0x1f, 0x20, 0x1c, 0x1d, 0x1f, 0x20
+min_write_mask:
+ dq 0x00, 0x00, 0xff, 0x00
+offset_mask:
+ dq 0x0000000000000007, 0x0000000000000000
+ dq 0x0000000000000000, 0x0000000000000000
+q_64:
+ dq 0x0000000000000040, 0x0000000000000000
+ dq 0x0000000000000040, 0x0000000000000000
+lit_mask:
+ dd LIT_MASK, LIT_MASK, LIT_MASK, LIT_MASK
+ dd LIT_MASK, LIT_MASK, LIT_MASK, LIT_MASK
+dist_mask:
+ dd DIST_MASK, DIST_MASK, DIST_MASK, DIST_MASK
+ dd DIST_MASK, DIST_MASK, DIST_MASK, DIST_MASK
+lit_icr_mask:
+ dd 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF
+ dd 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF
+eb_icr_mask:
+ dd 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF
+ dd 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF
+bytes_mask:
+ dq 0x00000000000000ff, 0x0000000000000000
+ dq 0x00000000000000ff, 0x0000000000000000
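
The header comment above describes the two 4-byte table-entry formats; the
vpsrld/vpand steps in the vector loop and the scalar finish_loop decode them
identically. A scalar C sketch of the unpacking (struct and function names are
illustrative, not ISA-L's):

    #include <stdint.h>

    struct ll_entry { uint32_t code; uint32_t len; };
    struct dist_entry { uint32_t code; uint32_t len; uint32_t eb_len; };

    /* lit/len entry: | len | code (24 bits, extra bits pre-folded) | */
    static struct ll_entry unpack_ll(uint32_t e)
    {
    	return (struct ll_entry) { e & 0xFFFFFF, e >> 24 };
    }

    /* dist entry: | codelen | eblen | code (16 bits) | */
    static struct dist_entry unpack_dist(uint32_t e)
    {
    	return (struct dist_entry) { e & 0xFFFF, e >> 24, (e >> 16) & 0xFF };
    }
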
diff --git a/src/isa-l/igzip/encode_df_06.asm b/src/isa-l/igzip/encode_df_06.asm
new file mode 100644
index 000000000..9e747954a
--- /dev/null
+++ b/src/isa-l/igzip/encode_df_06.asm
@@ -0,0 +1,624 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "stdmac.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%define ARCH 06
+%define USE_HSWNI
+
+; tree entry is 4 bytes:
+; lit/len tree (513 entries)
+; | 3 | 2 | 1 | 0 |
+; | len | code |
+;
+; dist tree
+; | 3 | 2 | 1 | 0 |
+; |eblen:codlen| code |
+
+; token format:
+; DIST_OFFSET:0 : lit/len
+; 31:(DIST_OFFSET + 5) : dist Extra Bits
+; (DIST_OFFSET + 5):DIST_OFFSET : dist code
+; lit/len: 0-256 (literal)
+;          257-512 (length + 254)
+
+; returns final token pointer
+; equal to token_end if successful
+; uint32_t* encode_df(uint32_t *token_start, uint32_t *token_end,
+; BitBuf *out_buf, uint32_t *trees);
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define arg4 r9
+%define sym rsi
+%define dsym rdi
+%define hufftables r9
+%define ptr r11
+%else
+; Linux
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define arg4 rcx
+%define sym r9
+%define dsym r8
+%define hufftables r11
+%define ptr rdi
+%endif
+
+%define in_buf_end arg2
+%define bitbuf arg3
+%define out_buf bitbuf
+; bit_count is rcx
+%define bits rax
+%define data r12
+%define tmp rbx
+%define len dsym
+%define tmp2 r10
+%define end_ptr rbp
+
+%define LIT_MASK ((0x1 << LIT_LEN_BIT_COUNT) - 1)
+%define DIST_MASK ((0x1 << DIST_LIT_BIT_COUNT) - 1)
+
+%define codes1 zmm1
+%define code_lens1 zmm2
+%define codes2 zmm3
+%define code_lens2 zmm4
+%define codes3 zmm5
+%define ztmp zmm5
+%define code_lens3 zmm6
+%define codes4 zmm7
+%define syms zmm7
+
+%define code_lens4 zmm8
+%define dsyms zmm8
+%define zbits_count_q zmm8
+
+%define codes_lookup1 zmm9
+%define codes_lookup2 zmm10
+%define datas zmm11
+%define zbits zmm12
+%define zbits_count zmm13
+%define zoffset_mask zmm14
+%define znotoffset_mask zmm23
+
+%define zq_64 zmm15
+%define zlit_mask zmm16
+%define zdist_mask zmm17
+%define zlit_icr_mask zmm18
+%define zeb_icr_mask zmm19
+%define zmax_write zmm20
+%define zrot_perm zmm21
+%define zq_8 zmm22
+
+%define VECTOR_SIZE 0x40
+%define VECTOR_LOOP_PROCESSED (2 * VECTOR_SIZE)
+%define VECTOR_SLOP 0x40 - 8
+
+gpr_save_mem_offset equ 0
+gpr_save_mem_size equ 8 * 6
+xmm_save_mem_offset equ gpr_save_mem_offset + gpr_save_mem_size
+xmm_save_mem_size equ 10 * 16
+bitbuf_mem_offset equ xmm_save_mem_offset + xmm_save_mem_size
+bitbuf_mem_size equ 8
+stack_size equ gpr_save_mem_size + xmm_save_mem_size + bitbuf_mem_size
+
+
+%macro FUNC_SAVE 0
+ sub rsp, stack_size
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rbp
+ mov [rsp + gpr_save_mem_offset + 2*8], r12
+
+%ifidn __OUTPUT_FORMAT__, win64
+ mov [rsp + gpr_save_mem_offset + 3*8], rsi
+ mov [rsp + gpr_save_mem_offset + 4*8], rdi
+
+ MOVDQU [rsp + xmm_save_mem_offset + 0*8], xmm6
+ MOVDQU [rsp + xmm_save_mem_offset + 1*8], xmm7
+ MOVDQU [rsp + xmm_save_mem_offset + 2*8], xmm8
+ MOVDQU [rsp + xmm_save_mem_offset + 3*8], xmm9
+ MOVDQU [rsp + xmm_save_mem_offset + 4*8], xmm10
+ MOVDQU [rsp + xmm_save_mem_offset + 5*8], xmm11
+ MOVDQU [rsp + xmm_save_mem_offset + 6*8], xmm12
+ MOVDQU [rsp + xmm_save_mem_offset + 7*8], xmm13
+ MOVDQU [rsp + xmm_save_mem_offset + 8*8], xmm14
+ MOVDQU [rsp + xmm_save_mem_offset + 9*8], xmm15
+%endif
+
+%endm
+
+%macro FUNC_RESTORE 0
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 1*8]
+ mov r12, [rsp + gpr_save_mem_offset + 2*8]
+
+%ifidn __OUTPUT_FORMAT__, win64
+ mov rsi, [rsp + gpr_save_mem_offset + 3*8]
+ mov rdi, [rsp + gpr_save_mem_offset + 4*8]
+
+ MOVDQU xmm6, [rsp + xmm_save_mem_offset + 0*8]
+ MOVDQU xmm7, [rsp + xmm_save_mem_offset + 1*8]
+ MOVDQU xmm8, [rsp + xmm_save_mem_offset + 2*8]
+ MOVDQU xmm9, [rsp + xmm_save_mem_offset + 3*8]
+ MOVDQU xmm10, [rsp + xmm_save_mem_offset + 4*8]
+ MOVDQU xmm11, [rsp + xmm_save_mem_offset + 5*8]
+ MOVDQU xmm12, [rsp + xmm_save_mem_offset + 6*8]
+ MOVDQU xmm13, [rsp + xmm_save_mem_offset + 7*8]
+ MOVDQU xmm14, [rsp + xmm_save_mem_offset + 8*8]
+ MOVDQU xmm15, [rsp + xmm_save_mem_offset + 9*8]
+%endif
+ add rsp, stack_size
+
+%endmacro
+
+default rel
+section .text
+
+global encode_deflate_icf_ %+ ARCH
+encode_deflate_icf_ %+ ARCH:
+ endbranch
+ FUNC_SAVE
+
+%ifnidn ptr, arg1
+ mov ptr, arg1
+%endif
+%ifnidn hufftables, arg4
+ mov hufftables, arg4
+%endif
+
+ mov [rsp + bitbuf_mem_offset], bitbuf
+ mov bits, [bitbuf + _m_bits]
+ mov ecx, [bitbuf + _m_bit_count]
+ mov end_ptr, [bitbuf + _m_out_end]
+ mov out_buf, [bitbuf + _m_out_buf] ; clobbers bitbuf
+
+ sub end_ptr, VECTOR_SLOP
+ sub in_buf_end, VECTOR_LOOP_PROCESSED
+ cmp ptr, in_buf_end
+ jge .finish
+
+ kxorq k0, k0, k0
+ kmovq k1, [k_mask_1]
+ kmovq k2, [k_mask_2]
+ kmovq k3, [k_mask_3]
+ kmovq k4, [k_mask_4]
+ kmovq k5, [k_mask_5]
+
+ vmovdqa64 zrot_perm, [rot_perm]
+
+ vbroadcasti64x2 zq_64, [q_64]
+ vbroadcasti64x2 zq_8, [q_8]
+
+ vpbroadcastq zoffset_mask, [offset_mask]
+ vpternlogd znotoffset_mask, znotoffset_mask, zoffset_mask, 0x55
+
+ vpbroadcastd zlit_mask, [lit_mask]
+ vpbroadcastd zdist_mask, [dist_mask]
+ vpbroadcastd zlit_icr_mask, [lit_icr_mask]
+ vpbroadcastd zeb_icr_mask, [eb_icr_mask]
+ vpbroadcastd zmax_write, [max_write_d]
+
+ knotq k6, k0
+ vmovdqu64 datas, [ptr]
+ vpandd syms, datas, zlit_mask
+ vpgatherdd codes_lookup1 {k6}, [hufftables + _lit_len_table + 4 * syms]
+
+ knotq k7, k0
+ vpsrld dsyms, datas, DIST_OFFSET
+ vpandd dsyms, dsyms, zdist_mask
+ vpgatherdd codes_lookup2 {k7}, [hufftables + _dist_table + 4 * dsyms]
+
+ vmovq zbits %+ x, bits
+ vmovq zbits_count %+ x, rcx
+
+.main_loop:
+	;; Sets codes1 to contain lit/len codes and code_lens1 the corresponding lengths
+ vpsrld code_lens1, codes_lookup1, 24
+ vpandd codes1, codes_lookup1, zlit_icr_mask
+
+ ;; Sets codes2 to contain dist codes, code_lens2 the corresponding lengths,
+ ;; and code_lens3 the extra bit counts
+ vmovdqu16 codes2 {k1}{z}, codes_lookup2 ;Bits 8 and above of zbits are 0
+ vpsrld code_lens2, codes_lookup2, 24
+ vpsrld code_lens3, codes_lookup2, 16
+ vpandd code_lens3, code_lens3, zeb_icr_mask
+
+ ;; Set codes3 to contain the extra bits
+ vpsrld codes3, datas, EXTRA_BITS_OFFSET
+
+ cmp out_buf, end_ptr
+ ja .main_loop_exit
+
+ ;; Start code lookups for next iteration
+ knotq k6, k0
+ add ptr, VECTOR_SIZE
+ vmovdqu64 datas, [ptr]
+ vpandd syms, datas, zlit_mask
+ vpgatherdd codes_lookup1 {k6}, [hufftables + _lit_len_table + 4 * syms]
+
+ knotq k7, k0
+ vpsrld dsyms, datas, DIST_OFFSET
+ vpandd dsyms, dsyms, zdist_mask
+ vpgatherdd codes_lookup2 {k7}, [hufftables + _dist_table + 4 * dsyms]
+
+ ;; Merge dist code with extra bits
+ vpsllvd codes3, codes3, code_lens2
+ vpxord codes2, codes2, codes3
+ vpaddd code_lens2, code_lens2, code_lens3
+
+ ;; Check for long codes
+ vpaddd code_lens3, code_lens1, code_lens2
+ vpcmpgtd k6, code_lens3, zmax_write
+ ktestd k6, k6
+ jnz .long_codes
+
+ ;; Merge dist and len codes
+ vpsllvd codes2, codes2, code_lens1
+ vpxord codes1, codes1, codes2
+
+ vmovdqa32 codes3 {k1}{z}, codes1
+ vpsrlq codes1, codes1, 32
+ vpsrlq code_lens1, code_lens3, 32
+ vmovdqa32 code_lens3 {k1}{z}, code_lens3
+
+ ;; Merge bitbuf bits
+ vpsllvq codes3, codes3, zbits_count
+ vpxord codes3, codes3, zbits
+ vpaddq code_lens3, code_lens3, zbits_count
+
+ ;; Merge two symbols into qwords
+ vpsllvq codes1, codes1, code_lens3
+ vpxord codes1, codes1, codes3
+ vpaddq code_lens1, code_lens1, code_lens3
+
+ ;; Determine total bits at end of each qword
+ vpermq zbits_count {k5}{z}, zrot_perm, code_lens1
+ vpaddq code_lens2, zbits_count, code_lens1
+ vshufi64x2 zbits_count {k3}{z}, code_lens2, code_lens2, 0x90
+ vpaddq code_lens2, code_lens2, zbits_count
+ vshufi64x2 zbits_count {k2}{z}, code_lens2, code_lens2, 0x40
+ vpaddq code_lens2, code_lens2, zbits_count
+
+ ;; Bit align quadwords
+ vpandd zbits_count, code_lens2, zoffset_mask
+ vpermq zbits_count_q {k5}{z}, zrot_perm, zbits_count
+ vpsllvq codes1, codes1, zbits_count_q
+
+ ;; Check whether any of the last bytes overlap
+ vpcmpq k6 {k5}, code_lens1, zbits_count, 1
+
+ ;; Get last byte in each qword
+ vpsrlq code_lens2, code_lens2, 3
+ vpaddq code_lens1, code_lens1, zbits_count_q
+ vpandq code_lens1, code_lens1, znotoffset_mask
+ vpsrlvq codes3, codes1, code_lens1
+
+ ;; Branch to handle overlapping last bytes
+ ktestd k6, k6
+ jnz .small_codes
+
+.small_codes_next:
+ ;; Save off zbits and zbits_count for next loop
+ knotq k7, k5
+ vpermq zbits {k7}{z}, zrot_perm, codes3
+ vpermq zbits_count {k7}{z}, zrot_perm, zbits_count
+
+ ;; Merge last byte in each qword with the next qword
+ vpermq codes3 {k5}{z}, zrot_perm, codes3
+ vpxord codes1, codes1, codes3
+
+ ;; Determine total bytes written
+ vextracti64x2 code_lens1 %+ x, code_lens2, 3
+ vpextrq tmp2, code_lens1 %+ x, 1
+
+ ;; Write out qwords
+ knotq k6, k0
+ vpermq code_lens2 {k5}{z}, zrot_perm, code_lens2
+ vpscatterqq [out_buf + code_lens2] {k6}, codes1
+
+ add out_buf, tmp2
+
+ cmp ptr, in_buf_end
+ jbe .main_loop
+
+.main_loop_exit:
+ vmovq rcx, zbits_count %+ x
+ vmovq bits, zbits %+ x
+ jmp .finish
+
+.small_codes:
+ ;; Merge overlapping last bytes
+ vpermq codes4 {k6}{z}, zrot_perm, codes3
+ vporq codes3, codes3, codes4
+ kshiftlq k7, k6, 1
+ ktestd k6, k7
+ jz .small_codes_next
+
+ kandq k6, k6, k7
+ jmp .small_codes
+
+.long_codes:
+ add end_ptr, VECTOR_SLOP
+ sub ptr, VECTOR_SIZE
+
+ vmovdqa32 codes3 {k1}{z}, codes1
+ vmovdqa32 code_lens3 {k1}{z}, code_lens1
+ vmovdqa32 codes4 {k1}{z}, codes2
+
+ vpsllvq codes4, codes4, code_lens3
+ vpxord codes3, codes3, codes4
+ vpaddd code_lens3, code_lens1, code_lens2
+
+ vpsrlq codes1, codes1, 32
+ vpsrlq code_lens1, code_lens1, 32
+ vpsrlq codes2, codes2, 32
+
+ vpsllvq codes2, codes2, code_lens1
+ vpxord codes1, codes1, codes2
+
+ vpsrlq code_lens1, code_lens3, 32
+ vmovdqa32 code_lens3 {k1}{z}, code_lens3
+
+ ;; Merge bitbuf bits
+ vpsllvq codes3, codes3, zbits_count
+ vpxord codes3, codes3, zbits
+ vpaddq code_lens3, code_lens3, zbits_count
+ vpaddq code_lens1, code_lens1, code_lens3
+
+ xor bits, bits
+ xor rcx, rcx
+ vpsubq code_lens1, code_lens1, code_lens3
+
+ vmovdqu64 codes2, codes1
+ vmovdqu64 code_lens2, code_lens1
+ vmovdqu64 codes4, codes3
+ vmovdqu64 code_lens4, code_lens3
+%assign i 0
+%rep 4
+%assign i (i + 1)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vmovq sym, codes3 %+ x
+ vmovq tmp2, code_lens3 %+ x
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vmovq sym, codes1 %+ x
+ vmovq tmp2, code_lens1 %+ x
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vpextrq sym, codes3 %+ x, 1
+ vpextrq tmp2, code_lens3 %+ x, 1
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ cmp out_buf, end_ptr
+ ja .overflow
+ ;; insert LL code
+ vpextrq sym, codes1 %+ x, 1
+ vpextrq tmp2, code_lens1 %+ x, 1
+ SHLX sym, sym, rcx
+ or bits, sym
+ add rcx, tmp2
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+ add ptr, 4
+
+ vextracti32x4 codes3 %+ x, codes4, i
+ vextracti32x4 code_lens3 %+ x, code_lens4, i
+ vextracti32x4 codes1 %+ x, codes2, i
+ vextracti32x4 code_lens1 %+ x, code_lens2, i
+%endrep
+ sub end_ptr, VECTOR_SLOP
+
+ vmovq zbits %+ x, bits
+ vmovq zbits_count %+ x, rcx
+ cmp ptr, in_buf_end
+ jbe .main_loop
+
+.finish:
+ add in_buf_end, VECTOR_LOOP_PROCESSED
+ add end_ptr, VECTOR_SLOP
+
+ cmp ptr, in_buf_end
+ jge .overflow
+
+.finish_loop:
+ mov DWORD(data), [ptr]
+
+ cmp out_buf, end_ptr
+ ja .overflow
+
+ mov sym, data
+ and sym, LIT_MASK ; sym has ll_code
+ mov DWORD(sym), [hufftables + _lit_len_table + sym * 4]
+
+ ; look up dist sym
+ mov dsym, data
+ shr dsym, DIST_OFFSET
+ and dsym, DIST_MASK
+ mov DWORD(dsym), [hufftables + _dist_table + dsym * 4]
+
+ ; insert LL code
+ ; sym: 31:24 length; 23:0 code
+ mov tmp2, sym
+ and sym, 0xFFFFFF
+ SHLX sym, sym, rcx
+ shr tmp2, 24
+ or bits, sym
+ add rcx, tmp2
+
+ ; insert dist code
+ movzx tmp, WORD(dsym)
+ SHLX tmp, tmp, rcx
+ or bits, tmp
+ mov tmp, dsym
+ shr tmp, 24
+ add rcx, tmp
+
+ ; insert dist extra bits
+ shr data, EXTRA_BITS_OFFSET
+ add ptr, 4
+ SHLX data, data, rcx
+ or bits, data
+ shr dsym, 16
+ and dsym, 0xFF
+ add rcx, dsym
+
+ ; empty bits
+ mov [out_buf], bits
+ mov tmp, rcx
+ shr tmp, 3 ; byte count
+ add out_buf, tmp
+ mov tmp, rcx
+ and rcx, ~7
+ SHRX bits, bits, rcx
+ mov rcx, tmp
+ and rcx, 7
+
+ cmp ptr, in_buf_end
+ jb .finish_loop
+
+.overflow:
+ mov tmp, [rsp + bitbuf_mem_offset]
+ mov [tmp + _m_bits], bits
+ mov [tmp + _m_bit_count], ecx
+ mov [tmp + _m_out_buf], out_buf
+
+ mov rax, ptr
+
+ FUNC_RESTORE
+
+ ret
+
+section .data
+ align 64
+;; 64 byte data
+rot_perm:
+ dq 0x00000007, 0x00000000, 0x00000001, 0x00000002
+ dq 0x00000003, 0x00000004, 0x00000005, 0x00000006
+
+;; 16 byte data
+q_64:
+ dq 0x0000000000000040, 0x0000000000000000
+q_8 :
+ dq 0x0000000000000000, 0x0000000000000008
+
+;; 8 byte data
+offset_mask:
+ dq 0x0000000000000007
+
+;; 4 byte data
+max_write_d:
+ dd 0x1c
+lit_mask:
+ dd LIT_MASK
+dist_mask:
+ dd DIST_MASK
+lit_icr_mask:
+ dd 0x00ffffff
+eb_icr_mask:
+ dd 0x000000ff
+
+;; k mask constants
+k_mask_1: dq 0x55555555
+k_mask_2: dq 0xfffffff0
+k_mask_3: dq 0xfffffffc
+k_mask_4: dw 0x0101, 0x0101, 0x0101, 0x0101
+k_mask_5: dq 0xfffffffe
+
+%endif
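
The repeated "empty bits" sequence in both encode_df_04.asm and
encode_df_06.asm is an unaligned 64-bit store followed by pointer and shift
bookkeeping: the complete bytes advance the output pointer, the written bits
are shifted out, and 0-7 bits stay behind for the next symbol. The same step in
C (a sketch, not the library's API):

    #include <stdint.h>
    #include <string.h>

    static unsigned char *empty_bits(unsigned char *out_buf,
    				 uint64_t *bits, uint64_t *bit_count)
    {
    	memcpy(out_buf, bits, 8);		/* unaligned 8-byte store */
    	out_buf += *bit_count >> 3;		/* advance past complete bytes */
    	*bits >>= *bit_count & ~(uint64_t)7;	/* drop the bits written out */
    	*bit_count &= 7;			/* keep the partial byte */
    	return out_buf;
    }
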
diff --git a/src/isa-l/igzip/flatten_ll.c b/src/isa-l/igzip/flatten_ll.c
new file mode 100644
index 000000000..1eb13b559
--- /dev/null
+++ b/src/isa-l/igzip/flatten_ll.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "flatten_ll.h"
+
+void flatten_ll(uint32_t * ll_hist)
+{
+ uint32_t i, j;
+ uint32_t *s = ll_hist, x, *p;
+
+ s[265] += s[266];
+ s[266] = s[267] + s[268];
+ s[267] = s[269] + s[270];
+ s[268] = s[271] + s[272];
+ s[269] = s[273] + s[274] + s[275] + s[276];
+ s[270] = s[277] + s[278] + s[279] + s[280];
+ s[271] = s[281] + s[282] + s[283] + s[284];
+ s[272] = s[285] + s[286] + s[287] + s[288];
+ p = s + 289;
+ for (i = 273; i < 277; i++) {
+ x = *(p++);
+ for (j = 1; j < 8; j++)
+ x += *(p++);
+ s[i] = x;
+ }
+ for (; i < 281; i++) {
+ x = *(p++);
+ for (j = 1; j < 16; j++)
+ x += *(p++);
+ s[i] = x;
+ }
+ for (; i < 285; i++) {
+ x = *(p++);
+ for (j = 1; j < 32; j++)
+ x += *(p++);
+ s[i] = x;
+ }
+ s[284] -= s[512];
+ s[285] = s[512];
+}
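
flatten_ll() folds the 513-slot extended histogram, which keeps one counter per
exact match length (slot = length + 254), down to the 285-symbol deflate
alphabet: slots merge into their RFC 1951 buckets in pairs, fours, eights,
sixteens and thirty-twos, and the exact-258 count in slot 512 is pulled back
out of bucket 284 and credited to symbol 285. A sketch of the bucket mapping
being applied (the same mapping convert_length_to_len_sym() implements in
generate_custom_hufftables.c):

    #include <assert.h>
    #include <stdint.h>

    /* Deflate length symbol for a match length in 3..258, per RFC 1951. */
    static uint32_t len_to_sym(uint32_t length)
    {
    	if (length < 11)
    		return 257 + length - 3;
    	if (length < 19)
    		return 261 + (length - 3) / 2;
    	if (length < 35)
    		return 265 + (length - 3) / 4;
    	if (length < 67)
    		return 269 + (length - 3) / 8;
    	if (length < 131)
    		return 273 + (length - 3) / 16;
    	if (length < 258)
    		return 277 + (length - 3) / 32;
    	return 285;	/* length 258 has a dedicated symbol */
    }

    int main(void)
    {
    	assert(len_to_sym(3) == 257);	/* shortest match */
    	assert(len_to_sym(100) == 279);	/* 273 + 97/16 */
    	assert(len_to_sym(258) == 285);	/* the slot flatten_ll() restores */
    	return 0;
    }
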
diff --git a/src/isa-l/igzip/flatten_ll.h b/src/isa-l/igzip/flatten_ll.h
new file mode 100644
index 000000000..9aaf89106
--- /dev/null
+++ b/src/isa-l/igzip/flatten_ll.h
@@ -0,0 +1,3 @@
+#include <stdint.h>
+
+void flatten_ll(uint32_t *ll_hist);
diff --git a/src/isa-l/igzip/generate_custom_hufftables.c b/src/isa-l/igzip/generate_custom_hufftables.c
new file mode 100644
index 000000000..449f70983
--- /dev/null
+++ b/src/isa-l/igzip/generate_custom_hufftables.c
@@ -0,0 +1,480 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/* This program can be used to generate a custom Huffman encoding to get
+ * better data compression. This is most useful when the type of data being
+ * compressed is well known.
+ *
+ * To use generate_custom_hufftables, pass a sequence of files to the program
+ * that together form an accurate representation of the data that is being
+ * compressed. Generate_custom_hufftables will then produce the file
+ * hufftables_c.c, which should be moved to replace its counterpart in the igzip
+ * source folder. After recompiling the ISA-L library, the igzip compression
+ * functions will use the new hufftables.
+ *
+ * Generate_custom_hufftables should be compiled with the same compile-time
+ * parameters as the igzip source code. Generating custom hufftables with
+ * different compile-time parameters may cause igzip to produce invalid output,
+ * for the reasons described below. The default parameters used by
+ * generate_custom_hufftables are the same as the default parameters used by
+ * igzip.
+ *
+ * *WARNING* generate_custom_hufftables must be compiled with an IGZIP_HIST_SIZE
+ * at least as large as the IGZIP_HIST_SIZE used by igzip; by default
+ * IGZIP_HIST_SIZE is 32K, which is also the maximum usable value. This matters
+ * because igzip cannot produce look-back distances larger than the
+ * IGZIP_HIST_SIZE it was compiled with, so larger distances are never assigned
+ * a Huffman code. The definition of LONGER_HUFFTABLES must be consistent as
+ * well, since that definition changes the size of the structures printed by
+ * this tool.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include "igzip_lib.h"
+
+#include "huff_codes.h"
+#include "huffman.h"
+
+/* These max code lengths are limited by how the data is stored in
+ * hufftables.asm. The deflate standard max is 15. */
+
+#define MAX_HEADER_SIZE ISAL_DEF_MAX_HDR_SIZE
+
+#define GZIP_HEADER_SIZE 10
+#define GZIP_TRAILER_SIZE 8
+#define ZLIB_HEADER_SIZE 2
+#define ZLIB_TRAILER_SIZE 4
+
+/**
+ * @brief Prints a table of uint8_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed.
+ * @param header: header to append in front of the table.
+ * @param footer: footer to append at the end of the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
+ char *footer, char *begin_line)
+{
+ int i;
+ fprintf(outfile, "%s", header);
+ for (i = 0; i < length - 1; i++) {
+ if ((i & 7) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%02x,", table[i]);
+ }
+
+ if ((i & 7) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%02x", table[i]);
+ fprintf(outfile, "%s", footer);
+
+}
+
+/**
+ * @brief Prints a table of uint16_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed.
+ * @param header: header to append in front of the table.
+ * @param footer: footer to append at the end of the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
+ char *footer, char *begin_line)
+{
+ int i;
+ fprintf(outfile, "%s", header);
+ for (i = 0; i < length - 1; i++) {
+ if ((i & 7) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%04x,", table[i]);
+ }
+
+ if ((i & 7) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%04x", table[i]);
+ fprintf(outfile, "%s", footer);
+
+}
+
+/**
+ * @brief Prints a table of uint32_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed.
+ * @param header: header to append in front of the table.
+ * @param footer: footer to append at the end of the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
+ char *footer, char *begin_line)
+{
+ int i;
+ fprintf(outfile, "%s", header);
+ for (i = 0; i < length - 1; i++) {
+ if ((i & 3) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%08x,", table[i]);
+ }
+
+ if ((i & 3) == 0)
+		fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%08x", table[i]);
+ fprintf(outfile, "%s", footer);
+
+}
+
+void fprint_hufftables(FILE * output_file, char *hufftables_name,
+ struct isal_hufftables *hufftables)
+{
+ fprintf(output_file, "struct isal_hufftables %s = {\n\n", hufftables_name);
+
+ fprint_uint8_table(output_file, hufftables->deflate_hdr,
+ hufftables->deflate_hdr_count +
+ (hufftables->deflate_hdr_extra_bits + 7) / 8,
+ "\t.deflate_hdr = {", "},\n\n", "\t\t");
+
+ fprintf(output_file, "\t.deflate_hdr_count = %d,\n", hufftables->deflate_hdr_count);
+ fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n",
+ hufftables->deflate_hdr_extra_bits);
+
+ fprint_uint32_table(output_file, hufftables->dist_table, IGZIP_DIST_TABLE_SIZE,
+ "\t.dist_table = {", "},\n\n", "\t\t");
+
+ fprint_uint32_table(output_file, hufftables->len_table, IGZIP_LEN_TABLE_SIZE,
+ "\t.len_table = {", "},\n\n", "\t\t");
+
+ fprint_uint16_table(output_file, hufftables->lit_table, IGZIP_LIT_TABLE_SIZE,
+ "\t.lit_table = {", "},\n\n", "\t\t");
+ fprint_uint8_table(output_file, hufftables->lit_table_sizes, IGZIP_LIT_TABLE_SIZE,
+ "\t.lit_table_sizes = {", "},\n\n", "\t\t");
+
+ fprint_uint16_table(output_file, hufftables->dcodes,
+ ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET,
+ "\t.dcodes = {", "},\n\n", "\t\t");
+ fprint_uint8_table(output_file, hufftables->dcodes_sizes,
+ ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET,
+ "\t.dcodes_sizes = {", "}\n", "\t\t");
+ fprintf(output_file, "};\n");
+}
+
+void fprint_header(FILE * output_file)
+{
+
+ fprintf(output_file, "#include <stdint.h>\n");
+ fprintf(output_file, "#include <igzip_lib.h>\n\n");
+
+ fprintf(output_file, "#if IGZIP_HIST_SIZE > %d\n"
+ "# error \"Invalid history size for the custom hufftable\"\n"
+ "#endif\n", IGZIP_HIST_SIZE);
+
+#ifdef LONGER_HUFFTABLE
+ fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n"
+ "# error \"Custom hufftable requires LONGER_HUFFTABLE to be defined \"\n"
+ "#endif\n");
+#else
+ fprintf(output_file, "#ifdef LONGER_HUFFTABLE\n"
+ "# error \"Custom hufftable requires LONGER_HUFFTABLE to not be defined \"\n"
+ "#endif\n");
+#endif
+ fprintf(output_file, "\n");
+
+ fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
+ "\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
+
+ fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
+ fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
+
+ fprintf(output_file, "const uint8_t zlib_hdr[] = { 0x78, 0x01 };\n\n");
+ fprintf(output_file, "const uint32_t zlib_hdr_bytes = %d;\n", ZLIB_HEADER_SIZE);
+ fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE);
+}
+
+static uint32_t convert_dist_to_dist_sym(uint32_t dist)
+{
+ assert(dist <= 32768 && dist > 0);
+ if (dist <= 32768) {
+ uint32_t msb = dist > 4 ? bsr(dist - 1) - 2 : 0;
+ return (msb * 2) + ((dist - 1) >> msb);
+ } else {
+ return ~0;
+ }
+}
+
+/**
+ * @brief Returns the deflate symbol value for a repeat length.
+ */
+static uint32_t convert_length_to_len_sym(uint32_t length)
+{
+ assert(length > 2 && length < 259);
+
+ /* Based on tables on page 11 in RFC 1951 */
+ if (length < 11)
+ return 257 + length - 3;
+ else if (length < 19)
+ return 261 + (length - 3) / 2;
+ else if (length < 35)
+ return 265 + (length - 3) / 4;
+ else if (length < 67)
+ return 269 + (length - 3) / 8;
+ else if (length < 131)
+ return 273 + (length - 3) / 16;
+ else if (length < 258)
+ return 277 + (length - 3) / 32;
+ else
+ return 285;
+}
+
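+/* Illustrative self-check (not part of the original tool): assuming bsr()
+ * from huffman.h returns the 1-based index of the highest set bit, the
+ * mapping above reproduces the RFC 1951 distance symbol table. Compile with
+ * -DDIST_SYM_SELF_CHECK to include it. */
+#ifdef DIST_SYM_SELF_CHECK
+static void check_dist_syms(void)
+{
+	assert(convert_dist_to_dist_sym(1) == 0);	/* dists 1-4 map directly */
+	assert(convert_dist_to_dist_sym(5) == 4);	/* bsr(4) = 3, msb = 1: 2 + 2 */
+	assert(convert_dist_to_dist_sym(32768) == 29);	/* msb = 13: 26 + 3 */
+}
+#endif
+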
+void isal_update_histogram_dict(uint8_t * start_stream, int dict_length, int length,
+ struct isal_huff_histogram *histogram)
+{
+ uint32_t literal = 0, hash;
+ uint16_t seen, *last_seen = histogram->hash_table;
+ uint8_t *current, *end_stream, *next_hash, *end, *end_dict;
+ uint32_t match_length;
+ uint32_t dist;
+ uint64_t *lit_len_histogram = histogram->lit_len_histogram;
+ uint64_t *dist_histogram = histogram->dist_histogram;
+
+ if (length <= 0)
+ return;
+
+ end_stream = start_stream + dict_length + length;
+ end_dict = start_stream + dict_length;
+
+ memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
+
+ for (current = start_stream; current < end_dict - 4; current++) {
+ literal = load_u32(current);
+ hash = compute_hash(literal) & LVL0_HASH_MASK;
+ last_seen[hash] = (current - start_stream) & 0xFFFF;
+ }
+
+ for (current = start_stream + dict_length; current < end_stream - 3; current++) {
+ literal = load_u32(current);
+ hash = compute_hash(literal) & LVL0_HASH_MASK;
+ seen = last_seen[hash];
+ last_seen[hash] = (current - start_stream) & 0xFFFF;
+ dist = (current - start_stream - seen) & 0xFFFF;
+ if (dist - 1 < D - 1) {
+ assert(start_stream <= current - dist);
+ match_length =
+ compare258(current - dist, current, end_stream - current);
+ if (match_length >= SHORTEST_MATCH) {
+ next_hash = current;
+#ifdef ISAL_LIMIT_HASH_UPDATE
+ end = next_hash + 3;
+#else
+ end = next_hash + match_length;
+#endif
+ if (end > end_stream - 3)
+ end = end_stream - 3;
+ next_hash++;
+ for (; next_hash < end; next_hash++) {
+ literal = load_u32(next_hash);
+ hash = compute_hash(literal) & LVL0_HASH_MASK;
+ last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
+ }
+
+ dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
+ lit_len_histogram[convert_length_to_len_sym(match_length)] +=
+ 1;
+ current += match_length - 1;
+ continue;
+ }
+ }
+ lit_len_histogram[literal & 0xFF] += 1;
+ }
+
+ for (; current < end_stream; current++)
+ lit_len_histogram[*current] += 1;
+
+ lit_len_histogram[256] += 1;
+ return;
+}
+
+int main(int argc, char *argv[])
+{
+ long int file_length;
+ int argi = 1;
+ uint8_t *stream = NULL;
+ struct isal_hufftables hufftables;
+ struct isal_huff_histogram histogram;
+ struct isal_zstream tmp_stream;
+ FILE *file = NULL;
+ FILE *dict_file = NULL;
+ FILE *hist_file = NULL;
+ long int dict_file_length = 0;
+ long int hist_file_length = 0;
+ uint8_t *dict_stream = NULL;
+
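+ /* Usage: [-d dict_file] [-h histogram_file] input_file [input_file ...]
+ * The options, when present, must appear in this order; the generated
+ * tables are written to hufftables_c.c in the working directory. */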
+ if (argc == 1) {
+ printf("Error, no input file.\n");
+ return 1;
+ }
+
+ if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') {
+ dict_file = fopen(argv[2], "r");
+ if (dict_file == NULL) {
+ printf("Error opening dictionary file %s\n", argv[2]);
+ return 1;
+ }
+
+ fseek(dict_file, 0, SEEK_END);
+ dict_file_length = ftell(dict_file);
+ fseek(dict_file, 0, SEEK_SET);
+ dict_file_length -= ftell(dict_file);
+ dict_stream = malloc(dict_file_length);
+ if (dict_stream == NULL) {
+ printf("Failed to allocate memory to read in dictionary file\n");
+ fclose(dict_file);
+ return 1;
+ }
+ if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) {
+ printf("Error occurred when reading dictionary file");
+ fclose(dict_file);
+ free(dict_stream);
+ return 1;
+ }
+ isal_update_histogram(dict_stream, dict_file_length, &histogram);
+
+ printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]);
+ argi += 2;
+ fclose(dict_file);
+ /* dict_stream is still needed below to prepend the dictionary
+ * to each input file; it is freed after the file loop. */
+ }
+
+ if ((argc > argi + 1) && argv[argi][0] == '-' && argv[argi][1] == 'h') {
+ hist_file = fopen(argv[argi + 1], "r+");
+ if (hist_file == NULL) {
+ printf("Error opening histogram file %s\n", argv[argi + 1]);
+ return 1;
+ }
+ fseek(hist_file, 0, SEEK_END);
+ hist_file_length = ftell(hist_file);
+ fseek(hist_file, 0, SEEK_SET);
+ hist_file_length -= ftell(hist_file);
+ if (hist_file_length > sizeof(histogram)) {
+ printf("Histogram file too long\n");
+ return 1;
+ }
+ if (fread(&histogram, 1, hist_file_length, hist_file) != hist_file_length) {
+ printf("Error occurred when reading history file");
+ fclose(hist_file);
+ return 1;
+ }
+ fseek(hist_file, 0, SEEK_SET);
+
+ printf("Read %ld bytes of history file %s\n", hist_file_length,
+ argv[argi + 1]);
+ argi += 2;
+ } else
+ memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
+
+ while (argi < argc) {
+ printf("Processing %s\n", argv[argi]);
+ file = fopen(argv[argi], "r");
+ if (file == NULL) {
+ printf("Error opening file\n");
+ return 1;
+ }
+ fseek(file, 0, SEEK_END);
+ file_length = ftell(file);
+ fseek(file, 0, SEEK_SET);
+ file_length -= ftell(file);
+ stream = malloc(file_length + dict_file_length);
+ if (stream == NULL) {
+ printf("Failed to allocate memory to read in file\n");
+ fclose(file);
+ return 1;
+ }
+ if (dict_file_length > 0)
+ memcpy(stream, dict_stream, dict_file_length);
+
+ if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) {
+ printf("Error occurred when reading file");
+ fclose(file);
+ free(stream);
+ return 1;
+ }
+
+ /* Create a histogram of frequency of symbols found in stream to
+ * generate the huffman tree.*/
+ if (0 == dict_file_length)
+ isal_update_histogram(stream, file_length, &histogram);
+ else
+ isal_update_histogram_dict(stream, dict_file_length, file_length,
+ &histogram);
+
+ fclose(file);
+ free(stream);
+ argi++;
+ }
+
+ /* Free the dictionary buffer kept alive for the loop above; free(NULL)
+ * is a no-op when no dictionary was supplied. */
+ free(dict_stream);
+
+ isal_create_hufftables(&hufftables, &histogram);
+
+ file = fopen("hufftables_c.c", "w");
+ if (file == NULL) {
+ printf("Error creating file hufftables_c.c\n");
+ return 1;
+ }
+
+ fprint_header(file);
+
+ fprintf(file, "\n");
+
+ fprint_hufftables(file, "hufftables_default", &hufftables);
+
+ fprintf(file, "\n");
+
+ isal_deflate_stateless_init(&tmp_stream);
+ isal_deflate_set_hufftables(&tmp_stream, NULL, IGZIP_HUFFTABLE_STATIC);
+ fprint_hufftables(file, "hufftables_static", tmp_stream.hufftables);
+
+ fclose(file);
+
+ if (hist_file) {
+ int len = fwrite(&histogram, 1, sizeof(histogram), hist_file);
+ printf("wrote %d bytes of histogram file\n", len);
+ fclose(hist_file);
+ }
+ return 0;
+}
diff --git a/src/isa-l/igzip/generate_static_inflate.c b/src/isa-l/igzip/generate_static_inflate.c
new file mode 100644
index 000000000..391767765
--- /dev/null
+++ b/src/isa-l/igzip/generate_static_inflate.c
@@ -0,0 +1,205 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include "igzip_lib.h"
+
+#define STATIC_INFLATE_FILE "static_inflate.h"
+#define DOUBLE_SYM_THRESH (4 * 1024)
+
+extern struct isal_hufftables hufftables_default;
+
+/**
+ * @brief Prints a table of uint16_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed.
+ * @param header: header printed before the table.
+ * @param footer: footer printed after the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
+ char *footer, char *begin_line)
+{
+ int i;
+ fprintf(outfile, "%s", header);
+ for (i = 0; i < length - 1; i++) {
+ if ((i & 7) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%04x,", table[i]);
+ }
+
+ if ((i & 7) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%04x", table[i]);
+ fprintf(outfile, "%s", footer);
+
+}
+
+/**
+ * @brief Prints a table of uint32_t elements to a file.
+ * @param outfile: the file the table is printed to.
+ * @param table: the table to be printed.
+ * @param length: number of elements to be printed.
+ * @param header: header printed before the table.
+ * @param footer: footer printed after the table.
+ * @param begin_line: string printed at beginning of new line
+ */
+void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
+ char *footer, char *begin_line)
+{
+ int i;
+ fprintf(outfile, "%s", header);
+ for (i = 0; i < length - 1; i++) {
+ if ((i & 3) == 0)
+ fprintf(outfile, "\n%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%08x,", table[i]);
+ }
+
+ if ((i & 3) == 0)
+ fprintf(outfile, "%s", begin_line);
+ else
+ fprintf(outfile, " ");
+ fprintf(outfile, "0x%08x", table[i]);
+ fprintf(outfile, "%s", footer);
+
+}
+
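+/* Emit compile-time guards so the generated tables are only used when the
+ * library is built with the same ISAL_DECODE_LONG_BITS and
+ * ISAL_DECODE_SHORT_BITS lookup-table sizes used to generate them. */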
+void fprint_header(FILE * output_file)
+{
+ fprintf(output_file, "#include \"igzip_lib.h\"\n\n");
+ fprintf(output_file, "#define LONG_BITS_CHECK %d\n", ISAL_DECODE_LONG_BITS);
+ fprintf(output_file, "#define SHORT_BITS_CHECK %d\n", ISAL_DECODE_SHORT_BITS);
+ fprintf(output_file,
+ "#if (LONG_BITS_CHECK == ISAL_DECODE_LONG_BITS) && (SHORT_BITS_CHECK == ISAL_DECODE_SHORT_BITS)\n"
+ "# define ISAL_STATIC_INFLATE_TABLE\n"
+ "#else\n"
+ "# warning \"Incompatible compile time defines for optimized static inflate table.\"\n"
+ "#endif\n\n");
+}
+
+int main(int argc, char *argv[])
+{
+ struct inflate_state state;
+ FILE *file;
+ uint8_t static_deflate_hdr = 3;
+ uint8_t tmp_space[8], *in_buf;
+
+ if (NULL == (in_buf = malloc(DOUBLE_SYM_THRESH + 1))) {
+ printf("Can not allocote memory\n");
+ return 1;
+ }
+
+ isal_inflate_init(&state);
+
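+ /* Assumption: only header processing is needed to populate the decode
+ * tables dumped below. The single byte 0b011 encodes BFINAL = 1 and
+ * BTYPE = 01 (static Huffman), so inflating it makes the decoder expand
+ * the fixed-code lookup tables. Zero the rest of the buffer so the bytes
+ * after the header are deterministic rather than uninitialized. */
+ memset(in_buf, 0, DOUBLE_SYM_THRESH + 1);
+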
+ memcpy(in_buf, &static_deflate_hdr, sizeof(static_deflate_hdr));
+ state.next_in = in_buf;
+ state.avail_in = DOUBLE_SYM_THRESH + 1;
+ state.next_out = tmp_space;
+ state.avail_out = sizeof(tmp_space);
+
+ isal_inflate(&state);
+
+ file = fopen(STATIC_INFLATE_FILE, "w");
+
+ if (file == NULL) {
+ printf("Error creating file hufftables_c.c\n");
+ return 1;
+ }
+ // Add decode tables for a static (fixed Huffman, BTYPE 01) block header
+
+ fprintf(file, "#ifndef STATIC_HEADER_H\n" "#define STATIC_HEADER_H\n\n");
+
+ fprint_header(file);
+
+ fprintf(file, "struct inflate_huff_code_large static_lit_huff_code = {\n");
+ fprint_uint32_table(file, state.lit_huff_code.short_code_lookup,
+ sizeof(state.lit_huff_code.short_code_lookup) / sizeof(uint32_t),
+ "\t.short_code_lookup = {", "\t},\n\n", "\t\t");
+ fprint_uint16_table(file, state.lit_huff_code.long_code_lookup,
+ sizeof(state.lit_huff_code.long_code_lookup) / sizeof(uint16_t),
+ "\t.long_code_lookup = {", "\t}\n", "\t\t");
+ fprintf(file, "};\n\n");
+
+ fprintf(file, "struct inflate_huff_code_small static_dist_huff_code = {\n");
+ fprint_uint16_table(file, state.dist_huff_code.short_code_lookup,
+ sizeof(state.dist_huff_code.short_code_lookup) / sizeof(uint16_t),
+ "\t.short_code_lookup = {", "\t},\n\n", "\t\t");
+ fprint_uint16_table(file, state.dist_huff_code.long_code_lookup,
+ sizeof(state.dist_huff_code.long_code_lookup) / sizeof(uint16_t),
+ "\t.long_code_lookup = {", "\t}\n", "\t\t");
+ fprintf(file, "};\n\n");
+
+ // Add other tables for known dynamic headers - level 0
+
+ isal_inflate_init(&state);
+
+ memcpy(in_buf, &hufftables_default.deflate_hdr,
+ sizeof(hufftables_default.deflate_hdr));
+ state.next_in = in_buf;
+ state.avail_in = DOUBLE_SYM_THRESH + 1;
+ state.next_out = tmp_space;
+ state.avail_out = sizeof(tmp_space);
+
+ isal_inflate(&state);
+
+ fprintf(file, "struct inflate_huff_code_large pregen_lit_huff_code = {\n");
+ fprint_uint32_table(file, state.lit_huff_code.short_code_lookup,
+ sizeof(state.lit_huff_code.short_code_lookup) / sizeof(uint32_t),
+ "\t.short_code_lookup = {", "\t},\n\n", "\t\t");
+ fprint_uint16_table(file, state.lit_huff_code.long_code_lookup,
+ sizeof(state.lit_huff_code.long_code_lookup) / sizeof(uint16_t),
+ "\t.long_code_lookup = {", "\t}\n", "\t\t");
+ fprintf(file, "};\n\n");
+
+ fprintf(file, "struct inflate_huff_code_small pregen_dist_huff_code = {\n");
+ fprint_uint16_table(file, state.dist_huff_code.short_code_lookup,
+ sizeof(state.dist_huff_code.short_code_lookup) / sizeof(uint16_t),
+ "\t.short_code_lookup = {", "\t},\n\n", "\t\t");
+ fprint_uint16_table(file, state.dist_huff_code.long_code_lookup,
+ sizeof(state.dist_huff_code.long_code_lookup) / sizeof(uint16_t),
+ "\t.long_code_lookup = {", "\t}\n", "\t\t");
+ fprintf(file, "};\n\n");
+
+ fprintf(file, "#endif\n");
+
+ fclose(file);
+ free(in_buf);
+ return 0;
+}
diff --git a/src/isa-l/igzip/heap_macros.asm b/src/isa-l/igzip/heap_macros.asm
new file mode 100644
index 000000000..4385fae66
--- /dev/null
+++ b/src/isa-l/igzip/heap_macros.asm
@@ -0,0 +1,98 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; heapify heap, heap_size, i, child, tmp1, tmp2, tmpd
+%macro heapify2 7
+%define %%heap %1 ; qword ptr
+%define %%heap_size %2 ; dword
+%define %%i %3 ; dword
+%define %%child %4 ; dword
+%define %%tmp1 %5 ; qword
+%define %%tmp2 %6 ; qword
+%define %%tmpd %7 ; dword
+ align 16
+%%heapify1:
+ lea %%child, [%%i + %%i]
+ cmp %%child, %%heap_size
+ ja %%end_heapify1
+ mov %%tmp1, [%%heap + %%child]
+ mov %%tmpd, %%child
+ mov %%tmp2, [%%heap + %%child + 8]
+ lea %%child, [%%child + 1]
+ cmove %%tmp2, %%tmp1
+ cmp %%tmp1, %%tmp2
+ cmovbe %%child, %%tmpd
+ cmovbe %%tmp2, %%tmp1
+ ; child is correct, %%tmp2 = heap[child]
+ mov %%tmp1, [%%heap + %%i]
+ cmp %%tmp1, %%tmp2
+ jbe %%end_heapify1
+ mov [%%heap + %%i], %%tmp2
+ mov [%%heap + %%child], %%tmp1
+ mov %%i, %%child
+ jmp %%heapify1
+%%end_heapify1:
+%endm
+
+; heapify heap, heap_size, i, child, tmp1, tmp2, tmpd, tmp3
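+; Standard sift-down on a min-heap of qword keys stored 1-based (children of
+; index i are at indices 2i and 2i+1); iterates until heap[i] is no larger
+; than either child, restoring the heap property.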
+%macro heapify 8
+%define %%heap %1 ; qword ptr
+%define %%heap_size %2 ; qword
+%define %%i %3 ; qword
+%define %%child %4 ; qword
+%define %%tmp1 %5 ; qword
+%define %%tmp2 %6 ; qword
+%define %%tmpd %7 ; qword
+%define %%tmp3 %8
+ align 16
+%%heapify1:
+ lea %%child, [%%i + %%i]
+; mov %%child, %%i
+; add %%child, %%child
+ cmp %%child, %%heap_size
+ ja %%end_heapify1
+ mov %%tmp1, [%%heap + %%child*8]
+ mov %%tmp2, [%%heap + %%child*8 + 8]
+ mov %%tmp3, [%%heap + %%i*8]
+ mov %%tmpd, %%child
+ add %%tmpd, 1
+
+ cmp %%tmp2, %%tmp1
+ cmovb %%child, %%tmpd
+ cmovb %%tmp1, %%tmp2
+ ; child is correct, tmp1 = heap[child]
+ cmp %%tmp3, %%tmp1
+ jbe %%end_heapify1
+ ; swap i and child
+ mov [%%heap + %%i*8], %%tmp1
+ mov [%%heap + %%child*8], %%tmp3
+ mov %%i, %%child
+ jmp %%heapify1
+%%end_heapify1:
+%endm
diff --git a/src/isa-l/igzip/huff_codes.c b/src/isa-l/igzip/huff_codes.c
new file mode 100644
index 000000000..7512af234
--- /dev/null
+++ b/src/isa-l/igzip/huff_codes.c
@@ -0,0 +1,1694 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "huff_codes.h"
+#include "huffman.h"
+#include "flatten_ll.h"
+
+/* The order code length codes are written in the dynamic code header. This is
+ * defined in RFC 1951 page 13 */
+static const uint8_t code_length_code_order[] =
+ { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
+
+static const uint32_t len_code_extra_bits[] = {
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2,
+ 0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4,
+ 0x5, 0x5, 0x5, 0x5, 0x0
+};
+
+static const uint32_t dist_code_extra_bits[] = {
+ 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
+ 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
+ 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
+ 0xb, 0xb, 0xc, 0xc, 0xd, 0xd
+};
+
+static struct hufftables_icf static_hufftables = {
+ .lit_len_table = {
+ {{{.code_and_extra = 0x00c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x08c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x04c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0cc,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x02c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ac,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x06c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ec,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x01c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x09c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x05c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0dc,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x03c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0bc,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x07c,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0fc,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x002,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x082,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x042,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0c2,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x022,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0a2,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x062,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0e2,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x012,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x092,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x052,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0d2,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x032,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0b2,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x072,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0f2,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x00a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x08a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x04a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ca,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x02a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0aa,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x06a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ea,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x01a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x09a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x05a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0da,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x03a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ba,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x07a,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0fa,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x006,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x086,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x046,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0c6,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x026,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0a6,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x066,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0e6,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x016,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x096,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x056,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0d6,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x036,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0b6,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x076,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0f6,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x00e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x08e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x04e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ce,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x02e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ae,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x06e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ee,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x01e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x09e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x05e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0de,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x03e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0be,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x07e,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0fe,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x001,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x081,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x041,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0c1,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x021,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0a1,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x061,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0e1,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x011,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x091,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x051,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0d1,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x031,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0b1,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x071,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0f1,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x009,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x089,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x049,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0c9,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x029,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0a9,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x069,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0e9,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x019,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x099,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x059,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0d9,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x039,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0b9,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x079,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0f9,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x005,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x085,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x045,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0c5,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x025,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0a5,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x065,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0e5,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x015,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x095,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x055,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0d5,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x035,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0b5,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x075,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0f5,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x00d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x08d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x04d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0cd,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x02d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ad,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x06d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0ed,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x01d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x09d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x05d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0dd,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x03d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0bd,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x07d,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0fd,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x013,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x113,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x093,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x193,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x053,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x153,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0d3,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1d3,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x033,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x133,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0b3,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1b3,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x073,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x173,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0f3,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1f3,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x00b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x10b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x08b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x18b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x04b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x14b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0cb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1cb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x02b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x12b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0ab,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1ab,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x06b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x16b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0eb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1eb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x01b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x11b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x09b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x19b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x05b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x15b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0db,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1db,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x03b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x13b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0bb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1bb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x07b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x17b,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0fb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1fb,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x007,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x107,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x087,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x187,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x047,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x147,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0c7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1c7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x027,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x127,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0a7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1a7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x067,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x167,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0e7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1e7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x017,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x117,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x097,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x197,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x057,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x157,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0d7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1d7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x037,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x137,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0b7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1b7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x077,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x177,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0f7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1f7,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x00f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x10f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x08f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x18f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x04f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x14f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0cf,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1cf,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x02f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x12f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0af,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1af,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x06f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x16f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0ef,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1ef,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x01f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x11f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x09f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x19f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x05f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x15f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0df,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1df,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x03f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x13f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0bf,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1bf,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x07f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x17f,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x0ff,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x1ff,.length2 = 0x9}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x040,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x020,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x060,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x010,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x050,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x030,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x070,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x008,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x048,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x028,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x068,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x018,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x058,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x038,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x078,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x004,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x044,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x024,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x064,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x014,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x054,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x034,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x074,.length2 = 0x7}}},
+ {{{.code_and_extra = 0x003,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x083,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x043,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0c3,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x023,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0a3,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x063,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x0e3,.length2 = 0x8}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}}},
+ .dist_table = {
+ {{{.code_and_extra = 0x000,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x010,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x008,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x018,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x10004,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x10014,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x2000c,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x2001c,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x30002,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x30012,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x4000a,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x4001a,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x50006,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x50016,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x6000e,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x6001e,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x70001,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x70011,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x80009,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x80019,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x90005,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x90015,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xa000d,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xa001d,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xb0003,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xb0013,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xc000b,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xc001b,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xd0007,.length2 = 0x5}}},
+ {{{.code_and_extra = 0xd0017,.length2 = 0x5}}},
+ {{{.code_and_extra = 0x000,.length2 = 0x0}}}}
+};
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+/* Version info */
+struct slver isal_update_histogram_slver_00010085;
+struct slver isal_update_histogram_slver = { 0x0085, 0x01, 0x00 };
+
+struct slver isal_create_hufftables_slver_00010086;
+struct slver isal_create_hufftables_slver = { 0x0086, 0x01, 0x00 };
+
+struct slver isal_create_hufftables_subset_slver_00010087;
+struct slver isal_create_hufftables_subset_slver = { 0x0087, 0x01, 0x00 };
+
+extern uint32_t build_huff_tree(struct heap_tree *heap, uint64_t heap_size, uint64_t node_ptr);
+extern void build_heap(uint64_t * heap, uint64_t heap_size);
+
+static uint32_t convert_dist_to_dist_sym(uint32_t dist);
+static uint32_t convert_length_to_len_sym(uint32_t length);
+
+static const uint8_t bitrev8[0x100] = {
+ 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+ 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+ 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+ 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+ 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+ 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+ 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+ 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+ 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+ 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+ 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+ 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+ 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+ 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+ 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+ 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+ 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+ 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+ 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+ 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+ 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+ 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+ 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+ 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+ 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+ 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+ 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+ 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+ 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+ 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+ 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+ 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
+
+// bit reverse low order LENGTH bits in code, and return result in low order bits
+static inline uint16_t bit_reverse(uint16_t code, uint32_t length)
+{
+ code = (bitrev8[code & 0x00FF] << 8) | (bitrev8[code >> 8]);
+ return (code >> (16 - length));
+}
+
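+/*
+ * Scans the input with the level-0 hash to find LZ77 matches and accumulates
+ * literal/length and distance symbol counts. Trailing bytes are counted as
+ * literals, and one end-of-block symbol (256) is always recorded.
+ */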
+void isal_update_histogram_base(uint8_t * start_stream, int length,
+ struct isal_huff_histogram *histogram)
+{
+ uint32_t literal = 0, hash;
+ uint16_t seen, *last_seen = histogram->hash_table;
+ uint8_t *current, *end_stream, *next_hash, *end;
+ uint32_t match_length;
+ uint32_t dist;
+ uint64_t *lit_len_histogram = histogram->lit_len_histogram;
+ uint64_t *dist_histogram = histogram->dist_histogram;
+
+ if (length <= 0)
+ return;
+
+ end_stream = start_stream + length;
+ memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
+ for (current = start_stream; current < end_stream - 3; current++) {
+ literal = load_u32(current);
+ hash = compute_hash(literal) & LVL0_HASH_MASK;
+ seen = last_seen[hash];
+ last_seen[hash] = (current - start_stream) & 0xFFFF;
+ dist = (current - start_stream - seen) & 0xFFFF;
+ if (dist - 1 < D - 1) {
+ assert(start_stream <= current - dist);
+ match_length =
+ compare258(current - dist, current, end_stream - current);
+ if (match_length >= SHORTEST_MATCH) {
+ next_hash = current;
+#ifdef ISAL_LIMIT_HASH_UPDATE
+ end = next_hash + 3;
+#else
+ end = next_hash + match_length;
+#endif
+ if (end > end_stream - 3)
+ end = end_stream - 3;
+ next_hash++;
+ for (; next_hash < end; next_hash++) {
+ literal = load_u32(next_hash);
+ hash = compute_hash(literal) & LVL0_HASH_MASK;
+ last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
+ }
+
+ dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
+ lit_len_histogram[convert_length_to_len_sym(match_length)] +=
+ 1;
+ current += match_length - 1;
+ continue;
+ }
+ }
+ lit_len_histogram[literal & 0xFF] += 1;
+ }
+
+ for (; current < end_stream; current++)
+ lit_len_histogram[*current] += 1;
+
+ lit_len_histogram[256] += 1;
+ return;
+}
+
+/**
+ * @brief Returns the deflate symbol value for a look back distance.
+ */
+static uint32_t convert_dist_to_dist_sym(uint32_t dist)
+{
+ assert(dist <= 32768 && dist > 0);
+ if (dist <= 32768) {
+ uint32_t msb = dist > 4 ? bsr(dist - 1) - 2 : 0;
+ return (msb * 2) + ((dist - 1) >> msb);
+ } else {
+ return ~0;
+ }
+}
+
+/**
+ * @brief Returns the deflate symbol value for a repeat length.
+ */
+static uint32_t convert_length_to_len_sym(uint32_t length)
+{
+ assert(length > 2 && length < 259);
+
+ /* Based on tables on page 11 in RFC 1951 */
+ if (length < 11)
+ return 257 + length - 3;
+ else if (length < 19)
+ return 261 + (length - 3) / 2;
+ else if (length < 35)
+ return 265 + (length - 3) / 4;
+ else if (length < 67)
+ return 269 + (length - 3) / 8;
+ else if (length < 131)
+ return 273 + (length - 3) / 16;
+ else if (length < 258)
+ return 277 + (length - 3) / 32;
+ else
+ return 285;
+}
+
+// For gen_huff_code_lens() below: upon return, codes[] contains the code
+// lengths and bl_count[] holds the count of codes at each length.
+
+/* Init heap with the histogram, and return the histogram size */
+static inline uint32_t init_heap32(struct heap_tree *heap_space, uint32_t * histogram,
+ uint32_t hist_size)
+{
+ uint32_t heap_size, i;
+
+ memset(heap_space, 0, sizeof(struct heap_tree));
+
+ heap_size = 0;
+ for (i = 0; i < hist_size; i++) {
+ if (histogram[i] != 0)
+ heap_space->heap[++heap_size] =
+ (((uint64_t) histogram[i]) << FREQ_SHIFT) | i;
+ }
+
+ // make sure heap has at least two elements in it
+ if (heap_size < 2) {
+ if (heap_size == 0) {
+ heap_space->heap[1] = 1ULL << FREQ_SHIFT;
+ heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
+ heap_size = 2;
+ } else {
+ // heap size == 1
+ if (histogram[0] == 0)
+ heap_space->heap[2] = 1ULL << FREQ_SHIFT;
+ else
+ heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
+ heap_size = 2;
+ }
+ }
+
+ build_heap(heap_space->heap, heap_size);
+
+ return heap_size;
+}
+
+static inline uint32_t init_heap64(struct heap_tree *heap_space, uint64_t * histogram,
+ uint64_t hist_size)
+{
+ uint32_t heap_size, i;
+
+ memset(heap_space, 0, sizeof(struct heap_tree));
+
+ heap_size = 0;
+ for (i = 0; i < hist_size; i++) {
+ if (histogram[i] != 0)
+ heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;
+ }
+
+ // make sure heap has at least two elements in it
+ if (heap_size < 2) {
+ if (heap_size == 0) {
+ heap_space->heap[1] = 1ULL << FREQ_SHIFT;
+ heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
+ heap_size = 2;
+ } else {
+ // heap size == 1
+ if (histogram[0] == 0)
+ heap_space->heap[2] = 1ULL << FREQ_SHIFT;
+ else
+ heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
+ heap_size = 2;
+ }
+ }
+
+ build_heap(heap_space->heap, heap_size);
+
+ return heap_size;
+}
+
+static inline uint32_t init_heap64_semi_complete(struct heap_tree *heap_space,
+ uint64_t * histogram, uint64_t hist_size,
+ uint64_t complete_start)
+{
+ uint32_t heap_size, i;
+
+ memset(heap_space, 0, sizeof(struct heap_tree));
+
+ heap_size = 0;
+ for (i = 0; i < complete_start; i++) {
+ if (histogram[i] != 0)
+ heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;
+ }
+
+ for (; i < hist_size; i++)
+ heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;
+
+ // make sure heap has at least two elements in it
+ if (heap_size < 2) {
+ if (heap_size == 0) {
+ heap_space->heap[1] = 1ULL << FREQ_SHIFT;
+ heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
+ heap_size = 2;
+ } else {
+ // heap size == 1
+ if (histogram[0] == 0)
+ heap_space->heap[2] = 1ULL << FREQ_SHIFT;
+ else
+ heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
+ heap_size = 2;
+ }
+ }
+
+ build_heap(heap_space->heap, heap_size);
+
+ return heap_size;
+}
+
+static inline uint32_t init_heap64_complete(struct heap_tree *heap_space, uint64_t * histogram,
+ uint64_t hist_size)
+{
+ uint32_t heap_size, i;
+
+ memset(heap_space, 0, sizeof(struct heap_tree));
+
+ heap_size = 0;
+ for (i = 0; i < hist_size; i++)
+ heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;
+
+ build_heap(heap_space->heap, heap_size);
+
+ return heap_size;
+}
+
+static inline uint32_t fix_code_lens(struct heap_tree *heap_space, uint32_t root_node,
+ uint32_t * bl_count, uint32_t max_code_len)
+{
+ struct tree_node *tree = heap_space->tree;
+ uint64_t *code_len_count = heap_space->code_len_count;
+ uint32_t i, j, k, child, depth, code_len;
+
+ // compute code lengths and code length counts
+ code_len = 0;
+ j = root_node;
+ for (i = root_node; i <= HEAP_TREE_NODE_START; i++) {
+ child = tree[i].child;
+ if (child > MAX_HISTHEAP_SIZE) {
+ depth = 1 + tree[i].depth;
+
+ tree[child].depth = depth;
+ tree[child - 1].depth = depth;
+ } else {
+ tree[j++] = tree[i];
+ depth = tree[i].depth;
+ while (code_len < depth) {
+ code_len++;
+ code_len_count[code_len] = 0;
+ }
+ code_len_count[depth]++;
+ }
+ }
+
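+ /* Length-limiting step: when the deepest code exceeds max_code_len, each
+ * iteration turns a leaf at some depth i < max_code_len into an internal
+ * node with two children at depth i + 1 while merging two of the deepest
+ * leaves into one a level up, preserving the Kraft sum and leaf count. */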
+ if (code_len > max_code_len) {
+ while (code_len > max_code_len) {
+ assert(code_len_count[code_len] > 1);
+ for (i = max_code_len - 1; i != 0; i--)
+ if (code_len_count[i] != 0)
+ break;
+ assert(i != 0);
+ code_len_count[i]--;
+ code_len_count[i + 1] += 2;
+ code_len_count[code_len - 1]++;
+ code_len_count[code_len] -= 2;
+ if (code_len_count[code_len] == 0)
+ code_len--;
+ }
+
+ bl_count[0] = 0;
+ for (i = 1; i <= code_len; i++)
+ bl_count[i] = code_len_count[i];
+ for (; i <= max_code_len; i++)
+ bl_count[i] = 0;
+
+ for (k = 1; code_len_count[k] == 0; k++) ;
+ for (i = root_node; i < j; i++) {
+ tree[i].depth = k;
+ code_len_count[k]--;
+ for (; code_len_count[k] == 0; k++) ;
+ }
+ } else {
+ bl_count[0] = 0;
+ for (i = 1; i <= code_len; i++)
+ bl_count[i] = code_len_count[i];
+ for (; i <= max_code_len; i++)
+ bl_count[i] = 0;
+ }
+
+ return j;
+
+}
+
+static inline void
+gen_huff_code_lens(struct heap_tree *heap_space, uint32_t heap_size, uint32_t * bl_count,
+ struct huff_code *codes, uint32_t codes_count, uint32_t max_code_len)
+{
+ struct tree_node *tree = heap_space->tree;
+ uint32_t root_node = HEAP_TREE_NODE_START, node_ptr;
+ uint32_t end_node;
+
+ root_node = build_huff_tree(heap_space, heap_size, root_node);
+
+ end_node = fix_code_lens(heap_space, root_node, bl_count, max_code_len);
+
+ memset(codes, 0, codes_count * sizeof(*codes));
+ for (node_ptr = root_node; node_ptr < end_node; node_ptr++)
+ codes[tree[node_ptr].child].length = tree[node_ptr].depth;
+
+}
+
+/**
+ * @brief Determines the code for each element of a deflate-compliant huffman
+ * tree and stores it in a lookup table.
+ * @requires huff_code_table must already contain the code length for each element.
+ * @param huff_code_table: A lookup table used to store the codes.
+ * @param table_length: The length of huff_code_table.
+ * @param count: a histogram representing the number of occurrences of codes of a given length.
+ */
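+/* Worked example of the canonical assignment (RFC 1951 3.2.2): with length
+ * counts {1: 1, 2: 1, 3: 2}, next_code becomes {1: 0, 2: 2, 3: 6}, so in
+ * symbol order the codes are 0, 10, 110 and 111; each code is then
+ * bit-reversed for LSB-first emission. */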
+static inline uint32_t set_huff_codes(struct huff_code *huff_code_table, int table_length,
+ uint32_t * count)
+{
+ /* Uses the algorithm described in the deflate standard, RFC 1951. */
+ int i;
+ uint16_t code = 0;
+ uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
+ uint32_t max_code = 0;
+
+ next_code[0] = code;
+
+ for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
+ next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;
+
+ for (i = 0; i < table_length; i++) {
+ if (huff_code_table[i].length != 0) {
+ huff_code_table[i].code =
+ bit_reverse(next_code[huff_code_table[i].length],
+ huff_code_table[i].length);
+ next_code[huff_code_table[i].length] += 1;
+ max_code = i;
+ }
+ }
+
+ return max_code;
+}
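+
+/* Worked example (illustrative only): with code lengths {F:2, A:3, B:3, C:3,
+ * D:3, E:3, G:4, H:4}, count[2] = 1, count[3] = 5 and count[4] = 2, so
+ * next_code[2] = 0, next_code[3] = 2 and next_code[4] = 14. Assigning codes
+ * in symbol order gives F = 00, A = 010, B = 011, C = 100, D = 101, E = 110,
+ * G = 1110, H = 1111, matching the example in RFC 1951 section 3.2.2; each
+ * code is then bit-reversed so it can be emitted LSB first. */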
+
+// on input, codes contain the code lengths
+// on output, code contains:
+// 23:16 code length
+// 15:0 code value in low order bits
+// returns max code value
+static inline uint32_t set_dist_huff_codes(struct huff_code *codes, uint32_t * bl_count)
+{
+ uint32_t code, code_len, bits, i;
+ uint32_t next_code[MAX_DEFLATE_CODE_LEN + 1];
+ uint32_t max_code = 0;
+ const uint32_t num_codes = DIST_LEN;
+
+ code = bl_count[0] = 0;
+ for (bits = 1; bits <= MAX_HUFF_TREE_DEPTH; bits++) {
+ code = (code + bl_count[bits - 1]) << 1;
+ next_code[bits] = code;
+ }
+ for (i = 0; i < num_codes; i++) {
+ code_len = codes[i].length;
+ if (code_len != 0) {
+ codes[i].code = bit_reverse(next_code[code_len], code_len);
+ codes[i].extra_bit_count = dist_code_extra_bits[i];
+ next_code[code_len] += 1;
+ max_code = i;
+ }
+ }
+ return max_code;
+}
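+
+/* Illustrative example: distance symbol 13 covers distances 97 to 128 and
+ * carries 5 extra bits in deflate, so dist_code_extra_bits[13] = 5 and the
+ * entry for symbol 13 stores extra_bit_count = 5 next to its bit-reversed
+ * canonical code. */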
+
+/**
+ * @brief Creates the header for run length encoded huffman trees.
+ * @param header_bitbuf: bitbuf the header is written into.
+ * @param lookup_table: a huffman lookup table.
+ * @param huffman_rep: a run length encoded huffman tree; each element carries
+ * the extra bits associated with its code.
+ * @param huffman_rep_length: the length of huffman_rep.
+ * @param end_of_block: Value determining whether end of block header is produced or not;
+ * 0 corresponds to not end of block and all other inputs correspond to end of block.
+ * @param hclen: Length of huffman code for huffman codes minus 4.
+ * @param hlit: Length of literal/length table minus 257.
+ * @param hdist: Length of distance table minus 1.
+ * @returns the number of bits written for the header.
+ */
+static int create_huffman_header(struct BitBuf2 *header_bitbuf,
+ struct huff_code *lookup_table,
+ struct rl_code *huffman_rep,
+ uint16_t huffman_rep_length, uint32_t end_of_block,
+ uint32_t hclen, uint32_t hlit, uint32_t hdist)
+{
+ /* hlit, hdist, hclen are as defined in the deflate standard, head is the
+ * first three deflate header bits.*/
+ int i;
+ uint64_t bit_count;
+ uint64_t data;
+ struct huff_code huffman_value;
+ const uint32_t extra_bits[3] = { 2, 3, 7 };
+
+ bit_count = buffer_bits_used(header_bitbuf);
+
+ data = (end_of_block ? 5 : 4) | (hlit << 3) | (hdist << 8) | (hclen << 13);
+ data |= ((lookup_table[code_length_code_order[0]].length) << DYN_HDR_START_LEN);
+ write_bits(header_bitbuf, data, DYN_HDR_START_LEN + 3);
+ data = 0;
+ for (i = hclen + 3; i >= 1; i--)
+ data = (data << 3) | lookup_table[code_length_code_order[i]].length;
+
+ write_bits(header_bitbuf, data, (hclen + 3) * 3);
+
+ for (i = 0; i < huffman_rep_length; i++) {
+ huffman_value = lookup_table[huffman_rep[i].code];
+
+ write_bits(header_bitbuf, (uint64_t) huffman_value.code,
+ (uint32_t) huffman_value.length);
+
+ if (huffman_rep[i].code > 15) {
+ write_bits(header_bitbuf, (uint64_t) huffman_rep[i].extra_bits,
+ (uint32_t) extra_bits[huffman_rep[i].code - 16]);
+ }
+ }
+ bit_count = buffer_bits_used(header_bitbuf) - bit_count;
+
+ return bit_count;
+}
+
+/**
+ * @brief Creates the dynamic huffman deflate header.
+ * @returns Returns the length of the header in bits.
+ * @requires This function requires header_bitbuf is large enough to store the whole header.
+ * @param header_bitbuf: The output header.
+ * @param huffman_rep: run length encoded huffman tree to be encoded in the header.
+ * @param length: the length of huffman_rep.
+ * @param histogram: histogram of the run length codes appearing in huffman_rep.
+ * @param hlit: Length of literal/length table minus 257.
+ * @param hdist: Length of distance table minus 1.
+ * @param end_of_block: Value determining whether end of block header is produced or not;
+ * 0 corresponds to not end of block and all other inputs correspond to end of block.
+ */
+static inline int create_header(struct BitBuf2 *header_bitbuf, struct rl_code *huffman_rep,
+ uint32_t length, uint64_t * histogram, uint32_t hlit,
+ uint32_t hdist, uint32_t end_of_block)
+{
+ int i;
+
+ uint32_t heap_size;
+ struct heap_tree heap_space;
+ uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
+ struct huff_code lookup_table[HUFF_LEN];
+
+ /* hlit, hdist, and hclen are defined in RFC 1951 page 13 */
+ uint32_t hclen;
+ uint64_t bit_count;
+
+ /* Create a huffman tree to encode run length encoded representation. */
+ heap_size = init_heap64(&heap_space, histogram, HUFF_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)lookup_table, HUFF_LEN, 7);
+ set_huff_codes(lookup_table, HUFF_LEN, code_len_count);
+
+ /* Calculate hclen */
+ for (i = CODE_LEN_CODES - 1; i > 3; i--) /* i must be at least 4 */
+ if (lookup_table[code_length_code_order[i]].length != 0)
+ break;
+
+ hclen = i - 3;
+
+ /* Generate actual header. */
+ bit_count = create_huffman_header(header_bitbuf, lookup_table, huffman_rep,
+ length, end_of_block, hclen, hlit, hdist);
+
+ return bit_count;
+}
+
+static inline
+ struct rl_code *write_rl(struct rl_code *pout, uint16_t last_len, uint32_t run_len,
+ uint64_t * counts)
+{
+ if (last_len == 0) {
+ while (run_len > 138) {
+ pout->code = 18;
+ pout->extra_bits = 138 - 11;
+ pout++;
+ run_len -= 138;
+ counts[18]++;
+ }
+ // 1 <= run_len <= 138
+ if (run_len > 10) {
+ pout->code = 18;
+ pout->extra_bits = run_len - 11;
+ pout++;
+ counts[18]++;
+ } else if (run_len > 2) {
+ pout->code = 17;
+ pout->extra_bits = run_len - 3;
+ pout++;
+ counts[17]++;
+ } else if (run_len == 1) {
+ pout->code = 0;
+ pout->extra_bits = 0;
+ pout++;
+ counts[0]++;
+ } else {
+ assert(run_len == 2);
+ pout[0].code = 0;
+ pout[0].extra_bits = 0;
+ pout[1].code = 0;
+ pout[1].extra_bits = 0;
+ pout += 2;
+ counts[0] += 2;
+ }
+ } else {
+ // last_len != 0
+ pout->code = last_len;
+ pout->extra_bits = 0;
+ pout++;
+ counts[last_len]++;
+ run_len--;
+ if (run_len != 0) {
+ while (run_len > 6) {
+ pout->code = 16;
+ pout->extra_bits = 6 - 3;
+ pout++;
+ run_len -= 6;
+ counts[16]++;
+ }
+ // 1 <= run_len <= 6
+ switch (run_len) {
+ case 1:
+ pout->code = last_len;
+ pout->extra_bits = 0;
+ pout++;
+ counts[last_len]++;
+ break;
+ case 2:
+ pout[0].code = last_len;
+ pout[0].extra_bits = 0;
+ pout[1].code = last_len;
+ pout[1].extra_bits = 0;
+ pout += 2;
+ counts[last_len] += 2;
+ break;
+ default: // 3...6
+ pout->code = 16;
+ pout->extra_bits = run_len - 3;
+ pout++;
+ counts[16]++;
+ }
+ }
+ }
+ return pout;
+}
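+
+/* Worked examples (illustrative only): a run of 139 zero lengths emits code
+ * 18 with extra_bits 127 (covering 138 zeros) followed by a single code 0.
+ * A non-zero length 5 repeated 9 times emits the literal code 5 once, then
+ * code 16 with extra_bits 3 (repeat the previous length 6 more times), then
+ * two more literal code 5s, accounting for 1 + 6 + 2 = 9 lengths. */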
+
+// convert codes into run-length symbols, write symbols into OUT
+// generate histogram into COUNTS (assumed to be initialized to 0)
+// Format of OUT:
+// 4:0 code (0...18)
+// 15:8 Extra bits (0...127)
+// returns number of symbols in out
+static inline uint32_t rl_encode(uint16_t * codes, uint32_t num_codes, uint64_t * counts,
+ struct rl_code *out)
+{
+ uint32_t i, run_len;
+ uint16_t last_len, len;
+ struct rl_code *pout;
+
+ pout = out;
+ last_len = codes[0];
+ run_len = 1;
+ for (i = 1; i < num_codes; i++) {
+ len = codes[i];
+ if (len == last_len) {
+ run_len++;
+ continue;
+ }
+ pout = write_rl(pout, last_len, run_len, counts);
+ last_len = len;
+ run_len = 1;
+ }
+ pout = write_rl(pout, last_len, run_len, counts);
+
+ return (uint32_t) (pout - out);
+}
+
+/**
+ * @brief Creates a two table representation of huffman codes.
+ * @param code_table: output table containing the code
+ * @param code_length_table: output table containing the code length
+ * @param length: the length of hufftable
+ * @param hufftable: a huffman lookup table
+ */
+static void create_code_tables(uint16_t * code_table, uint8_t * code_length_table,
+ uint32_t length, struct huff_code *hufftable)
+{
+ int i;
+ for (i = 0; i < length; i++) {
+ code_table[i] = hufftable[i].code;
+ code_length_table[i] = hufftable[i].length;
+ }
+}
+
+/**
+ * @brief Creates a packed representation of length huffman codes.
+ * @details In packed_table, bits 4:0 contain the total code length (huffman
+ * code length plus extra bit count) and bits 31:5 contain the code with its
+ * extra bits appended above the huffman code.
+ * @param packed_table: the output table
+ * @param lit_len_hufftable: a literal/length huffman lookup table
+ */
+static void create_packed_len_table(uint32_t * packed_table,
+ struct huff_code *lit_len_hufftable)
+{
+ int i, count = 0;
+ uint16_t extra_bits;
+ uint16_t extra_bits_count = 0;
+
+	/* gain_extra_bits is the next symbol at which the number of extra bits
+	 * in length codes increases. */
+ uint16_t gain_extra_bits = LEN_EXTRA_BITS_START;
+
+ for (i = 257; i < LIT_LEN - 1; i++) {
+ for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
+ if (count > 254)
+ break;
+ packed_table[count++] =
+ (extra_bits << (lit_len_hufftable[i].length + LENGTH_BITS)) |
+ (lit_len_hufftable[i].code << LENGTH_BITS) |
+ (lit_len_hufftable[i].length + extra_bits_count);
+ }
+
+ if (i == gain_extra_bits) {
+ gain_extra_bits += LEN_EXTRA_BITS_INTERVAL;
+ extra_bits_count += 1;
+ }
+ }
+
+ packed_table[count] = (lit_len_hufftable[LIT_LEN - 1].code << LENGTH_BITS) |
+ (lit_len_hufftable[LIT_LEN - 1].length);
+}
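+
+/* Worked example (illustrative only): length symbol 265 covers match lengths
+ * 11 and 12 with 1 extra bit (extra_bits_count becomes 1 after symbol
+ * LEN_EXTRA_BITS_START == 264). If symbol 265 has a 7-bit huffman code, two
+ * packed entries are written, one per match length, each equal to
+ * (extra_bit << (7 + LENGTH_BITS)) | (code << LENGTH_BITS) | (7 + 1),
+ * i.e. a total emit length of 8 bits in the low 5 bits. */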
+
+/**
+ * @brief Creates a packed representation of distance huffman codes.
+ * @details In packed_table, bits 4:0 contain the total code length (huffman
+ * code length plus extra bit count) and bits 31:5 contain the code with its
+ * extra bits appended above the huffman code.
+ * @param packed_table: the output table
+ * @param length: the length of packed_table
+ * @param dist_hufftable: a distance huffman lookup table
+ */
+static void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
+ struct huff_code *dist_hufftable)
+{
+ int i, count = 0;
+ uint16_t extra_bits;
+ uint16_t extra_bits_count = 0;
+
+	/* gain_extra_bits is the next symbol at which the number of extra bits
+	 * in distance codes increases. */
+ uint16_t gain_extra_bits = DIST_EXTRA_BITS_START;
+
+ for (i = 0; i < DIST_LEN; i++) {
+ for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
+ if (count >= length)
+ return;
+
+ packed_table[count++] =
+ (extra_bits << (dist_hufftable[i].length + LENGTH_BITS)) |
+ (dist_hufftable[i].code << LENGTH_BITS) |
+ (dist_hufftable[i].length + extra_bits_count);
+
+ }
+
+ if (i == gain_extra_bits) {
+ gain_extra_bits += DIST_EXTRA_BITS_INTERVAL;
+ extra_bits_count += 1;
+ }
+ }
+}
+
+/**
+ * @brief Checks to see if the hufftables are usable by igzip
+ *
+ * @param lit_len_hufftable: literal/length huffman code
+ * @param dist_hufftable: distance huffman code
+ * @returns Returns 0 if the tables are usable, non-zero if the worst case
+ * literal, length and distance emission would not fit in the bit buffer
+ */
+static int are_hufftables_useable(struct huff_code *lit_len_hufftable,
+ struct huff_code *dist_hufftable)
+{
+ int max_lit_code_len = 0, max_len_code_len = 0, max_dist_code_len = 0;
+ int dist_extra_bits = 0, len_extra_bits = 0;
+ int gain_dist_extra_bits = DIST_EXTRA_BITS_START;
+ int gain_len_extra_bits = LEN_EXTRA_BITS_START;
+ int max_code_len;
+ int i;
+
+ for (i = 0; i < LIT_LEN; i++)
+ if (lit_len_hufftable[i].length > max_lit_code_len)
+ max_lit_code_len = lit_len_hufftable[i].length;
+
+ for (i = 257; i < LIT_LEN - 1; i++) {
+ if (lit_len_hufftable[i].length + len_extra_bits > max_len_code_len)
+ max_len_code_len = lit_len_hufftable[i].length + len_extra_bits;
+
+ if (i == gain_len_extra_bits) {
+ gain_len_extra_bits += LEN_EXTRA_BITS_INTERVAL;
+ len_extra_bits += 1;
+ }
+ }
+
+ for (i = 0; i < DIST_LEN; i++) {
+ if (dist_hufftable[i].length + dist_extra_bits > max_dist_code_len)
+ max_dist_code_len = dist_hufftable[i].length + dist_extra_bits;
+
+ if (i == gain_dist_extra_bits) {
+ gain_dist_extra_bits += DIST_EXTRA_BITS_INTERVAL;
+ dist_extra_bits += 1;
+ }
+ }
+
+ max_code_len = max_lit_code_len + max_len_code_len + max_dist_code_len;
+
+	/* Some versions of igzip can write up to one literal, one length and
+	 * one distance code at the same time. This checks to make sure that is
+	 * always writable in bitbuf. */
+ return (max_code_len > MAX_BITBUF_BIT_WRITE);
+}
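+
+/* Illustrative arithmetic, assuming MAX_BITBUF_BIT_WRITE is the largest
+ * number of bits a single bitbuf write may carry: with the fallback limits
+ * MAX_SAFE_LIT_CODE_LEN = 13 and MAX_SAFE_DIST_CODE_LEN = 12, the worst case
+ * emission is 13 + (13 + 5) + (12 + 13) = 56 bits, since length codes carry
+ * at most 5 extra bits and distance codes at most 13. */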
+
+int isal_create_hufftables(struct isal_hufftables *hufftables,
+ struct isal_huff_histogram *histogram)
+{
+ struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
+ uint64_t bit_count;
+ int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE);
+ struct heap_tree heap_space;
+ uint32_t heap_size;
+ uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
+ struct BitBuf2 header_bitbuf;
+ uint32_t max_lit_len_sym;
+ uint32_t max_dist_sym;
+ uint32_t hlit, hdist, i;
+ uint16_t combined_table[LIT_LEN + DIST_LEN];
+ uint64_t count_histogram[HUFF_LEN];
+ struct rl_code rl_huff[LIT_LEN + DIST_LEN];
+ uint32_t rl_huff_len;
+
+ uint32_t *dist_table = hufftables->dist_table;
+ uint32_t *len_table = hufftables->len_table;
+ uint16_t *lit_table = hufftables->lit_table;
+ uint16_t *dcodes = hufftables->dcodes;
+ uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
+ uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
+ uint64_t *lit_len_histogram = histogram->lit_len_histogram;
+ uint64_t *dist_histogram = histogram->dist_histogram;
+
+ memset(hufftables, 0, sizeof(struct isal_hufftables));
+
+ heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN);
+ max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
+
+ heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)dist_huff_table, max_dist,
+ MAX_DEFLATE_CODE_LEN);
+ max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
+
+ if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
+ heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)lit_huff_table, LIT_LEN,
+ MAX_SAFE_LIT_CODE_LEN);
+ max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
+
+ heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)dist_huff_table, max_dist,
+ MAX_SAFE_DIST_CODE_LEN);
+ max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
+
+ }
+
+ create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
+ dist_huff_table + DCODE_OFFSET);
+
+ create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table);
+
+ create_packed_len_table(len_table, lit_huff_table);
+ create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table);
+
+ set_buf(&header_bitbuf, hufftables->deflate_hdr, sizeof(hufftables->deflate_hdr));
+ init(&header_bitbuf);
+
+ hlit = max_lit_len_sym - 256;
+ hdist = max_dist_sym;
+
+ /* Run length encode the length and distance huffman codes */
+ memset(count_histogram, 0, sizeof(count_histogram));
+ for (i = 0; i < 257 + hlit; i++)
+ combined_table[i] = lit_huff_table[i].length;
+ for (i = 0; i < 1 + hdist; i++)
+ combined_table[i + hlit + 257] = dist_huff_table[i].length;
+ rl_huff_len =
+ rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff);
+
+ /* Create header */
+ bit_count =
+ create_header(&header_bitbuf, rl_huff, rl_huff_len,
+ count_histogram, hlit, hdist, LAST_BLOCK);
+ flush(&header_bitbuf);
+
+ hufftables->deflate_hdr_count = bit_count / 8;
+ hufftables->deflate_hdr_extra_bits = bit_count % 8;
+
+ return 0;
+}
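+
+/* Minimal usage sketch (illustrative only, assuming the public igzip_lib.h
+ * API: isal_update_histogram(), isal_deflate_init() and the hufftables
+ * pointer in struct isal_zstream):
+ *
+ *	struct isal_huff_histogram histogram;
+ *	struct isal_hufftables hufftables;
+ *	struct isal_zstream stream;
+ *
+ *	memset(&histogram, 0, sizeof(histogram));
+ *	isal_update_histogram(sample_buf, sample_len, &histogram);
+ *	isal_create_hufftables(&hufftables, &histogram);
+ *
+ *	isal_deflate_init(&stream);
+ *	stream.hufftables = &hufftables;	// compress with the custom code
+ */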
+
+int isal_create_hufftables_subset(struct isal_hufftables *hufftables,
+ struct isal_huff_histogram *histogram)
+{
+ struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
+ uint64_t bit_count;
+ int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE);
+ struct heap_tree heap_space;
+ uint32_t heap_size;
+ uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
+ struct BitBuf2 header_bitbuf;
+ uint32_t max_lit_len_sym;
+ uint32_t max_dist_sym;
+ uint32_t hlit, hdist, i;
+ uint16_t combined_table[LIT_LEN + DIST_LEN];
+ uint64_t count_histogram[HUFF_LEN];
+ struct rl_code rl_huff[LIT_LEN + DIST_LEN];
+ uint32_t rl_huff_len;
+
+ uint32_t *dist_table = hufftables->dist_table;
+ uint32_t *len_table = hufftables->len_table;
+ uint16_t *lit_table = hufftables->lit_table;
+ uint16_t *dcodes = hufftables->dcodes;
+ uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
+ uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
+ uint64_t *lit_len_histogram = histogram->lit_len_histogram;
+ uint64_t *dist_histogram = histogram->dist_histogram;
+
+ memset(hufftables, 0, sizeof(struct isal_hufftables));
+
+ heap_size =
+ init_heap64_semi_complete(&heap_space, lit_len_histogram, LIT_LEN,
+ ISAL_DEF_LIT_SYMBOLS);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN);
+ max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
+
+ heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)dist_huff_table, max_dist,
+ MAX_DEFLATE_CODE_LEN);
+ max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
+
+ if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
+ heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)lit_huff_table, LIT_LEN,
+ MAX_SAFE_LIT_CODE_LEN);
+ max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
+
+ heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, code_len_count,
+ (struct huff_code *)dist_huff_table, max_dist,
+ MAX_SAFE_DIST_CODE_LEN);
+ max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
+
+ }
+
+ create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
+ dist_huff_table + DCODE_OFFSET);
+
+ create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table);
+
+ create_packed_len_table(len_table, lit_huff_table);
+ create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table);
+
+ set_buf(&header_bitbuf, hufftables->deflate_hdr, sizeof(hufftables->deflate_hdr));
+ init(&header_bitbuf);
+
+ hlit = max_lit_len_sym - 256;
+ hdist = max_dist_sym;
+
+ /* Run length encode the length and distance huffman codes */
+ memset(count_histogram, 0, sizeof(count_histogram));
+ for (i = 0; i < 257 + hlit; i++)
+ combined_table[i] = lit_huff_table[i].length;
+ for (i = 0; i < 1 + hdist; i++)
+ combined_table[i + hlit + 257] = dist_huff_table[i].length;
+ rl_huff_len =
+ rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff);
+
+ /* Create header */
+ bit_count =
+ create_header(&header_bitbuf, rl_huff, rl_huff_len,
+ count_histogram, hlit, hdist, LAST_BLOCK);
+ flush(&header_bitbuf);
+
+ hufftables->deflate_hdr_count = bit_count / 8;
+ hufftables->deflate_hdr_extra_bits = bit_count % 8;
+
+ return 0;
+}
+
+static void expand_hufftables_icf(struct hufftables_icf *hufftables)
+{
+ uint32_t i, eb, j, k, len, code;
+ struct huff_code orig[21], *p_code;
+ struct huff_code *lit_len_codes = hufftables->lit_len_table;
+ struct huff_code *dist_codes = hufftables->dist_table;
+
+ for (i = 0; i < 21; i++)
+ orig[i] = lit_len_codes[i + 265];
+
+ p_code = &lit_len_codes[265];
+
+ i = 0;
+ for (eb = 1; eb < 6; eb++) {
+ for (k = 0; k < 4; k++) {
+ len = orig[i].length;
+ code = orig[i++].code;
+ for (j = 0; j < (1u << eb); j++) {
+ p_code->code_and_extra = code | (j << len);
+ p_code->length = len + eb;
+ p_code++;
+ }
+ } // end for k
+ } // end for eb
+ // fix up last record
+ p_code[-1] = orig[i];
+
+ dist_codes[DIST_LEN].code_and_extra = 0;
+ dist_codes[DIST_LEN].length = 0;
+}
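+
+/* Illustrative example: if symbol 265 (the first length symbol with an extra
+ * bit) has code C of length L, the loop above replaces it with two entries
+ * for match lengths 11 and 12, storing code_and_extra = C | (j << L) for
+ * j = 0, 1 and length = L + 1, so emitting a length becomes a single table
+ * lookup. */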
+
+uint64_t
+create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf *hufftables,
+ struct isal_mod_hist *hist, uint32_t end_of_block)
+{
+ uint32_t bl_count[MAX_DEFLATE_CODE_LEN + 1];
+ uint32_t max_ll_code, max_d_code;
+ struct heap_tree heap_space;
+ uint32_t heap_size;
+ struct rl_code cl_tokens[LIT_LEN + DIST_LEN];
+ uint32_t num_cl_tokens;
+ uint64_t cl_counts[CODE_LEN_CODES];
+ uint16_t combined_table[LIT_LEN + DIST_LEN];
+ int i;
+ uint64_t compressed_len = 0;
+ uint64_t static_compressed_len = 3; /* The static header size */
+ struct BitBuf2 bb_tmp;
+
+ struct huff_code *ll_codes = hufftables->lit_len_table;
+ struct huff_code *d_codes = hufftables->dist_table;
+ uint32_t *ll_hist = hist->ll_hist;
+ uint32_t *d_hist = hist->d_hist;
+ struct huff_code *static_ll_codes = static_hufftables.lit_len_table;
+ struct huff_code *static_d_codes = static_hufftables.dist_table;
+
+ memcpy(&bb_tmp, bb, sizeof(struct BitBuf2));
+
+ flatten_ll(hist->ll_hist);
+
+ // make sure EOB is present
+ if (ll_hist[256] == 0)
+ ll_hist[256] = 1;
+
+ heap_size = init_heap32(&heap_space, ll_hist, LIT_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, bl_count,
+ ll_codes, LIT_LEN, MAX_DEFLATE_CODE_LEN);
+ max_ll_code = set_huff_codes(ll_codes, LIT_LEN, bl_count);
+
+ heap_size = init_heap32(&heap_space, d_hist, DIST_LEN);
+ gen_huff_code_lens(&heap_space, heap_size, bl_count, d_codes,
+ DIST_LEN, MAX_DEFLATE_CODE_LEN);
+ max_d_code = set_dist_huff_codes(d_codes, bl_count);
+
+ assert(max_ll_code >= 256); // must be EOB code
+ assert(max_d_code != 0);
+
+ /* Run length encode the length and distance huffman codes */
+ memset(cl_counts, 0, sizeof(cl_counts));
+
+ for (i = 0; i <= 256; i++) {
+ combined_table[i] = ll_codes[i].length;
+ compressed_len += ll_codes[i].length * ll_hist[i];
+ static_compressed_len += static_ll_codes[i].length * ll_hist[i];
+ }
+
+ for (; i < max_ll_code + 1; i++) {
+ combined_table[i] = ll_codes[i].length;
+ compressed_len +=
+ (ll_codes[i].length + len_code_extra_bits[i - 257]) * ll_hist[i];
+ static_compressed_len +=
+ (static_ll_codes[i].length + len_code_extra_bits[i - 257]) * ll_hist[i];
+ }
+
+ for (i = 0; i < max_d_code + 1; i++) {
+ combined_table[i + max_ll_code + 1] = d_codes[i].length;
+ compressed_len += (d_codes[i].length + dist_code_extra_bits[i]) * d_hist[i];
+ static_compressed_len +=
+ (static_d_codes[i].length + dist_code_extra_bits[i]) * d_hist[i];
+ }
+
+ if (static_compressed_len > compressed_len) {
+ num_cl_tokens = rl_encode(combined_table, max_ll_code + max_d_code + 2,
+ cl_counts, cl_tokens);
+
+ /* Create header */
+ create_header(bb, cl_tokens, num_cl_tokens, cl_counts, max_ll_code - 256,
+ max_d_code, end_of_block);
+ compressed_len += 8 * buffer_used(bb) + bb->m_bit_count;
+ }
+
+ /* Substitute in static block since it creates smaller block */
+ if (static_compressed_len <= compressed_len) {
+ memcpy(hufftables, &static_hufftables, sizeof(struct hufftables_icf));
+ memcpy(bb, &bb_tmp, sizeof(struct BitBuf2));
+ end_of_block = end_of_block ? 1 : 0;
+ write_bits(bb, 0x2 | end_of_block, 3);
+ compressed_len = static_compressed_len;
+ }
+
+ expand_hufftables_icf(hufftables);
+ return compressed_len;
+}
diff --git a/src/isa-l/igzip/huff_codes.h b/src/isa-l/igzip/huff_codes.h
new file mode 100644
index 000000000..d353d27ea
--- /dev/null
+++ b/src/isa-l/igzip/huff_codes.h
@@ -0,0 +1,170 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef HUFF_CODES_H
+#define HUFF_CODES_H
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include "igzip_lib.h"
+#include "bitbuf2.h"
+
+#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
+# include <immintrin.h>
+#ifdef _MSC_VER
+# include <intrin.h>
+#else
+# include <x86intrin.h>
+#endif
+#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86
+
+#define LIT_LEN ISAL_DEF_LIT_LEN_SYMBOLS
+#define DIST_LEN ISAL_DEF_DIST_SYMBOLS
+#define CODE_LEN_CODES 19
+#define HUFF_LEN 19
+#ifdef LONGER_HUFFTABLE
+# define DCODE_OFFSET 26
+#else
+# define DCODE_OFFSET 0
+#endif
+#define DYN_HDR_START_LEN 17
+#define MAX_HISTHEAP_SIZE LIT_LEN
+#define MAX_HUFF_TREE_DEPTH 15
+#define D IGZIP_HIST_SIZE /* Amount of history */
+
+#define MAX_DEFLATE_CODE_LEN 15
+#define MAX_SAFE_LIT_CODE_LEN 13
+#define MAX_SAFE_DIST_CODE_LEN 12
+
+#define LONG_DIST_TABLE_SIZE 8192
+#define SHORT_DIST_TABLE_SIZE 2
+#define LEN_TABLE_SIZE 256
+#define LIT_TABLE_SIZE 257
+#define LAST_BLOCK 1
+
+#define LEN_EXTRA_BITS_START 264
+#define LEN_EXTRA_BITS_INTERVAL 4
+#define DIST_EXTRA_BITS_START 3
+#define DIST_EXTRA_BITS_INTERVAL 2
+
+#define INVALID_LIT_LEN_HUFFCODE 1
+#define INVALID_DIST_HUFFCODE 1
+#define INVALID_HUFFCODE 1
+
+#define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1)
+#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_SIZE - 1)
+#define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1)
+
+#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
+#define LVL1_HASH_MASK (IGZIP_LVL1_HASH_SIZE - 1)
+#define LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1)
+#define LVL3_HASH_MASK (IGZIP_LVL3_HASH_SIZE - 1)
+#define SHORTEST_MATCH 4
+
+#define LENGTH_BITS 5
+#define FREQ_SHIFT 16
+#define FREQ_MASK_HI (0xFFFFFFFFFFFF0000)
+#define DEPTH_SHIFT 24
+#define DEPTH_MASK 0x7F
+#define DEPTH_MASK_HI (DEPTH_MASK << DEPTH_SHIFT)
+#define DEPTH_1 (1 << DEPTH_SHIFT)
+#define HEAP_TREE_SIZE (3*MAX_HISTHEAP_SIZE + 1)
+#define HEAP_TREE_NODE_START (HEAP_TREE_SIZE-1)
+#define MAX_BL_CODE_LEN 7
+
+/**
+ * @brief Structure used to store huffman codes
+ */
+struct huff_code {
+ union {
+ struct {
+ uint32_t code_and_extra:24;
+ uint32_t length2:8;
+ };
+
+ struct {
+ uint16_t code;
+ uint8_t extra_bit_count;
+ uint8_t length;
+ };
+
+ uint32_t code_and_length;
+ };
+};
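+
+/* Field aliasing note (illustrative, assuming the little-endian bit-field
+ * layout igzip targets): the views share the same 32 bits, so length2
+ * aliases length (bits 31:24) and code_and_extra aliases code plus
+ * extra_bit_count (bits 23:0). expand_hufftables_icf() relies on this,
+ * storing a code with its extra bits pre-appended in code_and_extra and the
+ * total emit length in length. */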
+
+struct tree_node {
+ uint32_t child;
+ uint32_t depth;
+};
+
+struct heap_tree {
+ union {
+ uint64_t heap[HEAP_TREE_SIZE];
+ uint64_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
+ struct tree_node tree[HEAP_TREE_SIZE];
+ };
+};
+
+struct rl_code {
+ uint8_t code;
+ uint8_t extra_bits;
+};
+
+struct hufftables_icf {
+ union {
+ struct {
+ struct huff_code dist_lit_table[288];
+ struct huff_code len_table[256];
+ };
+
+ struct {
+ struct huff_code dist_table[31];
+ struct huff_code lit_len_table[513];
+ };
+ };
+};
+
+/**
+ * @brief Creates a representation of the huffman code from a histogram used to
+ * decompress the intermediate compression format.
+ *
+ * @param bb: bitbuf structure where the huffman code header is written
+ * @param hufftables: output huffman code representation
+ * @param hist: histogram used to generate the huffman code
+ * @param end_of_block: flag whether this is the final huffman code
+ *
+ * @returns Returns the length in bits of the block with histogram hist encoded
+ * with the set hufftable
+ */
+uint64_t
+create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf * hufftables,
+ struct isal_mod_hist *hist, uint32_t end_of_block);
+
+#endif
diff --git a/src/isa-l/igzip/huffman.asm b/src/isa-l/igzip/huffman.asm
new file mode 100644
index 000000000..9056b5ee4
--- /dev/null
+++ b/src/isa-l/igzip/huffman.asm
@@ -0,0 +1,249 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "lz0a_const.asm"
+%include "stdmac.asm"
+
+; Macros for doing Huffman Encoding
+
+%ifdef LONGER_HUFFTABLE
+ %if (D > 8192)
+		%error History D is larger than 8K, cannot use LONGER_HUFFTABLE
+ %else
+ %define DIST_TABLE_SIZE 8192
+ %define DECODE_OFFSET 26
+ %endif
+%else
+ %define DIST_TABLE_SIZE 2
+ %define DECODE_OFFSET 0
+%endif
+
+%define LEN_TABLE_SIZE 256
+%define LIT_TABLE_SIZE 257
+
+%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8)
+%define DIST_TABLE_OFFSET	(DIST_TABLE_START - 4 * 1)
+%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)
+%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
+%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
+%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
+%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
+;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
+;; struct isal_hufftables {
+;;	// deflate huffman tree header
+;;	uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE];
+;;
+;;	//!< Number of whole bytes in deflate_hdr
+;;	uint32_t deflate_hdr_count;
+;;
+;;	//!< Number of bits in the partial byte in header
+;;	uint32_t deflate_hdr_extra_bits;
+;;
+;;	//!< bits 4:0 are the code length, bits 31:5 are the code
+;;	uint32_t dist_table[DIST_TABLE_SIZE];
+;;
+;;	//!< bits 4:0 are the code length, bits 31:5 are the code
+;;	uint32_t len_table[LEN_TABLE_SIZE];
+;;
+;;	//!< literal/length code values
+;;	uint16_t lit_table[LIT_TABLE_SIZE];
+;;
+;;	//!< literal/length code lengths
+;;	uint8_t lit_table_sizes[LIT_TABLE_SIZE];
+;;
+;;	//!< distance code values
+;;	uint16_t dcodes[30 - DECODE_OFFSET];
+;;
+;;	//!< distance code lengths
+;;	uint8_t dcodes_sizes[30 - DECODE_OFFSET];
+;; };
+
+
+%ifdef LONGER_HUFFTABLE
+; Uses RCX, clobbers dist
+; get_dist_code dist, code, len
+%macro get_dist_code 4
+%define %%dist %1 ; 64-bit IN
+%define %%code %2d ; 32-bit OUT
+%define %%len %3d ; 32-bit OUT
+%define %%hufftables %4 ; address of the hufftable
+
+ mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ]
+ mov %%code, %%len
+ and %%len, 0x1F;
+ shr %%code, 5
+%endm
+
+%macro get_packed_dist_code 3
+%define %%dist %1 ; 64-bit IN
+%define %%code_len %2d ; 32-bit OUT
+%define %%hufftables %3 ; address of the hufftable
+ mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ]
+%endm
+
+%macro unpack_dist_code 2
+%define %%code %1d ; 32-bit OUT
+%define %%len %2d ; 32-bit OUT
+
+ mov %%len, %%code
+ and %%len, 0x1F;
+ shr %%code, 5
+%endm
+
+%else
+; Assumes (dist != 0)
+; Uses RCX, clobbers dist
+; void compute_dist_code dist, code, len
+%macro compute_dist_code 4
+%define %%dist %1 ; IN, clobbered
+%define %%distq %1
+%define %%code %2 ; OUT
+%define %%len %3 ; OUT
+%define %%hufftables %4
+
+ bsr rcx, %%dist ; ecx = msb = bsr(dist)
+ dec rcx ; ecx = num_extra_bits = msb - N
+ BZHI %%code, %%dist, rcx, %%len
+ SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits
+ lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2
+ mov %%len, rcx ; len = num_extra_bits
+ movzx rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT]
+ movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT]
+ SHLX %%code, %%code, rcx ; code = extra_bits << (sym & 0xF)
+ or %%code, %%dist ; code = (sym >> 4) | (extra_bits << (sym & 0xF))
+ add %%len, rcx ; len = num_extra_bits + (sym & 0xF)
+%endm
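+
+; Worked example (illustrative only), mirroring compute_dist_code() in
+; huffman.h: for deflate distance 100 the input is dist = 99, bsr gives bit
+; index 6 so num_extra_bits = 5, extra_bits = 99 & 0x1F = 3, and
+; sym = (99 >> 5) + 2*5 = 13, i.e. distance symbol 13 (base 97) with extra
+; bits value 3.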
+
+; Uses RCX, clobbers dist
+; get_dist_code dist, code, len
+%macro get_dist_code 4
+%define %%dist %1 ; 32-bit IN, clobbered
+%define %%distq %1 ; 64-bit IN, clobbered
+%define %%code %2 ; 32-bit OUT
+%define %%len %3 ; 32-bit OUT
+%define %%hufftables %4
+
+ cmp %%dist, DIST_TABLE_SIZE - 1
+ jg %%do_compute
+%ifndef IACA
+ mov %%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT]
+ mov %%code, %%len
+ and %%len, 0x1F;
+ shr %%code, 5
+ jmp %%done
+%endif
+%%do_compute:
+ compute_dist_code %%distq, %%code, %%len, %%hufftables
+%%done:
+%endm
+
+%macro get_packed_dist_code 3
+%define %%dist %1 ; 64-bit IN
+%define %%code_len %2d ; 32-bit OUT
+%define %%hufftables %3 ; address of the hufftable
+%endm
+
+%endif
+
+
+; Macros for doing Huffman Encoding
+
+; Assumes (dist != 0)
+; Uses RCX, clobbers dist
+; void compute_dist_code dist, code, len
+%macro compute_dist_icf_code 3
+%define %%dist %1 ; IN, clobbered
+%define %%distq %1
+%define %%code %2 ; OUT
+%define %%tmp1 %3
+
+ bsr rcx, %%dist ; ecx = msb = bsr(dist)
+ dec rcx ; ecx = num_extra_bits = msb - N
+ BZHI %%code, %%dist, rcx, %%tmp1
+ SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits
+ lea %%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2
+ shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET
+ add %%code, %%dist ; code = extra_bits | sym
+
+%endm
+
+; Uses RCX, clobbers dist
+; get_dist_code dist, code, len
+%macro get_dist_icf_code 3
+%define %%dist %1 ; 32-bit IN, clobbered
+%define %%distq %1 ; 64-bit IN, clobbered
+%define %%code %2 ; 32-bit OUT
+%define %%tmp1 %3
+
+ cmp %%dist, 1
+ jg %%do_compute
+
+%ifnidn %%code, %%dist
+ mov %%code, %%dist
+%endif
+ jmp %%done
+%%do_compute:
+ compute_dist_icf_code %%distq, %%code, %%tmp1
+%%done:
+ shl %%code, DIST_OFFSET
+%endm
+
+
+; "len" can be same register as "length"
+; get_len_code length, code, len
+%macro get_len_code 4
+%define %%length %1 ; 64-bit IN
+%define %%code %2d ; 32-bit OUT
+%define %%len %3d ; 32-bit OUT
+%define %%hufftables %4
+
+ mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length]
+ mov %%code, %%len
+ and %%len, 0x1F
+ shr %%code, 5
+%endm
+
+
+%macro get_lit_code 4
+%define %%lit %1 ; 64-bit IN or CONST
+%define %%code %2d ; 32-bit OUT
+%define %%len %3d ; 32-bit OUT
+%define %%hufftables %4
+
+ movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit]
+ movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit]
+
+%endm
+
+
+;; Compute hash of first 3 bytes of data
+%macro compute_hash 2
+%define %%result %1d ; 32-bit reg
+%define %%data %2d ; 32-bit reg (low byte not clobbered)
+
+ xor %%result, %%result
+ crc32 %%result, %%data
+%endm
diff --git a/src/isa-l/igzip/huffman.h b/src/isa-l/igzip/huffman.h
new file mode 100644
index 000000000..2b44b617b
--- /dev/null
+++ b/src/isa-l/igzip/huffman.h
@@ -0,0 +1,359 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "igzip_lib.h"
+#include "unaligned.h"
+
+#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
+#ifdef _MSC_VER
+# include <intrin.h>
+# define inline __inline
+#else
+# include <x86intrin.h>
+#endif
+#else
+# define inline __inline
+#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86
+
+/**
+ * @brief Calculate the bit offset of the msb.
+ * @param val 32-bit unsigned integer input
+ *
+ * @returns bit offset of msb starting at 1 for first bit
+ */
+static inline uint32_t bsr(uint32_t val)
+{
+ uint32_t msb;
+#if defined(_MSC_VER)
+ unsigned long ret = 0;
+ if (val != 0) {
+ _BitScanReverse(&ret, val);
+ msb = ret + 1;
+ }
+ else
+ msb = 0;
+#elif defined( __LZCNT__)
+ msb = 32 - __lzcnt32(val);
+#elif defined(__x86_64__) || defined(__aarch64__)
+ msb = (val == 0)? 0 : 32 - __builtin_clz(val);
+#else
+ for(msb = 0; val > 0; val >>= 1)
+ msb++;
+#endif
+ return msb;
+}
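+
+/* Examples: bsr(0) == 0, bsr(1) == 1 and bsr(0x80000000) == 32; the result
+ * is the 1-based position of the most significant set bit. */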
+
+static inline uint32_t tzbytecnt(uint64_t val)
+{
+ uint32_t cnt;
+
+#ifdef __BMI__
+ cnt = __tzcnt_u64(val);
+ cnt = cnt / 8;
+#elif defined(__x86_64__) || defined(__aarch64__)
+
+ cnt = (val == 0)? 64 : __builtin_ctzll(val);
+ cnt = cnt / 8;
+
+#else
+ for(cnt = 8; val > 0; val <<= 8)
+ cnt -= 1;
+#endif
+ return cnt;
+}
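+
+/* Example: tzbytecnt(0x0000000000FF0000) == 2, the number of all-zero low
+ * order bytes. compare258() and compare() below use this on the XOR of two
+ * 8-byte loads to locate the first mismatching byte. */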
+
+static void compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code, uint64_t *p_len)
+{
+ assert(dist > IGZIP_DIST_TABLE_SIZE);
+
+ dist -= 1;
+ uint32_t msb;
+ uint32_t num_extra_bits;
+ uint32_t extra_bits;
+ uint32_t sym;
+ uint32_t len;
+ uint32_t code;
+
+ msb = bsr(dist);
+ assert(msb >= 1);
+ num_extra_bits = msb - 2;
+ extra_bits = dist & ((1 << num_extra_bits) - 1);
+ dist >>= num_extra_bits;
+ sym = dist + 2 * num_extra_bits;
+ assert(sym < 30);
+ code = hufftables->dcodes[sym - IGZIP_DECODE_OFFSET];
+ len = hufftables->dcodes_sizes[sym - IGZIP_DECODE_OFFSET];
+ *p_code = code | (extra_bits << len);
+ *p_len = len + num_extra_bits;
+}
+
+static inline void get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len)
+{
+ if (dist < 1)
+ dist = 0;
+ assert(dist >= 1);
+ assert(dist <= 32768);
+ if (dist <= IGZIP_DIST_TABLE_SIZE) {
+ uint64_t code_len;
+ code_len = hufftables->dist_table[dist - 1];
+ *code = code_len >> 5;
+ *len = code_len & 0x1F;
+ } else {
+ compute_dist_code(hufftables, dist, code, len);
+ }
+}
+
+static inline void get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len)
+{
+ assert(length >= 3);
+ assert(length <= 258);
+
+ uint64_t code_len;
+ code_len = hufftables->len_table[length - 3];
+ *code = code_len >> 5;
+ *len = code_len & 0x1F;
+}
+
+static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len)
+{
+ assert(lit <= 256);
+
+ *code = hufftables->lit_table[lit];
+ *len = hufftables->lit_table_sizes[lit];
+}
+
+static void compute_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
+{
+ uint32_t msb;
+ uint32_t num_extra_bits;
+
+ dist -= 1;
+ msb = bsr(dist);
+ assert(msb >= 1);
+ num_extra_bits = msb - 2;
+ *extra_bits = dist & ((1 << num_extra_bits) - 1);
+ dist >>= num_extra_bits;
+ *code = dist + 2 * num_extra_bits;
+ assert(*code < 30);
+}
+
+static inline void get_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
+{
+ assert(dist >= 1);
+ assert(dist <= 32768);
+ if (dist <= 2) {
+ *code = dist - 1;
+ *extra_bits = 0;
+ } else {
+ compute_dist_icf_code(dist, code, extra_bits);
+ }
+}
+
+static inline void get_len_icf_code(uint32_t length, uint32_t *code)
+{
+ assert(length >= 3);
+ assert(length <= 258);
+
+ *code = length + 254;
+}
+
+static inline void get_lit_icf_code(uint32_t lit, uint32_t *code)
+{
+ assert(lit <= 256);
+
+ *code = lit;
+}
+
+/**
+ * @brief Returns a hash of the first 3 bytes of input data.
+ */
+static inline uint32_t compute_hash(uint32_t data)
+{
+#ifdef __SSE4_2__
+
+ return _mm_crc32_u32(0, data);
+
+#else
+ uint64_t hash;
+	/* Use multiplication to create a hash; 0xB2D06057 is the odd constant
+	 * used as the multiplier below */
+ hash = data;
+ hash *= 0xB2D06057;
+ hash >>= 16;
+ hash *= 0xB2D06057;
+ hash >>= 16;
+
+ return hash;
+
+#endif /* __SSE4_2__ */
+}
+
+#define PROD1 0xFFFFE84B
+#define PROD2 0xFFFF97B1
+static inline uint32_t compute_hash_mad(uint32_t data)
+{
+ int16_t data_low;
+ int16_t data_high;
+
+ data_low = data;
+ data_high = data >> 16;
+ data = PROD1 * data_low + PROD2 * data_high;
+
+ data_low = data;
+ data_high = data >> 16;
+ data = PROD1 * data_low + PROD2 * data_high;
+
+ return data;
+}
+
+static inline uint32_t compute_long_hash(uint64_t data) {
+
+ return compute_hash(data >> 32)^compute_hash(data);
+}
+
+/**
+ * @brief Returns the number of leading bytes on which str1 and str2 match.
+ * @param str1: First input string.
+ * @param str2: Second input string.
+ * @param max_length: maximum number of bytes to compare, the length of the
+ * smaller string; values above 258 are capped at 258.
+ */
+static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length)
+{
+ uint32_t count;
+ uint64_t test;
+ uint64_t loop_length;
+
+ if(max_length > 258)
+ max_length = 258;
+
+ loop_length = max_length & ~0x7;
+
+ for(count = 0; count < loop_length; count += 8){
+ test = load_u64(str1);
+ test ^= load_u64(str2);
+ if(test != 0)
+ return count + tzbytecnt(test);
+ str1 += 8;
+ str2 += 8;
+ }
+
+ switch(max_length % 8){
+
+ case 7:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 6:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 5:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 4:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 3:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 2:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 1:
+ if(*str1 != *str2)
+ return count;
+ count++;
+ }
+
+ return count;
+}
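+
+/* Example (on a little-endian target): with str1 = "abcdefgh",
+ * str2 = "abcdefxy" and max_length = 8, the 8-byte XOR has its low 6 bytes
+ * equal to zero, so compare258() returns 0 + tzbytecnt(test) == 6. */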
+
+/**
+ * @brief Returns the number of leading bytes on which str1 and str2 match.
+ * @param str1: First input string.
+ * @param str2: Second input string.
+ * @param max_length: maximum number of bytes to compare, the length of the
+ * smaller string.
+ */
+static inline int compare(uint8_t * str1, uint8_t * str2, uint32_t max_length)
+{
+ uint32_t count;
+ uint64_t test;
+ uint64_t loop_length;
+
+ loop_length = max_length & ~0x7;
+
+ for(count = 0; count < loop_length; count += 8){
+ test = load_u64(str1);
+ test ^= load_u64(str2);
+ if(test != 0)
+ return count + tzbytecnt(test);
+ str1 += 8;
+ str2 += 8;
+ }
+
+ switch(max_length % 8){
+
+ case 7:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 6:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 5:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 4:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 3:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 2:
+ if(*str1++ != *str2++)
+ return count;
+ count++;
+ case 1:
+ if(*str1 != *str2)
+ return count;
+ count++;
+ }
+
+ return count;
+}
diff --git a/src/isa-l/igzip/hufftables_c.c b/src/isa-l/igzip/hufftables_c.c
new file mode 100644
index 000000000..281f3e940
--- /dev/null
+++ b/src/isa-l/igzip/hufftables_c.c
@@ -0,0 +1,6742 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <stdint.h>
+#include <igzip_lib.h>
+
+#if (IGZIP_HIST_SIZE <= 8192)
+
+const uint8_t gzip_hdr[] = {
+ 0x1f, 0x8b, 0x08, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff
+};
+
+const uint32_t gzip_hdr_bytes = 10;
+const uint32_t gzip_trl_bytes = 8;
+
+const uint8_t zlib_hdr[] = { 0x78, 0x01 };
+
+const uint32_t zlib_hdr_bytes = 2;
+const uint32_t zlib_trl_bytes = 4;
+
+struct isal_hufftables hufftables_default = {
+
+ .deflate_hdr = {
+ 0xed, 0xf9, 0x09, 0x60, 0x54, 0xd5, 0xf9, 0x37,
+ 0x00, 0x9f, 0x90, 0x04, 0xc8, 0x40, 0x00, 0x77,
+ 0xdb, 0x5a, 0x38, 0x22, 0x4a, 0xd0, 0xc9, 0x98,
+ 0x15, 0x02, 0x20, 0x24, 0x09, 0x5b, 0x10, 0x20,
+ 0x12, 0x10, 0x77, 0x39, 0x33, 0xf7, 0xcc, 0xcc,
+ 0x25, 0x77, 0xee, 0x1d, 0xef, 0xbd, 0x37, 0xc3,
+ 0x50, 0x55, 0x5a, 0x6d, 0xb5, 0xb5, 0xad, 0x76,
+ 0xdf, 0x5b, 0xdb, 0x5a, 0x6b, 0x77, 0xdb, 0xda,
+ 0xbd, 0x56, 0x84, 0xb6, 0xda, 0x55, 0xbb, 0xef,
+ 0x2d, 0x56, 0x5b, 0xed, 0x2a, 0x56, 0xdb, 0x62,
+ 0x8b, 0xe4, 0xfb, 0x7e, 0xcf, 0x39, 0x77, 0xe6,
+ 0x24, 0x09, 0xae, 0xfd, 0xbf, 0xef, 0xff, 0xfd,
+ 0xbe, 0x22, 0x92, 0xdc, 0x7b, 0xcf, 0x79, 0xce,
+ 0xb3, 0x9f, 0xdf, 0xf3, 0x3c},
+
+ .deflate_hdr_count = 109,
+ .deflate_hdr_extra_bits = 0,
+
+ .dist_table = {
+ 0x00000fe9, 0x00003fea,
+#ifdef LONGER_HUFFTABLE
+ 0x00002fe9, 0x00007fea, 0x00001fea, 0x00005fea,
+ 0x000007e9, 0x000027e9, 0x000017ea, 0x000037ea,
+ 0x000057ea, 0x000077ea, 0x000001e8, 0x000009e8,
+ 0x000011e8, 0x000019e8, 0x000005e9, 0x00000de9,
+ 0x000015e9, 0x00001de9, 0x000025e9, 0x00002de9,
+ 0x000035e9, 0x00003de9, 0x000003e9, 0x00000be9,
+ 0x000013e9, 0x00001be9, 0x000023e9, 0x00002be9,
+ 0x000033e9, 0x00003be9, 0x00000169, 0x00000569,
+ 0x00000969, 0x00000d69, 0x00001169, 0x00001569,
+ 0x00001969, 0x00001d69, 0x00002169, 0x00002569,
+ 0x00002969, 0x00002d69, 0x00003169, 0x00003569,
+ 0x00003969, 0x00003d69, 0x00000369, 0x00000769,
+ 0x00000b69, 0x00000f69, 0x00001369, 0x00001769,
+ 0x00001b69, 0x00001f69, 0x00002369, 0x00002769,
+ 0x00002b69, 0x00002f69, 0x00003369, 0x00003769,
+ 0x00003b69, 0x00003f69, 0x00000089, 0x00000289,
+ 0x00000489, 0x00000689, 0x00000889, 0x00000a89,
+ 0x00000c89, 0x00000e89, 0x00001089, 0x00001289,
+ 0x00001489, 0x00001689, 0x00001889, 0x00001a89,
+ 0x00001c89, 0x00001e89, 0x00002089, 0x00002289,
+ 0x00002489, 0x00002689, 0x00002889, 0x00002a89,
+ 0x00002c89, 0x00002e89, 0x00003089, 0x00003289,
+ 0x00003489, 0x00003689, 0x00003889, 0x00003a89,
+ 0x00003c89, 0x00003e89, 0x000000ea, 0x000004ea,
+ 0x000008ea, 0x00000cea, 0x000010ea, 0x000014ea,
+ 0x000018ea, 0x00001cea, 0x000020ea, 0x000024ea,
+ 0x000028ea, 0x00002cea, 0x000030ea, 0x000034ea,
+ 0x000038ea, 0x00003cea, 0x000040ea, 0x000044ea,
+ 0x000048ea, 0x00004cea, 0x000050ea, 0x000054ea,
+ 0x000058ea, 0x00005cea, 0x000060ea, 0x000064ea,
+ 0x000068ea, 0x00006cea, 0x000070ea, 0x000074ea,
+ 0x000078ea, 0x00007cea, 0x0000018a, 0x0000038a,
+ 0x0000058a, 0x0000078a, 0x0000098a, 0x00000b8a,
+ 0x00000d8a, 0x00000f8a, 0x0000118a, 0x0000138a,
+ 0x0000158a, 0x0000178a, 0x0000198a, 0x00001b8a,
+ 0x00001d8a, 0x00001f8a, 0x0000218a, 0x0000238a,
+ 0x0000258a, 0x0000278a, 0x0000298a, 0x00002b8a,
+ 0x00002d8a, 0x00002f8a, 0x0000318a, 0x0000338a,
+ 0x0000358a, 0x0000378a, 0x0000398a, 0x00003b8a,
+ 0x00003d8a, 0x00003f8a, 0x0000418a, 0x0000438a,
+ 0x0000458a, 0x0000478a, 0x0000498a, 0x00004b8a,
+ 0x00004d8a, 0x00004f8a, 0x0000518a, 0x0000538a,
+ 0x0000558a, 0x0000578a, 0x0000598a, 0x00005b8a,
+ 0x00005d8a, 0x00005f8a, 0x0000618a, 0x0000638a,
+ 0x0000658a, 0x0000678a, 0x0000698a, 0x00006b8a,
+ 0x00006d8a, 0x00006f8a, 0x0000718a, 0x0000738a,
+ 0x0000758a, 0x0000778a, 0x0000798a, 0x00007b8a,
+ 0x00007d8a, 0x00007f8a, 0x0000004a, 0x0000024a,
+ 0x0000044a, 0x0000064a, 0x0000084a, 0x00000a4a,
+ 0x00000c4a, 0x00000e4a, 0x0000104a, 0x0000124a,
+ 0x0000144a, 0x0000164a, 0x0000184a, 0x00001a4a,
+ 0x00001c4a, 0x00001e4a, 0x0000204a, 0x0000224a,
+ 0x0000244a, 0x0000264a, 0x0000284a, 0x00002a4a,
+ 0x00002c4a, 0x00002e4a, 0x0000304a, 0x0000324a,
+ 0x0000344a, 0x0000364a, 0x0000384a, 0x00003a4a,
+ 0x00003c4a, 0x00003e4a, 0x0000404a, 0x0000424a,
+ 0x0000444a, 0x0000464a, 0x0000484a, 0x00004a4a,
+ 0x00004c4a, 0x00004e4a, 0x0000504a, 0x0000524a,
+ 0x0000544a, 0x0000564a, 0x0000584a, 0x00005a4a,
+ 0x00005c4a, 0x00005e4a, 0x0000604a, 0x0000624a,
+ 0x0000644a, 0x0000664a, 0x0000684a, 0x00006a4a,
+ 0x00006c4a, 0x00006e4a, 0x0000704a, 0x0000724a,
+ 0x0000744a, 0x0000764a, 0x0000784a, 0x00007a4a,
+ 0x00007c4a, 0x00007e4a, 0x0000014b, 0x0000034b,
+ 0x0000054b, 0x0000074b, 0x0000094b, 0x00000b4b,
+ 0x00000d4b, 0x00000f4b, 0x0000114b, 0x0000134b,
+ 0x0000154b, 0x0000174b, 0x0000194b, 0x00001b4b,
+ 0x00001d4b, 0x00001f4b, 0x0000214b, 0x0000234b,
+ 0x0000254b, 0x0000274b, 0x0000294b, 0x00002b4b,
+ 0x00002d4b, 0x00002f4b, 0x0000314b, 0x0000334b,
+ 0x0000354b, 0x0000374b, 0x0000394b, 0x00003b4b,
+ 0x00003d4b, 0x00003f4b, 0x0000414b, 0x0000434b,
+ 0x0000454b, 0x0000474b, 0x0000494b, 0x00004b4b,
+ 0x00004d4b, 0x00004f4b, 0x0000514b, 0x0000534b,
+ 0x0000554b, 0x0000574b, 0x0000594b, 0x00005b4b,
+ 0x00005d4b, 0x00005f4b, 0x0000614b, 0x0000634b,
+ 0x0000654b, 0x0000674b, 0x0000694b, 0x00006b4b,
+ 0x00006d4b, 0x00006f4b, 0x0000714b, 0x0000734b,
+ 0x0000754b, 0x0000774b, 0x0000794b, 0x00007b4b,
+ 0x00007d4b, 0x00007f4b, 0x0000814b, 0x0000834b,
+ 0x0000854b, 0x0000874b, 0x0000894b, 0x00008b4b,
+ 0x00008d4b, 0x00008f4b, 0x0000914b, 0x0000934b,
+ 0x0000954b, 0x0000974b, 0x0000994b, 0x00009b4b,
+ 0x00009d4b, 0x00009f4b, 0x0000a14b, 0x0000a34b,
+ 0x0000a54b, 0x0000a74b, 0x0000a94b, 0x0000ab4b,
+ 0x0000ad4b, 0x0000af4b, 0x0000b14b, 0x0000b34b,
+ 0x0000b54b, 0x0000b74b, 0x0000b94b, 0x0000bb4b,
+ 0x0000bd4b, 0x0000bf4b, 0x0000c14b, 0x0000c34b,
+ 0x0000c54b, 0x0000c74b, 0x0000c94b, 0x0000cb4b,
+ 0x0000cd4b, 0x0000cf4b, 0x0000d14b, 0x0000d34b,
+ 0x0000d54b, 0x0000d74b, 0x0000d94b, 0x0000db4b,
+ 0x0000dd4b, 0x0000df4b, 0x0000e14b, 0x0000e34b,
+ 0x0000e54b, 0x0000e74b, 0x0000e94b, 0x0000eb4b,
+ 0x0000ed4b, 0x0000ef4b, 0x0000f14b, 0x0000f34b,
+ 0x0000f54b, 0x0000f74b, 0x0000f94b, 0x0000fb4b,
+ 0x0000fd4b, 0x0000ff4b, 0x000000cb, 0x000002cb,
+ 0x000004cb, 0x000006cb, 0x000008cb, 0x00000acb,
+ 0x00000ccb, 0x00000ecb, 0x000010cb, 0x000012cb,
+ 0x000014cb, 0x000016cb, 0x000018cb, 0x00001acb,
+ 0x00001ccb, 0x00001ecb, 0x000020cb, 0x000022cb,
+ 0x000024cb, 0x000026cb, 0x000028cb, 0x00002acb,
+ 0x00002ccb, 0x00002ecb, 0x000030cb, 0x000032cb,
+ 0x000034cb, 0x000036cb, 0x000038cb, 0x00003acb,
+ 0x00003ccb, 0x00003ecb, 0x000040cb, 0x000042cb,
+ 0x000044cb, 0x000046cb, 0x000048cb, 0x00004acb,
+ 0x00004ccb, 0x00004ecb, 0x000050cb, 0x000052cb,
+ 0x000054cb, 0x000056cb, 0x000058cb, 0x00005acb,
+ 0x00005ccb, 0x00005ecb, 0x000060cb, 0x000062cb,
+ 0x000064cb, 0x000066cb, 0x000068cb, 0x00006acb,
+ 0x00006ccb, 0x00006ecb, 0x000070cb, 0x000072cb,
+ 0x000074cb, 0x000076cb, 0x000078cb, 0x00007acb,
+ 0x00007ccb, 0x00007ecb, 0x000080cb, 0x000082cb,
+ 0x000084cb, 0x000086cb, 0x000088cb, 0x00008acb,
+ 0x00008ccb, 0x00008ecb, 0x000090cb, 0x000092cb,
+ 0x000094cb, 0x000096cb, 0x000098cb, 0x00009acb,
+ 0x00009ccb, 0x00009ecb, 0x0000a0cb, 0x0000a2cb,
+ 0x0000a4cb, 0x0000a6cb, 0x0000a8cb, 0x0000aacb,
+ 0x0000accb, 0x0000aecb, 0x0000b0cb, 0x0000b2cb,
+ 0x0000b4cb, 0x0000b6cb, 0x0000b8cb, 0x0000bacb,
+ 0x0000bccb, 0x0000becb, 0x0000c0cb, 0x0000c2cb,
+ 0x0000c4cb, 0x0000c6cb, 0x0000c8cb, 0x0000cacb,
+ 0x0000cccb, 0x0000cecb, 0x0000d0cb, 0x0000d2cb,
+ 0x0000d4cb, 0x0000d6cb, 0x0000d8cb, 0x0000dacb,
+ 0x0000dccb, 0x0000decb, 0x0000e0cb, 0x0000e2cb,
+ 0x0000e4cb, 0x0000e6cb, 0x0000e8cb, 0x0000eacb,
+ 0x0000eccb, 0x0000eecb, 0x0000f0cb, 0x0000f2cb,
+ 0x0000f4cb, 0x0000f6cb, 0x0000f8cb, 0x0000facb,
+ 0x0000fccb, 0x0000fecb, 0x000001cc, 0x000003cc,
+ 0x000005cc, 0x000007cc, 0x000009cc, 0x00000bcc,
+ 0x00000dcc, 0x00000fcc, 0x000011cc, 0x000013cc,
+ 0x000015cc, 0x000017cc, 0x000019cc, 0x00001bcc,
+ 0x00001dcc, 0x00001fcc, 0x000021cc, 0x000023cc,
+ 0x000025cc, 0x000027cc, 0x000029cc, 0x00002bcc,
+ 0x00002dcc, 0x00002fcc, 0x000031cc, 0x000033cc,
+ 0x000035cc, 0x000037cc, 0x000039cc, 0x00003bcc,
+ 0x00003dcc, 0x00003fcc, 0x000041cc, 0x000043cc,
+ 0x000045cc, 0x000047cc, 0x000049cc, 0x00004bcc,
+ 0x00004dcc, 0x00004fcc, 0x000051cc, 0x000053cc,
+ 0x000055cc, 0x000057cc, 0x000059cc, 0x00005bcc,
+ 0x00005dcc, 0x00005fcc, 0x000061cc, 0x000063cc,
+ 0x000065cc, 0x000067cc, 0x000069cc, 0x00006bcc,
+ 0x00006dcc, 0x00006fcc, 0x000071cc, 0x000073cc,
+ 0x000075cc, 0x000077cc, 0x000079cc, 0x00007bcc,
+ 0x00007dcc, 0x00007fcc, 0x000081cc, 0x000083cc,
+ 0x000085cc, 0x000087cc, 0x000089cc, 0x00008bcc,
+ 0x00008dcc, 0x00008fcc, 0x000091cc, 0x000093cc,
+ 0x000095cc, 0x000097cc, 0x000099cc, 0x00009bcc,
+ 0x00009dcc, 0x00009fcc, 0x0000a1cc, 0x0000a3cc,
+ 0x0000a5cc, 0x0000a7cc, 0x0000a9cc, 0x0000abcc,
+ 0x0000adcc, 0x0000afcc, 0x0000b1cc, 0x0000b3cc,
+ 0x0000b5cc, 0x0000b7cc, 0x0000b9cc, 0x0000bbcc,
+ 0x0000bdcc, 0x0000bfcc, 0x0000c1cc, 0x0000c3cc,
+ 0x0000c5cc, 0x0000c7cc, 0x0000c9cc, 0x0000cbcc,
+ 0x0000cdcc, 0x0000cfcc, 0x0000d1cc, 0x0000d3cc,
+ 0x0000d5cc, 0x0000d7cc, 0x0000d9cc, 0x0000dbcc,
+ 0x0000ddcc, 0x0000dfcc, 0x0000e1cc, 0x0000e3cc,
+ 0x0000e5cc, 0x0000e7cc, 0x0000e9cc, 0x0000ebcc,
+ 0x0000edcc, 0x0000efcc, 0x0000f1cc, 0x0000f3cc,
+ 0x0000f5cc, 0x0000f7cc, 0x0000f9cc, 0x0000fbcc,
+ 0x0000fdcc, 0x0000ffcc, 0x000101cc, 0x000103cc,
+ 0x000105cc, 0x000107cc, 0x000109cc, 0x00010bcc,
+ 0x00010dcc, 0x00010fcc, 0x000111cc, 0x000113cc,
+ 0x000115cc, 0x000117cc, 0x000119cc, 0x00011bcc,
+ 0x00011dcc, 0x00011fcc, 0x000121cc, 0x000123cc,
+ 0x000125cc, 0x000127cc, 0x000129cc, 0x00012bcc,
+ 0x00012dcc, 0x00012fcc, 0x000131cc, 0x000133cc,
+ 0x000135cc, 0x000137cc, 0x000139cc, 0x00013bcc,
+ 0x00013dcc, 0x00013fcc, 0x000141cc, 0x000143cc,
+ 0x000145cc, 0x000147cc, 0x000149cc, 0x00014bcc,
+ 0x00014dcc, 0x00014fcc, 0x000151cc, 0x000153cc,
+ 0x000155cc, 0x000157cc, 0x000159cc, 0x00015bcc,
+ 0x00015dcc, 0x00015fcc, 0x000161cc, 0x000163cc,
+ 0x000165cc, 0x000167cc, 0x000169cc, 0x00016bcc,
+ 0x00016dcc, 0x00016fcc, 0x000171cc, 0x000173cc,
+ 0x000175cc, 0x000177cc, 0x000179cc, 0x00017bcc,
+ 0x00017dcc, 0x00017fcc, 0x000181cc, 0x000183cc,
+ 0x000185cc, 0x000187cc, 0x000189cc, 0x00018bcc,
+ 0x00018dcc, 0x00018fcc, 0x000191cc, 0x000193cc,
+ 0x000195cc, 0x000197cc, 0x000199cc, 0x00019bcc,
+ 0x00019dcc, 0x00019fcc, 0x0001a1cc, 0x0001a3cc,
+ 0x0001a5cc, 0x0001a7cc, 0x0001a9cc, 0x0001abcc,
+ 0x0001adcc, 0x0001afcc, 0x0001b1cc, 0x0001b3cc,
+ 0x0001b5cc, 0x0001b7cc, 0x0001b9cc, 0x0001bbcc,
+ 0x0001bdcc, 0x0001bfcc, 0x0001c1cc, 0x0001c3cc,
+ 0x0001c5cc, 0x0001c7cc, 0x0001c9cc, 0x0001cbcc,
+ 0x0001cdcc, 0x0001cfcc, 0x0001d1cc, 0x0001d3cc,
+ 0x0001d5cc, 0x0001d7cc, 0x0001d9cc, 0x0001dbcc,
+ 0x0001ddcc, 0x0001dfcc, 0x0001e1cc, 0x0001e3cc,
+ 0x0001e5cc, 0x0001e7cc, 0x0001e9cc, 0x0001ebcc,
+ 0x0001edcc, 0x0001efcc, 0x0001f1cc, 0x0001f3cc,
+ 0x0001f5cc, 0x0001f7cc, 0x0001f9cc, 0x0001fbcc,
+ 0x0001fdcc, 0x0001ffcc, 0x0000002c, 0x0000022c,
+ 0x0000042c, 0x0000062c, 0x0000082c, 0x00000a2c,
+ 0x00000c2c, 0x00000e2c, 0x0000102c, 0x0000122c,
+ 0x0000142c, 0x0000162c, 0x0000182c, 0x00001a2c,
+ 0x00001c2c, 0x00001e2c, 0x0000202c, 0x0000222c,
+ 0x0000242c, 0x0000262c, 0x0000282c, 0x00002a2c,
+ 0x00002c2c, 0x00002e2c, 0x0000302c, 0x0000322c,
+ 0x0000342c, 0x0000362c, 0x0000382c, 0x00003a2c,
+ 0x00003c2c, 0x00003e2c, 0x0000402c, 0x0000422c,
+ 0x0000442c, 0x0000462c, 0x0000482c, 0x00004a2c,
+ 0x00004c2c, 0x00004e2c, 0x0000502c, 0x0000522c,
+ 0x0000542c, 0x0000562c, 0x0000582c, 0x00005a2c,
+ 0x00005c2c, 0x00005e2c, 0x0000602c, 0x0000622c,
+ 0x0000642c, 0x0000662c, 0x0000682c, 0x00006a2c,
+ 0x00006c2c, 0x00006e2c, 0x0000702c, 0x0000722c,
+ 0x0000742c, 0x0000762c, 0x0000782c, 0x00007a2c,
+ 0x00007c2c, 0x00007e2c, 0x0000802c, 0x0000822c,
+ 0x0000842c, 0x0000862c, 0x0000882c, 0x00008a2c,
+ 0x00008c2c, 0x00008e2c, 0x0000902c, 0x0000922c,
+ 0x0000942c, 0x0000962c, 0x0000982c, 0x00009a2c,
+ 0x00009c2c, 0x00009e2c, 0x0000a02c, 0x0000a22c,
+ 0x0000a42c, 0x0000a62c, 0x0000a82c, 0x0000aa2c,
+ 0x0000ac2c, 0x0000ae2c, 0x0000b02c, 0x0000b22c,
+ 0x0000b42c, 0x0000b62c, 0x0000b82c, 0x0000ba2c,
+ 0x0000bc2c, 0x0000be2c, 0x0000c02c, 0x0000c22c,
+ 0x0000c42c, 0x0000c62c, 0x0000c82c, 0x0000ca2c,
+ 0x0000cc2c, 0x0000ce2c, 0x0000d02c, 0x0000d22c,
+ 0x0000d42c, 0x0000d62c, 0x0000d82c, 0x0000da2c,
+ 0x0000dc2c, 0x0000de2c, 0x0000e02c, 0x0000e22c,
+ 0x0000e42c, 0x0000e62c, 0x0000e82c, 0x0000ea2c,
+ 0x0000ec2c, 0x0000ee2c, 0x0000f02c, 0x0000f22c,
+ 0x0000f42c, 0x0000f62c, 0x0000f82c, 0x0000fa2c,
+ 0x0000fc2c, 0x0000fe2c, 0x0001002c, 0x0001022c,
+ 0x0001042c, 0x0001062c, 0x0001082c, 0x00010a2c,
+ 0x00010c2c, 0x00010e2c, 0x0001102c, 0x0001122c,
+ 0x0001142c, 0x0001162c, 0x0001182c, 0x00011a2c,
+ 0x00011c2c, 0x00011e2c, 0x0001202c, 0x0001222c,
+ 0x0001242c, 0x0001262c, 0x0001282c, 0x00012a2c,
+ 0x00012c2c, 0x00012e2c, 0x0001302c, 0x0001322c,
+ 0x0001342c, 0x0001362c, 0x0001382c, 0x00013a2c,
+ 0x00013c2c, 0x00013e2c, 0x0001402c, 0x0001422c,
+ 0x0001442c, 0x0001462c, 0x0001482c, 0x00014a2c,
+ 0x00014c2c, 0x00014e2c, 0x0001502c, 0x0001522c,
+ 0x0001542c, 0x0001562c, 0x0001582c, 0x00015a2c,
+ 0x00015c2c, 0x00015e2c, 0x0001602c, 0x0001622c,
+ 0x0001642c, 0x0001662c, 0x0001682c, 0x00016a2c,
+ 0x00016c2c, 0x00016e2c, 0x0001702c, 0x0001722c,
+ 0x0001742c, 0x0001762c, 0x0001782c, 0x00017a2c,
+ 0x00017c2c, 0x00017e2c, 0x0001802c, 0x0001822c,
+ 0x0001842c, 0x0001862c, 0x0001882c, 0x00018a2c,
+ 0x00018c2c, 0x00018e2c, 0x0001902c, 0x0001922c,
+ 0x0001942c, 0x0001962c, 0x0001982c, 0x00019a2c,
+ 0x00019c2c, 0x00019e2c, 0x0001a02c, 0x0001a22c,
+ 0x0001a42c, 0x0001a62c, 0x0001a82c, 0x0001aa2c,
+ 0x0001ac2c, 0x0001ae2c, 0x0001b02c, 0x0001b22c,
+ 0x0001b42c, 0x0001b62c, 0x0001b82c, 0x0001ba2c,
+ 0x0001bc2c, 0x0001be2c, 0x0001c02c, 0x0001c22c,
+ 0x0001c42c, 0x0001c62c, 0x0001c82c, 0x0001ca2c,
+ 0x0001cc2c, 0x0001ce2c, 0x0001d02c, 0x0001d22c,
+ 0x0001d42c, 0x0001d62c, 0x0001d82c, 0x0001da2c,
+ 0x0001dc2c, 0x0001de2c, 0x0001e02c, 0x0001e22c,
+ 0x0001e42c, 0x0001e62c, 0x0001e82c, 0x0001ea2c,
+ 0x0001ec2c, 0x0001ee2c, 0x0001f02c, 0x0001f22c,
+ 0x0001f42c, 0x0001f62c, 0x0001f82c, 0x0001fa2c,
+ 0x0001fc2c, 0x0001fe2c, 0x0000012d, 0x0000032d,
+ 0x0000052d, 0x0000072d, 0x0000092d, 0x00000b2d,
+ 0x00000d2d, 0x00000f2d, 0x0000112d, 0x0000132d,
+ 0x0000152d, 0x0000172d, 0x0000192d, 0x00001b2d,
+ 0x00001d2d, 0x00001f2d, 0x0000212d, 0x0000232d,
+ 0x0000252d, 0x0000272d, 0x0000292d, 0x00002b2d,
+ 0x00002d2d, 0x00002f2d, 0x0000312d, 0x0000332d,
+ 0x0000352d, 0x0000372d, 0x0000392d, 0x00003b2d,
+ 0x00003d2d, 0x00003f2d, 0x0000412d, 0x0000432d,
+ 0x0000452d, 0x0000472d, 0x0000492d, 0x00004b2d,
+ 0x00004d2d, 0x00004f2d, 0x0000512d, 0x0000532d,
+ 0x0000552d, 0x0000572d, 0x0000592d, 0x00005b2d,
+ 0x00005d2d, 0x00005f2d, 0x0000612d, 0x0000632d,
+ 0x0000652d, 0x0000672d, 0x0000692d, 0x00006b2d,
+ 0x00006d2d, 0x00006f2d, 0x0000712d, 0x0000732d,
+ 0x0000752d, 0x0000772d, 0x0000792d, 0x00007b2d,
+ 0x00007d2d, 0x00007f2d, 0x0000812d, 0x0000832d,
+ 0x0000852d, 0x0000872d, 0x0000892d, 0x00008b2d,
+ 0x00008d2d, 0x00008f2d, 0x0000912d, 0x0000932d,
+ 0x0000952d, 0x0000972d, 0x0000992d, 0x00009b2d,
+ 0x00009d2d, 0x00009f2d, 0x0000a12d, 0x0000a32d,
+ 0x0000a52d, 0x0000a72d, 0x0000a92d, 0x0000ab2d,
+ 0x0000ad2d, 0x0000af2d, 0x0000b12d, 0x0000b32d,
+ 0x0000b52d, 0x0000b72d, 0x0000b92d, 0x0000bb2d,
+ 0x0000bd2d, 0x0000bf2d, 0x0000c12d, 0x0000c32d,
+ 0x0000c52d, 0x0000c72d, 0x0000c92d, 0x0000cb2d,
+ 0x0000cd2d, 0x0000cf2d, 0x0000d12d, 0x0000d32d,
+ 0x0000d52d, 0x0000d72d, 0x0000d92d, 0x0000db2d,
+ 0x0000dd2d, 0x0000df2d, 0x0000e12d, 0x0000e32d,
+ 0x0000e52d, 0x0000e72d, 0x0000e92d, 0x0000eb2d,
+ 0x0000ed2d, 0x0000ef2d, 0x0000f12d, 0x0000f32d,
+ 0x0000f52d, 0x0000f72d, 0x0000f92d, 0x0000fb2d,
+ 0x0000fd2d, 0x0000ff2d, 0x0001012d, 0x0001032d,
+ 0x0001052d, 0x0001072d, 0x0001092d, 0x00010b2d,
+ 0x00010d2d, 0x00010f2d, 0x0001112d, 0x0001132d,
+ 0x0001152d, 0x0001172d, 0x0001192d, 0x00011b2d,
+ 0x00011d2d, 0x00011f2d, 0x0001212d, 0x0001232d,
+ 0x0001252d, 0x0001272d, 0x0001292d, 0x00012b2d,
+ 0x00012d2d, 0x00012f2d, 0x0001312d, 0x0001332d,
+ 0x0001352d, 0x0001372d, 0x0001392d, 0x00013b2d,
+ 0x00013d2d, 0x00013f2d, 0x0001412d, 0x0001432d,
+ 0x0001452d, 0x0001472d, 0x0001492d, 0x00014b2d,
+ 0x00014d2d, 0x00014f2d, 0x0001512d, 0x0001532d,
+ 0x0001552d, 0x0001572d, 0x0001592d, 0x00015b2d,
+ 0x00015d2d, 0x00015f2d, 0x0001612d, 0x0001632d,
+ 0x0001652d, 0x0001672d, 0x0001692d, 0x00016b2d,
+ 0x00016d2d, 0x00016f2d, 0x0001712d, 0x0001732d,
+ 0x0001752d, 0x0001772d, 0x0001792d, 0x00017b2d,
+ 0x00017d2d, 0x00017f2d, 0x0001812d, 0x0001832d,
+ 0x0001852d, 0x0001872d, 0x0001892d, 0x00018b2d,
+ 0x00018d2d, 0x00018f2d, 0x0001912d, 0x0001932d,
+ 0x0001952d, 0x0001972d, 0x0001992d, 0x00019b2d,
+ 0x00019d2d, 0x00019f2d, 0x0001a12d, 0x0001a32d,
+ 0x0001a52d, 0x0001a72d, 0x0001a92d, 0x0001ab2d,
+ 0x0001ad2d, 0x0001af2d, 0x0001b12d, 0x0001b32d,
+ 0x0001b52d, 0x0001b72d, 0x0001b92d, 0x0001bb2d,
+ 0x0001bd2d, 0x0001bf2d, 0x0001c12d, 0x0001c32d,
+ 0x0001c52d, 0x0001c72d, 0x0001c92d, 0x0001cb2d,
+ 0x0001cd2d, 0x0001cf2d, 0x0001d12d, 0x0001d32d,
+ 0x0001d52d, 0x0001d72d, 0x0001d92d, 0x0001db2d,
+ 0x0001dd2d, 0x0001df2d, 0x0001e12d, 0x0001e32d,
+ 0x0001e52d, 0x0001e72d, 0x0001e92d, 0x0001eb2d,
+ 0x0001ed2d, 0x0001ef2d, 0x0001f12d, 0x0001f32d,
+ 0x0001f52d, 0x0001f72d, 0x0001f92d, 0x0001fb2d,
+ 0x0001fd2d, 0x0001ff2d, 0x0002012d, 0x0002032d,
+ 0x0002052d, 0x0002072d, 0x0002092d, 0x00020b2d,
+ 0x00020d2d, 0x00020f2d, 0x0002112d, 0x0002132d,
+ 0x0002152d, 0x0002172d, 0x0002192d, 0x00021b2d,
+ 0x00021d2d, 0x00021f2d, 0x0002212d, 0x0002232d,
+ 0x0002252d, 0x0002272d, 0x0002292d, 0x00022b2d,
+ 0x00022d2d, 0x00022f2d, 0x0002312d, 0x0002332d,
+ 0x0002352d, 0x0002372d, 0x0002392d, 0x00023b2d,
+ 0x00023d2d, 0x00023f2d, 0x0002412d, 0x0002432d,
+ 0x0002452d, 0x0002472d, 0x0002492d, 0x00024b2d,
+ 0x00024d2d, 0x00024f2d, 0x0002512d, 0x0002532d,
+ 0x0002552d, 0x0002572d, 0x0002592d, 0x00025b2d,
+ 0x00025d2d, 0x00025f2d, 0x0002612d, 0x0002632d,
+ 0x0002652d, 0x0002672d, 0x0002692d, 0x00026b2d,
+ 0x00026d2d, 0x00026f2d, 0x0002712d, 0x0002732d,
+ 0x0002752d, 0x0002772d, 0x0002792d, 0x00027b2d,
+ 0x00027d2d, 0x00027f2d, 0x0002812d, 0x0002832d,
+ 0x0002852d, 0x0002872d, 0x0002892d, 0x00028b2d,
+ 0x00028d2d, 0x00028f2d, 0x0002912d, 0x0002932d,
+ 0x0002952d, 0x0002972d, 0x0002992d, 0x00029b2d,
+ 0x00029d2d, 0x00029f2d, 0x0002a12d, 0x0002a32d,
+ 0x0002a52d, 0x0002a72d, 0x0002a92d, 0x0002ab2d,
+ 0x0002ad2d, 0x0002af2d, 0x0002b12d, 0x0002b32d,
+ 0x0002b52d, 0x0002b72d, 0x0002b92d, 0x0002bb2d,
+ 0x0002bd2d, 0x0002bf2d, 0x0002c12d, 0x0002c32d,
+ 0x0002c52d, 0x0002c72d, 0x0002c92d, 0x0002cb2d,
+ 0x0002cd2d, 0x0002cf2d, 0x0002d12d, 0x0002d32d,
+ 0x0002d52d, 0x0002d72d, 0x0002d92d, 0x0002db2d,
+ 0x0002dd2d, 0x0002df2d, 0x0002e12d, 0x0002e32d,
+ 0x0002e52d, 0x0002e72d, 0x0002e92d, 0x0002eb2d,
+ 0x0002ed2d, 0x0002ef2d, 0x0002f12d, 0x0002f32d,
+ 0x0002f52d, 0x0002f72d, 0x0002f92d, 0x0002fb2d,
+ 0x0002fd2d, 0x0002ff2d, 0x0003012d, 0x0003032d,
+ 0x0003052d, 0x0003072d, 0x0003092d, 0x00030b2d,
+ 0x00030d2d, 0x00030f2d, 0x0003112d, 0x0003132d,
+ 0x0003152d, 0x0003172d, 0x0003192d, 0x00031b2d,
+ 0x00031d2d, 0x00031f2d, 0x0003212d, 0x0003232d,
+ 0x0003252d, 0x0003272d, 0x0003292d, 0x00032b2d,
+ 0x00032d2d, 0x00032f2d, 0x0003312d, 0x0003332d,
+ 0x0003352d, 0x0003372d, 0x0003392d, 0x00033b2d,
+ 0x00033d2d, 0x00033f2d, 0x0003412d, 0x0003432d,
+ 0x0003452d, 0x0003472d, 0x0003492d, 0x00034b2d,
+ 0x00034d2d, 0x00034f2d, 0x0003512d, 0x0003532d,
+ 0x0003552d, 0x0003572d, 0x0003592d, 0x00035b2d,
+ 0x00035d2d, 0x00035f2d, 0x0003612d, 0x0003632d,
+ 0x0003652d, 0x0003672d, 0x0003692d, 0x00036b2d,
+ 0x00036d2d, 0x00036f2d, 0x0003712d, 0x0003732d,
+ 0x0003752d, 0x0003772d, 0x0003792d, 0x00037b2d,
+ 0x00037d2d, 0x00037f2d, 0x0003812d, 0x0003832d,
+ 0x0003852d, 0x0003872d, 0x0003892d, 0x00038b2d,
+ 0x00038d2d, 0x00038f2d, 0x0003912d, 0x0003932d,
+ 0x0003952d, 0x0003972d, 0x0003992d, 0x00039b2d,
+ 0x00039d2d, 0x00039f2d, 0x0003a12d, 0x0003a32d,
+ 0x0003a52d, 0x0003a72d, 0x0003a92d, 0x0003ab2d,
+ 0x0003ad2d, 0x0003af2d, 0x0003b12d, 0x0003b32d,
+ 0x0003b52d, 0x0003b72d, 0x0003b92d, 0x0003bb2d,
+ 0x0003bd2d, 0x0003bf2d, 0x0003c12d, 0x0003c32d,
+ 0x0003c52d, 0x0003c72d, 0x0003c92d, 0x0003cb2d,
+ 0x0003cd2d, 0x0003cf2d, 0x0003d12d, 0x0003d32d,
+ 0x0003d52d, 0x0003d72d, 0x0003d92d, 0x0003db2d,
+ 0x0003dd2d, 0x0003df2d, 0x0003e12d, 0x0003e32d,
+ 0x0003e52d, 0x0003e72d, 0x0003e92d, 0x0003eb2d,
+ 0x0003ed2d, 0x0003ef2d, 0x0003f12d, 0x0003f32d,
+ 0x0003f52d, 0x0003f72d, 0x0003f92d, 0x0003fb2d,
+ 0x0003fd2d, 0x0003ff2d, 0x000002ee, 0x000006ee,
+ 0x00000aee, 0x00000eee, 0x000012ee, 0x000016ee,
+ 0x00001aee, 0x00001eee, 0x000022ee, 0x000026ee,
+ 0x00002aee, 0x00002eee, 0x000032ee, 0x000036ee,
+ 0x00003aee, 0x00003eee, 0x000042ee, 0x000046ee,
+ 0x00004aee, 0x00004eee, 0x000052ee, 0x000056ee,
+ 0x00005aee, 0x00005eee, 0x000062ee, 0x000066ee,
+ 0x00006aee, 0x00006eee, 0x000072ee, 0x000076ee,
+ 0x00007aee, 0x00007eee, 0x000082ee, 0x000086ee,
+ 0x00008aee, 0x00008eee, 0x000092ee, 0x000096ee,
+ 0x00009aee, 0x00009eee, 0x0000a2ee, 0x0000a6ee,
+ 0x0000aaee, 0x0000aeee, 0x0000b2ee, 0x0000b6ee,
+ 0x0000baee, 0x0000beee, 0x0000c2ee, 0x0000c6ee,
+ 0x0000caee, 0x0000ceee, 0x0000d2ee, 0x0000d6ee,
+ 0x0000daee, 0x0000deee, 0x0000e2ee, 0x0000e6ee,
+ 0x0000eaee, 0x0000eeee, 0x0000f2ee, 0x0000f6ee,
+ 0x0000faee, 0x0000feee, 0x000102ee, 0x000106ee,
+ 0x00010aee, 0x00010eee, 0x000112ee, 0x000116ee,
+ 0x00011aee, 0x00011eee, 0x000122ee, 0x000126ee,
+ 0x00012aee, 0x00012eee, 0x000132ee, 0x000136ee,
+ 0x00013aee, 0x00013eee, 0x000142ee, 0x000146ee,
+ 0x00014aee, 0x00014eee, 0x000152ee, 0x000156ee,
+ 0x00015aee, 0x00015eee, 0x000162ee, 0x000166ee,
+ 0x00016aee, 0x00016eee, 0x000172ee, 0x000176ee,
+ 0x00017aee, 0x00017eee, 0x000182ee, 0x000186ee,
+ 0x00018aee, 0x00018eee, 0x000192ee, 0x000196ee,
+ 0x00019aee, 0x00019eee, 0x0001a2ee, 0x0001a6ee,
+ 0x0001aaee, 0x0001aeee, 0x0001b2ee, 0x0001b6ee,
+ 0x0001baee, 0x0001beee, 0x0001c2ee, 0x0001c6ee,
+ 0x0001caee, 0x0001ceee, 0x0001d2ee, 0x0001d6ee,
+ 0x0001daee, 0x0001deee, 0x0001e2ee, 0x0001e6ee,
+ 0x0001eaee, 0x0001eeee, 0x0001f2ee, 0x0001f6ee,
+ 0x0001faee, 0x0001feee, 0x000202ee, 0x000206ee,
+ 0x00020aee, 0x00020eee, 0x000212ee, 0x000216ee,
+ 0x00021aee, 0x00021eee, 0x000222ee, 0x000226ee,
+ 0x00022aee, 0x00022eee, 0x000232ee, 0x000236ee,
+ 0x00023aee, 0x00023eee, 0x000242ee, 0x000246ee,
+ 0x00024aee, 0x00024eee, 0x000252ee, 0x000256ee,
+ 0x00025aee, 0x00025eee, 0x000262ee, 0x000266ee,
+ 0x00026aee, 0x00026eee, 0x000272ee, 0x000276ee,
+ 0x00027aee, 0x00027eee, 0x000282ee, 0x000286ee,
+ 0x00028aee, 0x00028eee, 0x000292ee, 0x000296ee,
+ 0x00029aee, 0x00029eee, 0x0002a2ee, 0x0002a6ee,
+ 0x0002aaee, 0x0002aeee, 0x0002b2ee, 0x0002b6ee,
+ 0x0002baee, 0x0002beee, 0x0002c2ee, 0x0002c6ee,
+ 0x0002caee, 0x0002ceee, 0x0002d2ee, 0x0002d6ee,
+ 0x0002daee, 0x0002deee, 0x0002e2ee, 0x0002e6ee,
+ 0x0002eaee, 0x0002eeee, 0x0002f2ee, 0x0002f6ee,
+ 0x0002faee, 0x0002feee, 0x000302ee, 0x000306ee,
+ 0x00030aee, 0x00030eee, 0x000312ee, 0x000316ee,
+ 0x00031aee, 0x00031eee, 0x000322ee, 0x000326ee,
+ 0x00032aee, 0x00032eee, 0x000332ee, 0x000336ee,
+ 0x00033aee, 0x00033eee, 0x000342ee, 0x000346ee,
+ 0x00034aee, 0x00034eee, 0x000352ee, 0x000356ee,
+ 0x00035aee, 0x00035eee, 0x000362ee, 0x000366ee,
+ 0x00036aee, 0x00036eee, 0x000372ee, 0x000376ee,
+ 0x00037aee, 0x00037eee, 0x000382ee, 0x000386ee,
+ 0x00038aee, 0x00038eee, 0x000392ee, 0x000396ee,
+ 0x00039aee, 0x00039eee, 0x0003a2ee, 0x0003a6ee,
+ 0x0003aaee, 0x0003aeee, 0x0003b2ee, 0x0003b6ee,
+ 0x0003baee, 0x0003beee, 0x0003c2ee, 0x0003c6ee,
+ 0x0003caee, 0x0003ceee, 0x0003d2ee, 0x0003d6ee,
+ 0x0003daee, 0x0003deee, 0x0003e2ee, 0x0003e6ee,
+ 0x0003eaee, 0x0003eeee, 0x0003f2ee, 0x0003f6ee,
+ 0x0003faee, 0x0003feee, 0x000402ee, 0x000406ee,
+ 0x00040aee, 0x00040eee, 0x000412ee, 0x000416ee,
+ 0x00041aee, 0x00041eee, 0x000422ee, 0x000426ee,
+ 0x00042aee, 0x00042eee, 0x000432ee, 0x000436ee,
+ 0x00043aee, 0x00043eee, 0x000442ee, 0x000446ee,
+ 0x00044aee, 0x00044eee, 0x000452ee, 0x000456ee,
+ 0x00045aee, 0x00045eee, 0x000462ee, 0x000466ee,
+ 0x00046aee, 0x00046eee, 0x000472ee, 0x000476ee,
+ 0x00047aee, 0x00047eee, 0x000482ee, 0x000486ee,
+ 0x00048aee, 0x00048eee, 0x000492ee, 0x000496ee,
+ 0x00049aee, 0x00049eee, 0x0004a2ee, 0x0004a6ee,
+ 0x0004aaee, 0x0004aeee, 0x0004b2ee, 0x0004b6ee,
+ 0x0004baee, 0x0004beee, 0x0004c2ee, 0x0004c6ee,
+ 0x0004caee, 0x0004ceee, 0x0004d2ee, 0x0004d6ee,
+ 0x0004daee, 0x0004deee, 0x0004e2ee, 0x0004e6ee,
+ 0x0004eaee, 0x0004eeee, 0x0004f2ee, 0x0004f6ee,
+ 0x0004faee, 0x0004feee, 0x000502ee, 0x000506ee,
+ 0x00050aee, 0x00050eee, 0x000512ee, 0x000516ee,
+ 0x00051aee, 0x00051eee, 0x000522ee, 0x000526ee,
+ 0x00052aee, 0x00052eee, 0x000532ee, 0x000536ee,
+ 0x00053aee, 0x00053eee, 0x000542ee, 0x000546ee,
+ 0x00054aee, 0x00054eee, 0x000552ee, 0x000556ee,
+ 0x00055aee, 0x00055eee, 0x000562ee, 0x000566ee,
+ 0x00056aee, 0x00056eee, 0x000572ee, 0x000576ee,
+ 0x00057aee, 0x00057eee, 0x000582ee, 0x000586ee,
+ 0x00058aee, 0x00058eee, 0x000592ee, 0x000596ee,
+ 0x00059aee, 0x00059eee, 0x0005a2ee, 0x0005a6ee,
+ 0x0005aaee, 0x0005aeee, 0x0005b2ee, 0x0005b6ee,
+ 0x0005baee, 0x0005beee, 0x0005c2ee, 0x0005c6ee,
+ 0x0005caee, 0x0005ceee, 0x0005d2ee, 0x0005d6ee,
+ 0x0005daee, 0x0005deee, 0x0005e2ee, 0x0005e6ee,
+ 0x0005eaee, 0x0005eeee, 0x0005f2ee, 0x0005f6ee,
+ 0x0005faee, 0x0005feee, 0x000602ee, 0x000606ee,
+ 0x00060aee, 0x00060eee, 0x000612ee, 0x000616ee,
+ 0x00061aee, 0x00061eee, 0x000622ee, 0x000626ee,
+ 0x00062aee, 0x00062eee, 0x000632ee, 0x000636ee,
+ 0x00063aee, 0x00063eee, 0x000642ee, 0x000646ee,
+ 0x00064aee, 0x00064eee, 0x000652ee, 0x000656ee,
+ 0x00065aee, 0x00065eee, 0x000662ee, 0x000666ee,
+ 0x00066aee, 0x00066eee, 0x000672ee, 0x000676ee,
+ 0x00067aee, 0x00067eee, 0x000682ee, 0x000686ee,
+ 0x00068aee, 0x00068eee, 0x000692ee, 0x000696ee,
+ 0x00069aee, 0x00069eee, 0x0006a2ee, 0x0006a6ee,
+ 0x0006aaee, 0x0006aeee, 0x0006b2ee, 0x0006b6ee,
+ 0x0006baee, 0x0006beee, 0x0006c2ee, 0x0006c6ee,
+ 0x0006caee, 0x0006ceee, 0x0006d2ee, 0x0006d6ee,
+ 0x0006daee, 0x0006deee, 0x0006e2ee, 0x0006e6ee,
+ 0x0006eaee, 0x0006eeee, 0x0006f2ee, 0x0006f6ee,
+ 0x0006faee, 0x0006feee, 0x000702ee, 0x000706ee,
+ 0x00070aee, 0x00070eee, 0x000712ee, 0x000716ee,
+ 0x00071aee, 0x00071eee, 0x000722ee, 0x000726ee,
+ 0x00072aee, 0x00072eee, 0x000732ee, 0x000736ee,
+ 0x00073aee, 0x00073eee, 0x000742ee, 0x000746ee,
+ 0x00074aee, 0x00074eee, 0x000752ee, 0x000756ee,
+ 0x00075aee, 0x00075eee, 0x000762ee, 0x000766ee,
+ 0x00076aee, 0x00076eee, 0x000772ee, 0x000776ee,
+ 0x00077aee, 0x00077eee, 0x000782ee, 0x000786ee,
+ 0x00078aee, 0x00078eee, 0x000792ee, 0x000796ee,
+ 0x00079aee, 0x00079eee, 0x0007a2ee, 0x0007a6ee,
+ 0x0007aaee, 0x0007aeee, 0x0007b2ee, 0x0007b6ee,
+ 0x0007baee, 0x0007beee, 0x0007c2ee, 0x0007c6ee,
+ 0x0007caee, 0x0007ceee, 0x0007d2ee, 0x0007d6ee,
+ 0x0007daee, 0x0007deee, 0x0007e2ee, 0x0007e6ee,
+ 0x0007eaee, 0x0007eeee, 0x0007f2ee, 0x0007f6ee,
+ 0x0007faee, 0x0007feee, 0x0000000d, 0x0000010d,
+ 0x0000020d, 0x0000030d, 0x0000040d, 0x0000050d,
+ 0x0000060d, 0x0000070d, 0x0000080d, 0x0000090d,
+ 0x00000a0d, 0x00000b0d, 0x00000c0d, 0x00000d0d,
+ 0x00000e0d, 0x00000f0d, 0x0000100d, 0x0000110d,
+ 0x0000120d, 0x0000130d, 0x0000140d, 0x0000150d,
+ 0x0000160d, 0x0000170d, 0x0000180d, 0x0000190d,
+ 0x00001a0d, 0x00001b0d, 0x00001c0d, 0x00001d0d,
+ 0x00001e0d, 0x00001f0d, 0x0000200d, 0x0000210d,
+ 0x0000220d, 0x0000230d, 0x0000240d, 0x0000250d,
+ 0x0000260d, 0x0000270d, 0x0000280d, 0x0000290d,
+ 0x00002a0d, 0x00002b0d, 0x00002c0d, 0x00002d0d,
+ 0x00002e0d, 0x00002f0d, 0x0000300d, 0x0000310d,
+ 0x0000320d, 0x0000330d, 0x0000340d, 0x0000350d,
+ 0x0000360d, 0x0000370d, 0x0000380d, 0x0000390d,
+ 0x00003a0d, 0x00003b0d, 0x00003c0d, 0x00003d0d,
+ 0x00003e0d, 0x00003f0d, 0x0000400d, 0x0000410d,
+ 0x0000420d, 0x0000430d, 0x0000440d, 0x0000450d,
+ 0x0000460d, 0x0000470d, 0x0000480d, 0x0000490d,
+ 0x00004a0d, 0x00004b0d, 0x00004c0d, 0x00004d0d,
+ 0x00004e0d, 0x00004f0d, 0x0000500d, 0x0000510d,
+ 0x0000520d, 0x0000530d, 0x0000540d, 0x0000550d,
+ 0x0000560d, 0x0000570d, 0x0000580d, 0x0000590d,
+ 0x00005a0d, 0x00005b0d, 0x00005c0d, 0x00005d0d,
+ 0x00005e0d, 0x00005f0d, 0x0000600d, 0x0000610d,
+ 0x0000620d, 0x0000630d, 0x0000640d, 0x0000650d,
+ 0x0000660d, 0x0000670d, 0x0000680d, 0x0000690d,
+ 0x00006a0d, 0x00006b0d, 0x00006c0d, 0x00006d0d,
+ 0x00006e0d, 0x00006f0d, 0x0000700d, 0x0000710d,
+ 0x0000720d, 0x0000730d, 0x0000740d, 0x0000750d,
+ 0x0000760d, 0x0000770d, 0x0000780d, 0x0000790d,
+ 0x00007a0d, 0x00007b0d, 0x00007c0d, 0x00007d0d,
+ 0x00007e0d, 0x00007f0d, 0x0000800d, 0x0000810d,
+ 0x0000820d, 0x0000830d, 0x0000840d, 0x0000850d,
+ 0x0000860d, 0x0000870d, 0x0000880d, 0x0000890d,
+ 0x00008a0d, 0x00008b0d, 0x00008c0d, 0x00008d0d,
+ 0x00008e0d, 0x00008f0d, 0x0000900d, 0x0000910d,
+ 0x0000920d, 0x0000930d, 0x0000940d, 0x0000950d,
+ 0x0000960d, 0x0000970d, 0x0000980d, 0x0000990d,
+ 0x00009a0d, 0x00009b0d, 0x00009c0d, 0x00009d0d,
+ 0x00009e0d, 0x00009f0d, 0x0000a00d, 0x0000a10d,
+ 0x0000a20d, 0x0000a30d, 0x0000a40d, 0x0000a50d,
+ 0x0000a60d, 0x0000a70d, 0x0000a80d, 0x0000a90d,
+ 0x0000aa0d, 0x0000ab0d, 0x0000ac0d, 0x0000ad0d,
+ 0x0000ae0d, 0x0000af0d, 0x0000b00d, 0x0000b10d,
+ 0x0000b20d, 0x0000b30d, 0x0000b40d, 0x0000b50d,
+ 0x0000b60d, 0x0000b70d, 0x0000b80d, 0x0000b90d,
+ 0x0000ba0d, 0x0000bb0d, 0x0000bc0d, 0x0000bd0d,
+ 0x0000be0d, 0x0000bf0d, 0x0000c00d, 0x0000c10d,
+ 0x0000c20d, 0x0000c30d, 0x0000c40d, 0x0000c50d,
+ 0x0000c60d, 0x0000c70d, 0x0000c80d, 0x0000c90d,
+ 0x0000ca0d, 0x0000cb0d, 0x0000cc0d, 0x0000cd0d,
+ 0x0000ce0d, 0x0000cf0d, 0x0000d00d, 0x0000d10d,
+ 0x0000d20d, 0x0000d30d, 0x0000d40d, 0x0000d50d,
+ 0x0000d60d, 0x0000d70d, 0x0000d80d, 0x0000d90d,
+ 0x0000da0d, 0x0000db0d, 0x0000dc0d, 0x0000dd0d,
+ 0x0000de0d, 0x0000df0d, 0x0000e00d, 0x0000e10d,
+ 0x0000e20d, 0x0000e30d, 0x0000e40d, 0x0000e50d,
+ 0x0000e60d, 0x0000e70d, 0x0000e80d, 0x0000e90d,
+ 0x0000ea0d, 0x0000eb0d, 0x0000ec0d, 0x0000ed0d,
+ 0x0000ee0d, 0x0000ef0d, 0x0000f00d, 0x0000f10d,
+ 0x0000f20d, 0x0000f30d, 0x0000f40d, 0x0000f50d,
+ 0x0000f60d, 0x0000f70d, 0x0000f80d, 0x0000f90d,
+ 0x0000fa0d, 0x0000fb0d, 0x0000fc0d, 0x0000fd0d,
+ 0x0000fe0d, 0x0000ff0d, 0x0001000d, 0x0001010d,
+ 0x0001020d, 0x0001030d, 0x0001040d, 0x0001050d,
+ 0x0001060d, 0x0001070d, 0x0001080d, 0x0001090d,
+ 0x00010a0d, 0x00010b0d, 0x00010c0d, 0x00010d0d,
+ 0x00010e0d, 0x00010f0d, 0x0001100d, 0x0001110d,
+ 0x0001120d, 0x0001130d, 0x0001140d, 0x0001150d,
+ 0x0001160d, 0x0001170d, 0x0001180d, 0x0001190d,
+ 0x00011a0d, 0x00011b0d, 0x00011c0d, 0x00011d0d,
+ 0x00011e0d, 0x00011f0d, 0x0001200d, 0x0001210d,
+ 0x0001220d, 0x0001230d, 0x0001240d, 0x0001250d,
+ 0x0001260d, 0x0001270d, 0x0001280d, 0x0001290d,
+ 0x00012a0d, 0x00012b0d, 0x00012c0d, 0x00012d0d,
+ 0x00012e0d, 0x00012f0d, 0x0001300d, 0x0001310d,
+ 0x0001320d, 0x0001330d, 0x0001340d, 0x0001350d,
+ 0x0001360d, 0x0001370d, 0x0001380d, 0x0001390d,
+ 0x00013a0d, 0x00013b0d, 0x00013c0d, 0x00013d0d,
+ 0x00013e0d, 0x00013f0d, 0x0001400d, 0x0001410d,
+ 0x0001420d, 0x0001430d, 0x0001440d, 0x0001450d,
+ 0x0001460d, 0x0001470d, 0x0001480d, 0x0001490d,
+ 0x00014a0d, 0x00014b0d, 0x00014c0d, 0x00014d0d,
+ 0x00014e0d, 0x00014f0d, 0x0001500d, 0x0001510d,
+ 0x0001520d, 0x0001530d, 0x0001540d, 0x0001550d,
+ 0x0001560d, 0x0001570d, 0x0001580d, 0x0001590d,
+ 0x00015a0d, 0x00015b0d, 0x00015c0d, 0x00015d0d,
+ 0x00015e0d, 0x00015f0d, 0x0001600d, 0x0001610d,
+ 0x0001620d, 0x0001630d, 0x0001640d, 0x0001650d,
+ 0x0001660d, 0x0001670d, 0x0001680d, 0x0001690d,
+ 0x00016a0d, 0x00016b0d, 0x00016c0d, 0x00016d0d,
+ 0x00016e0d, 0x00016f0d, 0x0001700d, 0x0001710d,
+ 0x0001720d, 0x0001730d, 0x0001740d, 0x0001750d,
+ 0x0001760d, 0x0001770d, 0x0001780d, 0x0001790d,
+ 0x00017a0d, 0x00017b0d, 0x00017c0d, 0x00017d0d,
+ 0x00017e0d, 0x00017f0d, 0x0001800d, 0x0001810d,
+ 0x0001820d, 0x0001830d, 0x0001840d, 0x0001850d,
+ 0x0001860d, 0x0001870d, 0x0001880d, 0x0001890d,
+ 0x00018a0d, 0x00018b0d, 0x00018c0d, 0x00018d0d,
+ 0x00018e0d, 0x00018f0d, 0x0001900d, 0x0001910d,
+ 0x0001920d, 0x0001930d, 0x0001940d, 0x0001950d,
+ 0x0001960d, 0x0001970d, 0x0001980d, 0x0001990d,
+ 0x00019a0d, 0x00019b0d, 0x00019c0d, 0x00019d0d,
+ 0x00019e0d, 0x00019f0d, 0x0001a00d, 0x0001a10d,
+ 0x0001a20d, 0x0001a30d, 0x0001a40d, 0x0001a50d,
+ 0x0001a60d, 0x0001a70d, 0x0001a80d, 0x0001a90d,
+ 0x0001aa0d, 0x0001ab0d, 0x0001ac0d, 0x0001ad0d,
+ 0x0001ae0d, 0x0001af0d, 0x0001b00d, 0x0001b10d,
+ 0x0001b20d, 0x0001b30d, 0x0001b40d, 0x0001b50d,
+ 0x0001b60d, 0x0001b70d, 0x0001b80d, 0x0001b90d,
+ 0x0001ba0d, 0x0001bb0d, 0x0001bc0d, 0x0001bd0d,
+ 0x0001be0d, 0x0001bf0d, 0x0001c00d, 0x0001c10d,
+ 0x0001c20d, 0x0001c30d, 0x0001c40d, 0x0001c50d,
+ 0x0001c60d, 0x0001c70d, 0x0001c80d, 0x0001c90d,
+ 0x0001ca0d, 0x0001cb0d, 0x0001cc0d, 0x0001cd0d,
+ 0x0001ce0d, 0x0001cf0d, 0x0001d00d, 0x0001d10d,
+ 0x0001d20d, 0x0001d30d, 0x0001d40d, 0x0001d50d,
+ 0x0001d60d, 0x0001d70d, 0x0001d80d, 0x0001d90d,
+ 0x0001da0d, 0x0001db0d, 0x0001dc0d, 0x0001dd0d,
+ 0x0001de0d, 0x0001df0d, 0x0001e00d, 0x0001e10d,
+ 0x0001e20d, 0x0001e30d, 0x0001e40d, 0x0001e50d,
+ 0x0001e60d, 0x0001e70d, 0x0001e80d, 0x0001e90d,
+ 0x0001ea0d, 0x0001eb0d, 0x0001ec0d, 0x0001ed0d,
+ 0x0001ee0d, 0x0001ef0d, 0x0001f00d, 0x0001f10d,
+ 0x0001f20d, 0x0001f30d, 0x0001f40d, 0x0001f50d,
+ 0x0001f60d, 0x0001f70d, 0x0001f80d, 0x0001f90d,
+ 0x0001fa0d, 0x0001fb0d, 0x0001fc0d, 0x0001fd0d,
+ 0x0001fe0d, 0x0001ff0d, 0x0002000d, 0x0002010d,
+ 0x0002020d, 0x0002030d, 0x0002040d, 0x0002050d,
+ 0x0002060d, 0x0002070d, 0x0002080d, 0x0002090d,
+ 0x00020a0d, 0x00020b0d, 0x00020c0d, 0x00020d0d,
+ 0x00020e0d, 0x00020f0d, 0x0002100d, 0x0002110d,
+ 0x0002120d, 0x0002130d, 0x0002140d, 0x0002150d,
+ 0x0002160d, 0x0002170d, 0x0002180d, 0x0002190d,
+ 0x00021a0d, 0x00021b0d, 0x00021c0d, 0x00021d0d,
+ 0x00021e0d, 0x00021f0d, 0x0002200d, 0x0002210d,
+ 0x0002220d, 0x0002230d, 0x0002240d, 0x0002250d,
+ 0x0002260d, 0x0002270d, 0x0002280d, 0x0002290d,
+ 0x00022a0d, 0x00022b0d, 0x00022c0d, 0x00022d0d,
+ 0x00022e0d, 0x00022f0d, 0x0002300d, 0x0002310d,
+ 0x0002320d, 0x0002330d, 0x0002340d, 0x0002350d,
+ 0x0002360d, 0x0002370d, 0x0002380d, 0x0002390d,
+ 0x00023a0d, 0x00023b0d, 0x00023c0d, 0x00023d0d,
+ 0x00023e0d, 0x00023f0d, 0x0002400d, 0x0002410d,
+ 0x0002420d, 0x0002430d, 0x0002440d, 0x0002450d,
+ 0x0002460d, 0x0002470d, 0x0002480d, 0x0002490d,
+ 0x00024a0d, 0x00024b0d, 0x00024c0d, 0x00024d0d,
+ 0x00024e0d, 0x00024f0d, 0x0002500d, 0x0002510d,
+ 0x0002520d, 0x0002530d, 0x0002540d, 0x0002550d,
+ 0x0002560d, 0x0002570d, 0x0002580d, 0x0002590d,
+ 0x00025a0d, 0x00025b0d, 0x00025c0d, 0x00025d0d,
+ 0x00025e0d, 0x00025f0d, 0x0002600d, 0x0002610d,
+ 0x0002620d, 0x0002630d, 0x0002640d, 0x0002650d,
+ 0x0002660d, 0x0002670d, 0x0002680d, 0x0002690d,
+ 0x00026a0d, 0x00026b0d, 0x00026c0d, 0x00026d0d,
+ 0x00026e0d, 0x00026f0d, 0x0002700d, 0x0002710d,
+ 0x0002720d, 0x0002730d, 0x0002740d, 0x0002750d,
+ 0x0002760d, 0x0002770d, 0x0002780d, 0x0002790d,
+ 0x00027a0d, 0x00027b0d, 0x00027c0d, 0x00027d0d,
+ 0x00027e0d, 0x00027f0d, 0x0002800d, 0x0002810d,
+ 0x0002820d, 0x0002830d, 0x0002840d, 0x0002850d,
+ 0x0002860d, 0x0002870d, 0x0002880d, 0x0002890d,
+ 0x00028a0d, 0x00028b0d, 0x00028c0d, 0x00028d0d,
+ 0x00028e0d, 0x00028f0d, 0x0002900d, 0x0002910d,
+ 0x0002920d, 0x0002930d, 0x0002940d, 0x0002950d,
+ 0x0002960d, 0x0002970d, 0x0002980d, 0x0002990d,
+ 0x00029a0d, 0x00029b0d, 0x00029c0d, 0x00029d0d,
+ 0x00029e0d, 0x00029f0d, 0x0002a00d, 0x0002a10d,
+ 0x0002a20d, 0x0002a30d, 0x0002a40d, 0x0002a50d,
+ 0x0002a60d, 0x0002a70d, 0x0002a80d, 0x0002a90d,
+ 0x0002aa0d, 0x0002ab0d, 0x0002ac0d, 0x0002ad0d,
+ 0x0002ae0d, 0x0002af0d, 0x0002b00d, 0x0002b10d,
+ 0x0002b20d, 0x0002b30d, 0x0002b40d, 0x0002b50d,
+ 0x0002b60d, 0x0002b70d, 0x0002b80d, 0x0002b90d,
+ 0x0002ba0d, 0x0002bb0d, 0x0002bc0d, 0x0002bd0d,
+ 0x0002be0d, 0x0002bf0d, 0x0002c00d, 0x0002c10d,
+ 0x0002c20d, 0x0002c30d, 0x0002c40d, 0x0002c50d,
+ 0x0002c60d, 0x0002c70d, 0x0002c80d, 0x0002c90d,
+ 0x0002ca0d, 0x0002cb0d, 0x0002cc0d, 0x0002cd0d,
+ 0x0002ce0d, 0x0002cf0d, 0x0002d00d, 0x0002d10d,
+ 0x0002d20d, 0x0002d30d, 0x0002d40d, 0x0002d50d,
+ 0x0002d60d, 0x0002d70d, 0x0002d80d, 0x0002d90d,
+ 0x0002da0d, 0x0002db0d, 0x0002dc0d, 0x0002dd0d,
+ 0x0002de0d, 0x0002df0d, 0x0002e00d, 0x0002e10d,
+ 0x0002e20d, 0x0002e30d, 0x0002e40d, 0x0002e50d,
+ 0x0002e60d, 0x0002e70d, 0x0002e80d, 0x0002e90d,
+ 0x0002ea0d, 0x0002eb0d, 0x0002ec0d, 0x0002ed0d,
+ 0x0002ee0d, 0x0002ef0d, 0x0002f00d, 0x0002f10d,
+ 0x0002f20d, 0x0002f30d, 0x0002f40d, 0x0002f50d,
+ 0x0002f60d, 0x0002f70d, 0x0002f80d, 0x0002f90d,
+ 0x0002fa0d, 0x0002fb0d, 0x0002fc0d, 0x0002fd0d,
+ 0x0002fe0d, 0x0002ff0d, 0x0003000d, 0x0003010d,
+ 0x0003020d, 0x0003030d, 0x0003040d, 0x0003050d,
+ 0x0003060d, 0x0003070d, 0x0003080d, 0x0003090d,
+ 0x00030a0d, 0x00030b0d, 0x00030c0d, 0x00030d0d,
+ 0x00030e0d, 0x00030f0d, 0x0003100d, 0x0003110d,
+ 0x0003120d, 0x0003130d, 0x0003140d, 0x0003150d,
+ 0x0003160d, 0x0003170d, 0x0003180d, 0x0003190d,
+ 0x00031a0d, 0x00031b0d, 0x00031c0d, 0x00031d0d,
+ 0x00031e0d, 0x00031f0d, 0x0003200d, 0x0003210d,
+ 0x0003220d, 0x0003230d, 0x0003240d, 0x0003250d,
+ 0x0003260d, 0x0003270d, 0x0003280d, 0x0003290d,
+ 0x00032a0d, 0x00032b0d, 0x00032c0d, 0x00032d0d,
+ 0x00032e0d, 0x00032f0d, 0x0003300d, 0x0003310d,
+ 0x0003320d, 0x0003330d, 0x0003340d, 0x0003350d,
+ 0x0003360d, 0x0003370d, 0x0003380d, 0x0003390d,
+ 0x00033a0d, 0x00033b0d, 0x00033c0d, 0x00033d0d,
+ 0x00033e0d, 0x00033f0d, 0x0003400d, 0x0003410d,
+ 0x0003420d, 0x0003430d, 0x0003440d, 0x0003450d,
+ 0x0003460d, 0x0003470d, 0x0003480d, 0x0003490d,
+ 0x00034a0d, 0x00034b0d, 0x00034c0d, 0x00034d0d,
+ 0x00034e0d, 0x00034f0d, 0x0003500d, 0x0003510d,
+ 0x0003520d, 0x0003530d, 0x0003540d, 0x0003550d,
+ 0x0003560d, 0x0003570d, 0x0003580d, 0x0003590d,
+ 0x00035a0d, 0x00035b0d, 0x00035c0d, 0x00035d0d,
+ 0x00035e0d, 0x00035f0d, 0x0003600d, 0x0003610d,
+ 0x0003620d, 0x0003630d, 0x0003640d, 0x0003650d,
+ 0x0003660d, 0x0003670d, 0x0003680d, 0x0003690d,
+ 0x00036a0d, 0x00036b0d, 0x00036c0d, 0x00036d0d,
+ 0x00036e0d, 0x00036f0d, 0x0003700d, 0x0003710d,
+ 0x0003720d, 0x0003730d, 0x0003740d, 0x0003750d,
+ 0x0003760d, 0x0003770d, 0x0003780d, 0x0003790d,
+ 0x00037a0d, 0x00037b0d, 0x00037c0d, 0x00037d0d,
+ 0x00037e0d, 0x00037f0d, 0x0003800d, 0x0003810d,
+ 0x0003820d, 0x0003830d, 0x0003840d, 0x0003850d,
+ 0x0003860d, 0x0003870d, 0x0003880d, 0x0003890d,
+ 0x00038a0d, 0x00038b0d, 0x00038c0d, 0x00038d0d,
+ 0x00038e0d, 0x00038f0d, 0x0003900d, 0x0003910d,
+ 0x0003920d, 0x0003930d, 0x0003940d, 0x0003950d,
+ 0x0003960d, 0x0003970d, 0x0003980d, 0x0003990d,
+ 0x00039a0d, 0x00039b0d, 0x00039c0d, 0x00039d0d,
+ 0x00039e0d, 0x00039f0d, 0x0003a00d, 0x0003a10d,
+ 0x0003a20d, 0x0003a30d, 0x0003a40d, 0x0003a50d,
+ 0x0003a60d, 0x0003a70d, 0x0003a80d, 0x0003a90d,
+ 0x0003aa0d, 0x0003ab0d, 0x0003ac0d, 0x0003ad0d,
+ 0x0003ae0d, 0x0003af0d, 0x0003b00d, 0x0003b10d,
+ 0x0003b20d, 0x0003b30d, 0x0003b40d, 0x0003b50d,
+ 0x0003b60d, 0x0003b70d, 0x0003b80d, 0x0003b90d,
+ 0x0003ba0d, 0x0003bb0d, 0x0003bc0d, 0x0003bd0d,
+ 0x0003be0d, 0x0003bf0d, 0x0003c00d, 0x0003c10d,
+ 0x0003c20d, 0x0003c30d, 0x0003c40d, 0x0003c50d,
+ 0x0003c60d, 0x0003c70d, 0x0003c80d, 0x0003c90d,
+ 0x0003ca0d, 0x0003cb0d, 0x0003cc0d, 0x0003cd0d,
+ 0x0003ce0d, 0x0003cf0d, 0x0003d00d, 0x0003d10d,
+ 0x0003d20d, 0x0003d30d, 0x0003d40d, 0x0003d50d,
+ 0x0003d60d, 0x0003d70d, 0x0003d80d, 0x0003d90d,
+ 0x0003da0d, 0x0003db0d, 0x0003dc0d, 0x0003dd0d,
+ 0x0003de0d, 0x0003df0d, 0x0003e00d, 0x0003e10d,
+ 0x0003e20d, 0x0003e30d, 0x0003e40d, 0x0003e50d,
+ 0x0003e60d, 0x0003e70d, 0x0003e80d, 0x0003e90d,
+ 0x0003ea0d, 0x0003eb0d, 0x0003ec0d, 0x0003ed0d,
+ 0x0003ee0d, 0x0003ef0d, 0x0003f00d, 0x0003f10d,
+ 0x0003f20d, 0x0003f30d, 0x0003f40d, 0x0003f50d,
+ 0x0003f60d, 0x0003f70d, 0x0003f80d, 0x0003f90d,
+ 0x0003fa0d, 0x0003fb0d, 0x0003fc0d, 0x0003fd0d,
+ 0x0003fe0d, 0x0003ff0d, 0x000000ae, 0x000002ae,
+ 0x000004ae, 0x000006ae, 0x000008ae, 0x00000aae,
+ 0x00000cae, 0x00000eae, 0x000010ae, 0x000012ae,
+ 0x000014ae, 0x000016ae, 0x000018ae, 0x00001aae,
+ 0x00001cae, 0x00001eae, 0x000020ae, 0x000022ae,
+ 0x000024ae, 0x000026ae, 0x000028ae, 0x00002aae,
+ 0x00002cae, 0x00002eae, 0x000030ae, 0x000032ae,
+ 0x000034ae, 0x000036ae, 0x000038ae, 0x00003aae,
+ 0x00003cae, 0x00003eae, 0x000040ae, 0x000042ae,
+ 0x000044ae, 0x000046ae, 0x000048ae, 0x00004aae,
+ 0x00004cae, 0x00004eae, 0x000050ae, 0x000052ae,
+ 0x000054ae, 0x000056ae, 0x000058ae, 0x00005aae,
+ 0x00005cae, 0x00005eae, 0x000060ae, 0x000062ae,
+ 0x000064ae, 0x000066ae, 0x000068ae, 0x00006aae,
+ 0x00006cae, 0x00006eae, 0x000070ae, 0x000072ae,
+ 0x000074ae, 0x000076ae, 0x000078ae, 0x00007aae,
+ 0x00007cae, 0x00007eae, 0x000080ae, 0x000082ae,
+ 0x000084ae, 0x000086ae, 0x000088ae, 0x00008aae,
+ 0x00008cae, 0x00008eae, 0x000090ae, 0x000092ae,
+ 0x000094ae, 0x000096ae, 0x000098ae, 0x00009aae,
+ 0x00009cae, 0x00009eae, 0x0000a0ae, 0x0000a2ae,
+ 0x0000a4ae, 0x0000a6ae, 0x0000a8ae, 0x0000aaae,
+ 0x0000acae, 0x0000aeae, 0x0000b0ae, 0x0000b2ae,
+ 0x0000b4ae, 0x0000b6ae, 0x0000b8ae, 0x0000baae,
+ 0x0000bcae, 0x0000beae, 0x0000c0ae, 0x0000c2ae,
+ 0x0000c4ae, 0x0000c6ae, 0x0000c8ae, 0x0000caae,
+ 0x0000ccae, 0x0000ceae, 0x0000d0ae, 0x0000d2ae,
+ 0x0000d4ae, 0x0000d6ae, 0x0000d8ae, 0x0000daae,
+ 0x0000dcae, 0x0000deae, 0x0000e0ae, 0x0000e2ae,
+ 0x0000e4ae, 0x0000e6ae, 0x0000e8ae, 0x0000eaae,
+ 0x0000ecae, 0x0000eeae, 0x0000f0ae, 0x0000f2ae,
+ 0x0000f4ae, 0x0000f6ae, 0x0000f8ae, 0x0000faae,
+ 0x0000fcae, 0x0000feae, 0x000100ae, 0x000102ae,
+ 0x000104ae, 0x000106ae, 0x000108ae, 0x00010aae,
+ 0x00010cae, 0x00010eae, 0x000110ae, 0x000112ae,
+ 0x000114ae, 0x000116ae, 0x000118ae, 0x00011aae,
+ 0x00011cae, 0x00011eae, 0x000120ae, 0x000122ae,
+ 0x000124ae, 0x000126ae, 0x000128ae, 0x00012aae,
+ 0x00012cae, 0x00012eae, 0x000130ae, 0x000132ae,
+ 0x000134ae, 0x000136ae, 0x000138ae, 0x00013aae,
+ 0x00013cae, 0x00013eae, 0x000140ae, 0x000142ae,
+ 0x000144ae, 0x000146ae, 0x000148ae, 0x00014aae,
+ 0x00014cae, 0x00014eae, 0x000150ae, 0x000152ae,
+ 0x000154ae, 0x000156ae, 0x000158ae, 0x00015aae,
+ 0x00015cae, 0x00015eae, 0x000160ae, 0x000162ae,
+ 0x000164ae, 0x000166ae, 0x000168ae, 0x00016aae,
+ 0x00016cae, 0x00016eae, 0x000170ae, 0x000172ae,
+ 0x000174ae, 0x000176ae, 0x000178ae, 0x00017aae,
+ 0x00017cae, 0x00017eae, 0x000180ae, 0x000182ae,
+ 0x000184ae, 0x000186ae, 0x000188ae, 0x00018aae,
+ 0x00018cae, 0x00018eae, 0x000190ae, 0x000192ae,
+ 0x000194ae, 0x000196ae, 0x000198ae, 0x00019aae,
+ 0x00019cae, 0x00019eae, 0x0001a0ae, 0x0001a2ae,
+ 0x0001a4ae, 0x0001a6ae, 0x0001a8ae, 0x0001aaae,
+ 0x0001acae, 0x0001aeae, 0x0001b0ae, 0x0001b2ae,
+ 0x0001b4ae, 0x0001b6ae, 0x0001b8ae, 0x0001baae,
+ 0x0001bcae, 0x0001beae, 0x0001c0ae, 0x0001c2ae,
+ 0x0001c4ae, 0x0001c6ae, 0x0001c8ae, 0x0001caae,
+ 0x0001ccae, 0x0001ceae, 0x0001d0ae, 0x0001d2ae,
+ 0x0001d4ae, 0x0001d6ae, 0x0001d8ae, 0x0001daae,
+ 0x0001dcae, 0x0001deae, 0x0001e0ae, 0x0001e2ae,
+ 0x0001e4ae, 0x0001e6ae, 0x0001e8ae, 0x0001eaae,
+ 0x0001ecae, 0x0001eeae, 0x0001f0ae, 0x0001f2ae,
+ 0x0001f4ae, 0x0001f6ae, 0x0001f8ae, 0x0001faae,
+ 0x0001fcae, 0x0001feae, 0x000200ae, 0x000202ae,
+ 0x000204ae, 0x000206ae, 0x000208ae, 0x00020aae,
+ 0x00020cae, 0x00020eae, 0x000210ae, 0x000212ae,
+ 0x000214ae, 0x000216ae, 0x000218ae, 0x00021aae,
+ 0x00021cae, 0x00021eae, 0x000220ae, 0x000222ae,
+ 0x000224ae, 0x000226ae, 0x000228ae, 0x00022aae,
+ 0x00022cae, 0x00022eae, 0x000230ae, 0x000232ae,
+ 0x000234ae, 0x000236ae, 0x000238ae, 0x00023aae,
+ 0x00023cae, 0x00023eae, 0x000240ae, 0x000242ae,
+ 0x000244ae, 0x000246ae, 0x000248ae, 0x00024aae,
+ 0x00024cae, 0x00024eae, 0x000250ae, 0x000252ae,
+ 0x000254ae, 0x000256ae, 0x000258ae, 0x00025aae,
+ 0x00025cae, 0x00025eae, 0x000260ae, 0x000262ae,
+ 0x000264ae, 0x000266ae, 0x000268ae, 0x00026aae,
+ 0x00026cae, 0x00026eae, 0x000270ae, 0x000272ae,
+ 0x000274ae, 0x000276ae, 0x000278ae, 0x00027aae,
+ 0x00027cae, 0x00027eae, 0x000280ae, 0x000282ae,
+ 0x000284ae, 0x000286ae, 0x000288ae, 0x00028aae,
+ 0x00028cae, 0x00028eae, 0x000290ae, 0x000292ae,
+ 0x000294ae, 0x000296ae, 0x000298ae, 0x00029aae,
+ 0x00029cae, 0x00029eae, 0x0002a0ae, 0x0002a2ae,
+ 0x0002a4ae, 0x0002a6ae, 0x0002a8ae, 0x0002aaae,
+ 0x0002acae, 0x0002aeae, 0x0002b0ae, 0x0002b2ae,
+ 0x0002b4ae, 0x0002b6ae, 0x0002b8ae, 0x0002baae,
+ 0x0002bcae, 0x0002beae, 0x0002c0ae, 0x0002c2ae,
+ 0x0002c4ae, 0x0002c6ae, 0x0002c8ae, 0x0002caae,
+ 0x0002ccae, 0x0002ceae, 0x0002d0ae, 0x0002d2ae,
+ 0x0002d4ae, 0x0002d6ae, 0x0002d8ae, 0x0002daae,
+ 0x0002dcae, 0x0002deae, 0x0002e0ae, 0x0002e2ae,
+ 0x0002e4ae, 0x0002e6ae, 0x0002e8ae, 0x0002eaae,
+ 0x0002ecae, 0x0002eeae, 0x0002f0ae, 0x0002f2ae,
+ 0x0002f4ae, 0x0002f6ae, 0x0002f8ae, 0x0002faae,
+ 0x0002fcae, 0x0002feae, 0x000300ae, 0x000302ae,
+ 0x000304ae, 0x000306ae, 0x000308ae, 0x00030aae,
+ 0x00030cae, 0x00030eae, 0x000310ae, 0x000312ae,
+ 0x000314ae, 0x000316ae, 0x000318ae, 0x00031aae,
+ 0x00031cae, 0x00031eae, 0x000320ae, 0x000322ae,
+ 0x000324ae, 0x000326ae, 0x000328ae, 0x00032aae,
+ 0x00032cae, 0x00032eae, 0x000330ae, 0x000332ae,
+ 0x000334ae, 0x000336ae, 0x000338ae, 0x00033aae,
+ 0x00033cae, 0x00033eae, 0x000340ae, 0x000342ae,
+ 0x000344ae, 0x000346ae, 0x000348ae, 0x00034aae,
+ 0x00034cae, 0x00034eae, 0x000350ae, 0x000352ae,
+ 0x000354ae, 0x000356ae, 0x000358ae, 0x00035aae,
+ 0x00035cae, 0x00035eae, 0x000360ae, 0x000362ae,
+ 0x000364ae, 0x000366ae, 0x000368ae, 0x00036aae,
+ 0x00036cae, 0x00036eae, 0x000370ae, 0x000372ae,
+ 0x000374ae, 0x000376ae, 0x000378ae, 0x00037aae,
+ 0x00037cae, 0x00037eae, 0x000380ae, 0x000382ae,
+ 0x000384ae, 0x000386ae, 0x000388ae, 0x00038aae,
+ 0x00038cae, 0x00038eae, 0x000390ae, 0x000392ae,
+ 0x000394ae, 0x000396ae, 0x000398ae, 0x00039aae,
+ 0x00039cae, 0x00039eae, 0x0003a0ae, 0x0003a2ae,
+ 0x0003a4ae, 0x0003a6ae, 0x0003a8ae, 0x0003aaae,
+ 0x0003acae, 0x0003aeae, 0x0003b0ae, 0x0003b2ae,
+ 0x0003b4ae, 0x0003b6ae, 0x0003b8ae, 0x0003baae,
+ 0x0003bcae, 0x0003beae, 0x0003c0ae, 0x0003c2ae,
+ 0x0003c4ae, 0x0003c6ae, 0x0003c8ae, 0x0003caae,
+ 0x0003ccae, 0x0003ceae, 0x0003d0ae, 0x0003d2ae,
+ 0x0003d4ae, 0x0003d6ae, 0x0003d8ae, 0x0003daae,
+ 0x0003dcae, 0x0003deae, 0x0003e0ae, 0x0003e2ae,
+ 0x0003e4ae, 0x0003e6ae, 0x0003e8ae, 0x0003eaae,
+ 0x0003ecae, 0x0003eeae, 0x0003f0ae, 0x0003f2ae,
+ 0x0003f4ae, 0x0003f6ae, 0x0003f8ae, 0x0003faae,
+ 0x0003fcae, 0x0003feae, 0x000400ae, 0x000402ae,
+ 0x000404ae, 0x000406ae, 0x000408ae, 0x00040aae,
+ 0x00040cae, 0x00040eae, 0x000410ae, 0x000412ae,
+ 0x000414ae, 0x000416ae, 0x000418ae, 0x00041aae,
+ 0x00041cae, 0x00041eae, 0x000420ae, 0x000422ae,
+ 0x000424ae, 0x000426ae, 0x000428ae, 0x00042aae,
+ 0x00042cae, 0x00042eae, 0x000430ae, 0x000432ae,
+ 0x000434ae, 0x000436ae, 0x000438ae, 0x00043aae,
+ 0x00043cae, 0x00043eae, 0x000440ae, 0x000442ae,
+ 0x000444ae, 0x000446ae, 0x000448ae, 0x00044aae,
+ 0x00044cae, 0x00044eae, 0x000450ae, 0x000452ae,
+ 0x000454ae, 0x000456ae, 0x000458ae, 0x00045aae,
+ 0x00045cae, 0x00045eae, 0x000460ae, 0x000462ae,
+ 0x000464ae, 0x000466ae, 0x000468ae, 0x00046aae,
+ 0x00046cae, 0x00046eae, 0x000470ae, 0x000472ae,
+ 0x000474ae, 0x000476ae, 0x000478ae, 0x00047aae,
+ 0x00047cae, 0x00047eae, 0x000480ae, 0x000482ae,
+ 0x000484ae, 0x000486ae, 0x000488ae, 0x00048aae,
+ 0x00048cae, 0x00048eae, 0x000490ae, 0x000492ae,
+ 0x000494ae, 0x000496ae, 0x000498ae, 0x00049aae,
+ 0x00049cae, 0x00049eae, 0x0004a0ae, 0x0004a2ae,
+ 0x0004a4ae, 0x0004a6ae, 0x0004a8ae, 0x0004aaae,
+ 0x0004acae, 0x0004aeae, 0x0004b0ae, 0x0004b2ae,
+ 0x0004b4ae, 0x0004b6ae, 0x0004b8ae, 0x0004baae,
+ 0x0004bcae, 0x0004beae, 0x0004c0ae, 0x0004c2ae,
+ 0x0004c4ae, 0x0004c6ae, 0x0004c8ae, 0x0004caae,
+ 0x0004ccae, 0x0004ceae, 0x0004d0ae, 0x0004d2ae,
+ 0x0004d4ae, 0x0004d6ae, 0x0004d8ae, 0x0004daae,
+ 0x0004dcae, 0x0004deae, 0x0004e0ae, 0x0004e2ae,
+ 0x0004e4ae, 0x0004e6ae, 0x0004e8ae, 0x0004eaae,
+ 0x0004ecae, 0x0004eeae, 0x0004f0ae, 0x0004f2ae,
+ 0x0004f4ae, 0x0004f6ae, 0x0004f8ae, 0x0004faae,
+ 0x0004fcae, 0x0004feae, 0x000500ae, 0x000502ae,
+ 0x000504ae, 0x000506ae, 0x000508ae, 0x00050aae,
+ 0x00050cae, 0x00050eae, 0x000510ae, 0x000512ae,
+ 0x000514ae, 0x000516ae, 0x000518ae, 0x00051aae,
+ 0x00051cae, 0x00051eae, 0x000520ae, 0x000522ae,
+ 0x000524ae, 0x000526ae, 0x000528ae, 0x00052aae,
+ 0x00052cae, 0x00052eae, 0x000530ae, 0x000532ae,
+ 0x000534ae, 0x000536ae, 0x000538ae, 0x00053aae,
+ 0x00053cae, 0x00053eae, 0x000540ae, 0x000542ae,
+ 0x000544ae, 0x000546ae, 0x000548ae, 0x00054aae,
+ 0x00054cae, 0x00054eae, 0x000550ae, 0x000552ae,
+ 0x000554ae, 0x000556ae, 0x000558ae, 0x00055aae,
+ 0x00055cae, 0x00055eae, 0x000560ae, 0x000562ae,
+ 0x000564ae, 0x000566ae, 0x000568ae, 0x00056aae,
+ 0x00056cae, 0x00056eae, 0x000570ae, 0x000572ae,
+ 0x000574ae, 0x000576ae, 0x000578ae, 0x00057aae,
+ 0x00057cae, 0x00057eae, 0x000580ae, 0x000582ae,
+ 0x000584ae, 0x000586ae, 0x000588ae, 0x00058aae,
+ 0x00058cae, 0x00058eae, 0x000590ae, 0x000592ae,
+ 0x000594ae, 0x000596ae, 0x000598ae, 0x00059aae,
+ 0x00059cae, 0x00059eae, 0x0005a0ae, 0x0005a2ae,
+ 0x0005a4ae, 0x0005a6ae, 0x0005a8ae, 0x0005aaae,
+ 0x0005acae, 0x0005aeae, 0x0005b0ae, 0x0005b2ae,
+ 0x0005b4ae, 0x0005b6ae, 0x0005b8ae, 0x0005baae,
+ 0x0005bcae, 0x0005beae, 0x0005c0ae, 0x0005c2ae,
+ 0x0005c4ae, 0x0005c6ae, 0x0005c8ae, 0x0005caae,
+ 0x0005ccae, 0x0005ceae, 0x0005d0ae, 0x0005d2ae,
+ 0x0005d4ae, 0x0005d6ae, 0x0005d8ae, 0x0005daae,
+ 0x0005dcae, 0x0005deae, 0x0005e0ae, 0x0005e2ae,
+ 0x0005e4ae, 0x0005e6ae, 0x0005e8ae, 0x0005eaae,
+ 0x0005ecae, 0x0005eeae, 0x0005f0ae, 0x0005f2ae,
+ 0x0005f4ae, 0x0005f6ae, 0x0005f8ae, 0x0005faae,
+ 0x0005fcae, 0x0005feae, 0x000600ae, 0x000602ae,
+ 0x000604ae, 0x000606ae, 0x000608ae, 0x00060aae,
+ 0x00060cae, 0x00060eae, 0x000610ae, 0x000612ae,
+ 0x000614ae, 0x000616ae, 0x000618ae, 0x00061aae,
+ 0x00061cae, 0x00061eae, 0x000620ae, 0x000622ae,
+ 0x000624ae, 0x000626ae, 0x000628ae, 0x00062aae,
+ 0x00062cae, 0x00062eae, 0x000630ae, 0x000632ae,
+ 0x000634ae, 0x000636ae, 0x000638ae, 0x00063aae,
+ 0x00063cae, 0x00063eae, 0x000640ae, 0x000642ae,
+ 0x000644ae, 0x000646ae, 0x000648ae, 0x00064aae,
+ 0x00064cae, 0x00064eae, 0x000650ae, 0x000652ae,
+ 0x000654ae, 0x000656ae, 0x000658ae, 0x00065aae,
+ 0x00065cae, 0x00065eae, 0x000660ae, 0x000662ae,
+ 0x000664ae, 0x000666ae, 0x000668ae, 0x00066aae,
+ 0x00066cae, 0x00066eae, 0x000670ae, 0x000672ae,
+ 0x000674ae, 0x000676ae, 0x000678ae, 0x00067aae,
+ 0x00067cae, 0x00067eae, 0x000680ae, 0x000682ae,
+ 0x000684ae, 0x000686ae, 0x000688ae, 0x00068aae,
+ 0x00068cae, 0x00068eae, 0x000690ae, 0x000692ae,
+ 0x000694ae, 0x000696ae, 0x000698ae, 0x00069aae,
+ 0x00069cae, 0x00069eae, 0x0006a0ae, 0x0006a2ae,
+ 0x0006a4ae, 0x0006a6ae, 0x0006a8ae, 0x0006aaae,
+ 0x0006acae, 0x0006aeae, 0x0006b0ae, 0x0006b2ae,
+ 0x0006b4ae, 0x0006b6ae, 0x0006b8ae, 0x0006baae,
+ 0x0006bcae, 0x0006beae, 0x0006c0ae, 0x0006c2ae,
+ 0x0006c4ae, 0x0006c6ae, 0x0006c8ae, 0x0006caae,
+ 0x0006ccae, 0x0006ceae, 0x0006d0ae, 0x0006d2ae,
+ 0x0006d4ae, 0x0006d6ae, 0x0006d8ae, 0x0006daae,
+ 0x0006dcae, 0x0006deae, 0x0006e0ae, 0x0006e2ae,
+ 0x0006e4ae, 0x0006e6ae, 0x0006e8ae, 0x0006eaae,
+ 0x0006ecae, 0x0006eeae, 0x0006f0ae, 0x0006f2ae,
+ 0x0006f4ae, 0x0006f6ae, 0x0006f8ae, 0x0006faae,
+ 0x0006fcae, 0x0006feae, 0x000700ae, 0x000702ae,
+ 0x000704ae, 0x000706ae, 0x000708ae, 0x00070aae,
+ 0x00070cae, 0x00070eae, 0x000710ae, 0x000712ae,
+ 0x000714ae, 0x000716ae, 0x000718ae, 0x00071aae,
+ 0x00071cae, 0x00071eae, 0x000720ae, 0x000722ae,
+ 0x000724ae, 0x000726ae, 0x000728ae, 0x00072aae,
+ 0x00072cae, 0x00072eae, 0x000730ae, 0x000732ae,
+ 0x000734ae, 0x000736ae, 0x000738ae, 0x00073aae,
+ 0x00073cae, 0x00073eae, 0x000740ae, 0x000742ae,
+ 0x000744ae, 0x000746ae, 0x000748ae, 0x00074aae,
+ 0x00074cae, 0x00074eae, 0x000750ae, 0x000752ae,
+ 0x000754ae, 0x000756ae, 0x000758ae, 0x00075aae,
+ 0x00075cae, 0x00075eae, 0x000760ae, 0x000762ae,
+ 0x000764ae, 0x000766ae, 0x000768ae, 0x00076aae,
+ 0x00076cae, 0x00076eae, 0x000770ae, 0x000772ae,
+ 0x000774ae, 0x000776ae, 0x000778ae, 0x00077aae,
+ 0x00077cae, 0x00077eae, 0x000780ae, 0x000782ae,
+ 0x000784ae, 0x000786ae, 0x000788ae, 0x00078aae,
+ 0x00078cae, 0x00078eae, 0x000790ae, 0x000792ae,
+ 0x000794ae, 0x000796ae, 0x000798ae, 0x00079aae,
+ 0x00079cae, 0x00079eae, 0x0007a0ae, 0x0007a2ae,
+ 0x0007a4ae, 0x0007a6ae, 0x0007a8ae, 0x0007aaae,
+ 0x0007acae, 0x0007aeae, 0x0007b0ae, 0x0007b2ae,
+ 0x0007b4ae, 0x0007b6ae, 0x0007b8ae, 0x0007baae,
+ 0x0007bcae, 0x0007beae, 0x0007c0ae, 0x0007c2ae,
+ 0x0007c4ae, 0x0007c6ae, 0x0007c8ae, 0x0007caae,
+ 0x0007ccae, 0x0007ceae, 0x0007d0ae, 0x0007d2ae,
+ 0x0007d4ae, 0x0007d6ae, 0x0007d8ae, 0x0007daae,
+ 0x0007dcae, 0x0007deae, 0x0007e0ae, 0x0007e2ae,
+ 0x0007e4ae, 0x0007e6ae, 0x0007e8ae, 0x0007eaae,
+ 0x0007ecae, 0x0007eeae, 0x0007f0ae, 0x0007f2ae,
+ 0x0007f4ae, 0x0007f6ae, 0x0007f8ae, 0x0007faae,
+ 0x0007fcae, 0x0007feae, 0x000001af, 0x000003af,
+ 0x000005af, 0x000007af, 0x000009af, 0x00000baf,
+ 0x00000daf, 0x00000faf, 0x000011af, 0x000013af,
+ 0x000015af, 0x000017af, 0x000019af, 0x00001baf,
+ 0x00001daf, 0x00001faf, 0x000021af, 0x000023af,
+ 0x000025af, 0x000027af, 0x000029af, 0x00002baf,
+ 0x00002daf, 0x00002faf, 0x000031af, 0x000033af,
+ 0x000035af, 0x000037af, 0x000039af, 0x00003baf,
+ 0x00003daf, 0x00003faf, 0x000041af, 0x000043af,
+ 0x000045af, 0x000047af, 0x000049af, 0x00004baf,
+ 0x00004daf, 0x00004faf, 0x000051af, 0x000053af,
+ 0x000055af, 0x000057af, 0x000059af, 0x00005baf,
+ 0x00005daf, 0x00005faf, 0x000061af, 0x000063af,
+ 0x000065af, 0x000067af, 0x000069af, 0x00006baf,
+ 0x00006daf, 0x00006faf, 0x000071af, 0x000073af,
+ 0x000075af, 0x000077af, 0x000079af, 0x00007baf,
+ 0x00007daf, 0x00007faf, 0x000081af, 0x000083af,
+ 0x000085af, 0x000087af, 0x000089af, 0x00008baf,
+ 0x00008daf, 0x00008faf, 0x000091af, 0x000093af,
+ 0x000095af, 0x000097af, 0x000099af, 0x00009baf,
+ 0x00009daf, 0x00009faf, 0x0000a1af, 0x0000a3af,
+ 0x0000a5af, 0x0000a7af, 0x0000a9af, 0x0000abaf,
+ 0x0000adaf, 0x0000afaf, 0x0000b1af, 0x0000b3af,
+ 0x0000b5af, 0x0000b7af, 0x0000b9af, 0x0000bbaf,
+ 0x0000bdaf, 0x0000bfaf, 0x0000c1af, 0x0000c3af,
+ 0x0000c5af, 0x0000c7af, 0x0000c9af, 0x0000cbaf,
+ 0x0000cdaf, 0x0000cfaf, 0x0000d1af, 0x0000d3af,
+ 0x0000d5af, 0x0000d7af, 0x0000d9af, 0x0000dbaf,
+ 0x0000ddaf, 0x0000dfaf, 0x0000e1af, 0x0000e3af,
+ 0x0000e5af, 0x0000e7af, 0x0000e9af, 0x0000ebaf,
+ 0x0000edaf, 0x0000efaf, 0x0000f1af, 0x0000f3af,
+ 0x0000f5af, 0x0000f7af, 0x0000f9af, 0x0000fbaf,
+ 0x0000fdaf, 0x0000ffaf, 0x000101af, 0x000103af,
+ 0x000105af, 0x000107af, 0x000109af, 0x00010baf,
+ 0x00010daf, 0x00010faf, 0x000111af, 0x000113af,
+ 0x000115af, 0x000117af, 0x000119af, 0x00011baf,
+ 0x00011daf, 0x00011faf, 0x000121af, 0x000123af,
+ 0x000125af, 0x000127af, 0x000129af, 0x00012baf,
+ 0x00012daf, 0x00012faf, 0x000131af, 0x000133af,
+ 0x000135af, 0x000137af, 0x000139af, 0x00013baf,
+ 0x00013daf, 0x00013faf, 0x000141af, 0x000143af,
+ 0x000145af, 0x000147af, 0x000149af, 0x00014baf,
+ 0x00014daf, 0x00014faf, 0x000151af, 0x000153af,
+ 0x000155af, 0x000157af, 0x000159af, 0x00015baf,
+ 0x00015daf, 0x00015faf, 0x000161af, 0x000163af,
+ 0x000165af, 0x000167af, 0x000169af, 0x00016baf,
+ 0x00016daf, 0x00016faf, 0x000171af, 0x000173af,
+ 0x000175af, 0x000177af, 0x000179af, 0x00017baf,
+ 0x00017daf, 0x00017faf, 0x000181af, 0x000183af,
+ 0x000185af, 0x000187af, 0x000189af, 0x00018baf,
+ 0x00018daf, 0x00018faf, 0x000191af, 0x000193af,
+ 0x000195af, 0x000197af, 0x000199af, 0x00019baf,
+ 0x00019daf, 0x00019faf, 0x0001a1af, 0x0001a3af,
+ 0x0001a5af, 0x0001a7af, 0x0001a9af, 0x0001abaf,
+ 0x0001adaf, 0x0001afaf, 0x0001b1af, 0x0001b3af,
+ 0x0001b5af, 0x0001b7af, 0x0001b9af, 0x0001bbaf,
+ 0x0001bdaf, 0x0001bfaf, 0x0001c1af, 0x0001c3af,
+ 0x0001c5af, 0x0001c7af, 0x0001c9af, 0x0001cbaf,
+ 0x0001cdaf, 0x0001cfaf, 0x0001d1af, 0x0001d3af,
+ 0x0001d5af, 0x0001d7af, 0x0001d9af, 0x0001dbaf,
+ 0x0001ddaf, 0x0001dfaf, 0x0001e1af, 0x0001e3af,
+ 0x0001e5af, 0x0001e7af, 0x0001e9af, 0x0001ebaf,
+ 0x0001edaf, 0x0001efaf, 0x0001f1af, 0x0001f3af,
+ 0x0001f5af, 0x0001f7af, 0x0001f9af, 0x0001fbaf,
+ 0x0001fdaf, 0x0001ffaf, 0x000201af, 0x000203af,
+ 0x000205af, 0x000207af, 0x000209af, 0x00020baf,
+ 0x00020daf, 0x00020faf, 0x000211af, 0x000213af,
+ 0x000215af, 0x000217af, 0x000219af, 0x00021baf,
+ 0x00021daf, 0x00021faf, 0x000221af, 0x000223af,
+ 0x000225af, 0x000227af, 0x000229af, 0x00022baf,
+ 0x00022daf, 0x00022faf, 0x000231af, 0x000233af,
+ 0x000235af, 0x000237af, 0x000239af, 0x00023baf,
+ 0x00023daf, 0x00023faf, 0x000241af, 0x000243af,
+ 0x000245af, 0x000247af, 0x000249af, 0x00024baf,
+ 0x00024daf, 0x00024faf, 0x000251af, 0x000253af,
+ 0x000255af, 0x000257af, 0x000259af, 0x00025baf,
+ 0x00025daf, 0x00025faf, 0x000261af, 0x000263af,
+ 0x000265af, 0x000267af, 0x000269af, 0x00026baf,
+ 0x00026daf, 0x00026faf, 0x000271af, 0x000273af,
+ 0x000275af, 0x000277af, 0x000279af, 0x00027baf,
+ 0x00027daf, 0x00027faf, 0x000281af, 0x000283af,
+ 0x000285af, 0x000287af, 0x000289af, 0x00028baf,
+ 0x00028daf, 0x00028faf, 0x000291af, 0x000293af,
+ 0x000295af, 0x000297af, 0x000299af, 0x00029baf,
+ 0x00029daf, 0x00029faf, 0x0002a1af, 0x0002a3af,
+ 0x0002a5af, 0x0002a7af, 0x0002a9af, 0x0002abaf,
+ 0x0002adaf, 0x0002afaf, 0x0002b1af, 0x0002b3af,
+ 0x0002b5af, 0x0002b7af, 0x0002b9af, 0x0002bbaf,
+ 0x0002bdaf, 0x0002bfaf, 0x0002c1af, 0x0002c3af,
+ 0x0002c5af, 0x0002c7af, 0x0002c9af, 0x0002cbaf,
+ 0x0002cdaf, 0x0002cfaf, 0x0002d1af, 0x0002d3af,
+ 0x0002d5af, 0x0002d7af, 0x0002d9af, 0x0002dbaf,
+ 0x0002ddaf, 0x0002dfaf, 0x0002e1af, 0x0002e3af,
+ 0x0002e5af, 0x0002e7af, 0x0002e9af, 0x0002ebaf,
+ 0x0002edaf, 0x0002efaf, 0x0002f1af, 0x0002f3af,
+ 0x0002f5af, 0x0002f7af, 0x0002f9af, 0x0002fbaf,
+ 0x0002fdaf, 0x0002ffaf, 0x000301af, 0x000303af,
+ 0x000305af, 0x000307af, 0x000309af, 0x00030baf,
+ 0x00030daf, 0x00030faf, 0x000311af, 0x000313af,
+ 0x000315af, 0x000317af, 0x000319af, 0x00031baf,
+ 0x00031daf, 0x00031faf, 0x000321af, 0x000323af,
+ 0x000325af, 0x000327af, 0x000329af, 0x00032baf,
+ 0x00032daf, 0x00032faf, 0x000331af, 0x000333af,
+ 0x000335af, 0x000337af, 0x000339af, 0x00033baf,
+ 0x00033daf, 0x00033faf, 0x000341af, 0x000343af,
+ 0x000345af, 0x000347af, 0x000349af, 0x00034baf,
+ 0x00034daf, 0x00034faf, 0x000351af, 0x000353af,
+ 0x000355af, 0x000357af, 0x000359af, 0x00035baf,
+ 0x00035daf, 0x00035faf, 0x000361af, 0x000363af,
+ 0x000365af, 0x000367af, 0x000369af, 0x00036baf,
+ 0x00036daf, 0x00036faf, 0x000371af, 0x000373af,
+ 0x000375af, 0x000377af, 0x000379af, 0x00037baf,
+ 0x00037daf, 0x00037faf, 0x000381af, 0x000383af,
+ 0x000385af, 0x000387af, 0x000389af, 0x00038baf,
+ 0x00038daf, 0x00038faf, 0x000391af, 0x000393af,
+ 0x000395af, 0x000397af, 0x000399af, 0x00039baf,
+ 0x00039daf, 0x00039faf, 0x0003a1af, 0x0003a3af,
+ 0x0003a5af, 0x0003a7af, 0x0003a9af, 0x0003abaf,
+ 0x0003adaf, 0x0003afaf, 0x0003b1af, 0x0003b3af,
+ 0x0003b5af, 0x0003b7af, 0x0003b9af, 0x0003bbaf,
+ 0x0003bdaf, 0x0003bfaf, 0x0003c1af, 0x0003c3af,
+ 0x0003c5af, 0x0003c7af, 0x0003c9af, 0x0003cbaf,
+ 0x0003cdaf, 0x0003cfaf, 0x0003d1af, 0x0003d3af,
+ 0x0003d5af, 0x0003d7af, 0x0003d9af, 0x0003dbaf,
+ 0x0003ddaf, 0x0003dfaf, 0x0003e1af, 0x0003e3af,
+ 0x0003e5af, 0x0003e7af, 0x0003e9af, 0x0003ebaf,
+ 0x0003edaf, 0x0003efaf, 0x0003f1af, 0x0003f3af,
+ 0x0003f5af, 0x0003f7af, 0x0003f9af, 0x0003fbaf,
+ 0x0003fdaf, 0x0003ffaf, 0x000401af, 0x000403af,
+ 0x000405af, 0x000407af, 0x000409af, 0x00040baf,
+ 0x00040daf, 0x00040faf, 0x000411af, 0x000413af,
+ 0x000415af, 0x000417af, 0x000419af, 0x00041baf,
+ 0x00041daf, 0x00041faf, 0x000421af, 0x000423af,
+ 0x000425af, 0x000427af, 0x000429af, 0x00042baf,
+ 0x00042daf, 0x00042faf, 0x000431af, 0x000433af,
+ 0x000435af, 0x000437af, 0x000439af, 0x00043baf,
+ 0x00043daf, 0x00043faf, 0x000441af, 0x000443af,
+ 0x000445af, 0x000447af, 0x000449af, 0x00044baf,
+ 0x00044daf, 0x00044faf, 0x000451af, 0x000453af,
+ 0x000455af, 0x000457af, 0x000459af, 0x00045baf,
+ 0x00045daf, 0x00045faf, 0x000461af, 0x000463af,
+ 0x000465af, 0x000467af, 0x000469af, 0x00046baf,
+ 0x00046daf, 0x00046faf, 0x000471af, 0x000473af,
+ 0x000475af, 0x000477af, 0x000479af, 0x00047baf,
+ 0x00047daf, 0x00047faf, 0x000481af, 0x000483af,
+ 0x000485af, 0x000487af, 0x000489af, 0x00048baf,
+ 0x00048daf, 0x00048faf, 0x000491af, 0x000493af,
+ 0x000495af, 0x000497af, 0x000499af, 0x00049baf,
+ 0x00049daf, 0x00049faf, 0x0004a1af, 0x0004a3af,
+ 0x0004a5af, 0x0004a7af, 0x0004a9af, 0x0004abaf,
+ 0x0004adaf, 0x0004afaf, 0x0004b1af, 0x0004b3af,
+ 0x0004b5af, 0x0004b7af, 0x0004b9af, 0x0004bbaf,
+ 0x0004bdaf, 0x0004bfaf, 0x0004c1af, 0x0004c3af,
+ 0x0004c5af, 0x0004c7af, 0x0004c9af, 0x0004cbaf,
+ 0x0004cdaf, 0x0004cfaf, 0x0004d1af, 0x0004d3af,
+ 0x0004d5af, 0x0004d7af, 0x0004d9af, 0x0004dbaf,
+ 0x0004ddaf, 0x0004dfaf, 0x0004e1af, 0x0004e3af,
+ 0x0004e5af, 0x0004e7af, 0x0004e9af, 0x0004ebaf,
+ 0x0004edaf, 0x0004efaf, 0x0004f1af, 0x0004f3af,
+ 0x0004f5af, 0x0004f7af, 0x0004f9af, 0x0004fbaf,
+ 0x0004fdaf, 0x0004ffaf, 0x000501af, 0x000503af,
+ 0x000505af, 0x000507af, 0x000509af, 0x00050baf,
+ 0x00050daf, 0x00050faf, 0x000511af, 0x000513af,
+ 0x000515af, 0x000517af, 0x000519af, 0x00051baf,
+ 0x00051daf, 0x00051faf, 0x000521af, 0x000523af,
+ 0x000525af, 0x000527af, 0x000529af, 0x00052baf,
+ 0x00052daf, 0x00052faf, 0x000531af, 0x000533af,
+ 0x000535af, 0x000537af, 0x000539af, 0x00053baf,
+ 0x00053daf, 0x00053faf, 0x000541af, 0x000543af,
+ 0x000545af, 0x000547af, 0x000549af, 0x00054baf,
+ 0x00054daf, 0x00054faf, 0x000551af, 0x000553af,
+ 0x000555af, 0x000557af, 0x000559af, 0x00055baf,
+ 0x00055daf, 0x00055faf, 0x000561af, 0x000563af,
+ 0x000565af, 0x000567af, 0x000569af, 0x00056baf,
+ 0x00056daf, 0x00056faf, 0x000571af, 0x000573af,
+ 0x000575af, 0x000577af, 0x000579af, 0x00057baf,
+ 0x00057daf, 0x00057faf, 0x000581af, 0x000583af,
+ 0x000585af, 0x000587af, 0x000589af, 0x00058baf,
+ 0x00058daf, 0x00058faf, 0x000591af, 0x000593af,
+ 0x000595af, 0x000597af, 0x000599af, 0x00059baf,
+ 0x00059daf, 0x00059faf, 0x0005a1af, 0x0005a3af,
+ 0x0005a5af, 0x0005a7af, 0x0005a9af, 0x0005abaf,
+ 0x0005adaf, 0x0005afaf, 0x0005b1af, 0x0005b3af,
+ 0x0005b5af, 0x0005b7af, 0x0005b9af, 0x0005bbaf,
+ 0x0005bdaf, 0x0005bfaf, 0x0005c1af, 0x0005c3af,
+ 0x0005c5af, 0x0005c7af, 0x0005c9af, 0x0005cbaf,
+ 0x0005cdaf, 0x0005cfaf, 0x0005d1af, 0x0005d3af,
+ 0x0005d5af, 0x0005d7af, 0x0005d9af, 0x0005dbaf,
+ 0x0005ddaf, 0x0005dfaf, 0x0005e1af, 0x0005e3af,
+ 0x0005e5af, 0x0005e7af, 0x0005e9af, 0x0005ebaf,
+ 0x0005edaf, 0x0005efaf, 0x0005f1af, 0x0005f3af,
+ 0x0005f5af, 0x0005f7af, 0x0005f9af, 0x0005fbaf,
+ 0x0005fdaf, 0x0005ffaf, 0x000601af, 0x000603af,
+ 0x000605af, 0x000607af, 0x000609af, 0x00060baf,
+ 0x00060daf, 0x00060faf, 0x000611af, 0x000613af,
+ 0x000615af, 0x000617af, 0x000619af, 0x00061baf,
+ 0x00061daf, 0x00061faf, 0x000621af, 0x000623af,
+ 0x000625af, 0x000627af, 0x000629af, 0x00062baf,
+ 0x00062daf, 0x00062faf, 0x000631af, 0x000633af,
+ 0x000635af, 0x000637af, 0x000639af, 0x00063baf,
+ 0x00063daf, 0x00063faf, 0x000641af, 0x000643af,
+ 0x000645af, 0x000647af, 0x000649af, 0x00064baf,
+ 0x00064daf, 0x00064faf, 0x000651af, 0x000653af,
+ 0x000655af, 0x000657af, 0x000659af, 0x00065baf,
+ 0x00065daf, 0x00065faf, 0x000661af, 0x000663af,
+ 0x000665af, 0x000667af, 0x000669af, 0x00066baf,
+ 0x00066daf, 0x00066faf, 0x000671af, 0x000673af,
+ 0x000675af, 0x000677af, 0x000679af, 0x00067baf,
+ 0x00067daf, 0x00067faf, 0x000681af, 0x000683af,
+ 0x000685af, 0x000687af, 0x000689af, 0x00068baf,
+ 0x00068daf, 0x00068faf, 0x000691af, 0x000693af,
+ 0x000695af, 0x000697af, 0x000699af, 0x00069baf,
+ 0x00069daf, 0x00069faf, 0x0006a1af, 0x0006a3af,
+ 0x0006a5af, 0x0006a7af, 0x0006a9af, 0x0006abaf,
+ 0x0006adaf, 0x0006afaf, 0x0006b1af, 0x0006b3af,
+ 0x0006b5af, 0x0006b7af, 0x0006b9af, 0x0006bbaf,
+ 0x0006bdaf, 0x0006bfaf, 0x0006c1af, 0x0006c3af,
+ 0x0006c5af, 0x0006c7af, 0x0006c9af, 0x0006cbaf,
+ 0x0006cdaf, 0x0006cfaf, 0x0006d1af, 0x0006d3af,
+ 0x0006d5af, 0x0006d7af, 0x0006d9af, 0x0006dbaf,
+ 0x0006ddaf, 0x0006dfaf, 0x0006e1af, 0x0006e3af,
+ 0x0006e5af, 0x0006e7af, 0x0006e9af, 0x0006ebaf,
+ 0x0006edaf, 0x0006efaf, 0x0006f1af, 0x0006f3af,
+ 0x0006f5af, 0x0006f7af, 0x0006f9af, 0x0006fbaf,
+ 0x0006fdaf, 0x0006ffaf, 0x000701af, 0x000703af,
+ 0x000705af, 0x000707af, 0x000709af, 0x00070baf,
+ 0x00070daf, 0x00070faf, 0x000711af, 0x000713af,
+ 0x000715af, 0x000717af, 0x000719af, 0x00071baf,
+ 0x00071daf, 0x00071faf, 0x000721af, 0x000723af,
+ 0x000725af, 0x000727af, 0x000729af, 0x00072baf,
+ 0x00072daf, 0x00072faf, 0x000731af, 0x000733af,
+ 0x000735af, 0x000737af, 0x000739af, 0x00073baf,
+ 0x00073daf, 0x00073faf, 0x000741af, 0x000743af,
+ 0x000745af, 0x000747af, 0x000749af, 0x00074baf,
+ 0x00074daf, 0x00074faf, 0x000751af, 0x000753af,
+ 0x000755af, 0x000757af, 0x000759af, 0x00075baf,
+ 0x00075daf, 0x00075faf, 0x000761af, 0x000763af,
+ 0x000765af, 0x000767af, 0x000769af, 0x00076baf,
+ 0x00076daf, 0x00076faf, 0x000771af, 0x000773af,
+ 0x000775af, 0x000777af, 0x000779af, 0x00077baf,
+ 0x00077daf, 0x00077faf, 0x000781af, 0x000783af,
+ 0x000785af, 0x000787af, 0x000789af, 0x00078baf,
+ 0x00078daf, 0x00078faf, 0x000791af, 0x000793af,
+ 0x000795af, 0x000797af, 0x000799af, 0x00079baf,
+ 0x00079daf, 0x00079faf, 0x0007a1af, 0x0007a3af,
+ 0x0007a5af, 0x0007a7af, 0x0007a9af, 0x0007abaf,
+ 0x0007adaf, 0x0007afaf, 0x0007b1af, 0x0007b3af,
+ 0x0007b5af, 0x0007b7af, 0x0007b9af, 0x0007bbaf,
+ 0x0007bdaf, 0x0007bfaf, 0x0007c1af, 0x0007c3af,
+ 0x0007c5af, 0x0007c7af, 0x0007c9af, 0x0007cbaf,
+ 0x0007cdaf, 0x0007cfaf, 0x0007d1af, 0x0007d3af,
+ 0x0007d5af, 0x0007d7af, 0x0007d9af, 0x0007dbaf,
+ 0x0007ddaf, 0x0007dfaf, 0x0007e1af, 0x0007e3af,
+ 0x0007e5af, 0x0007e7af, 0x0007e9af, 0x0007ebaf,
+ 0x0007edaf, 0x0007efaf, 0x0007f1af, 0x0007f3af,
+ 0x0007f5af, 0x0007f7af, 0x0007f9af, 0x0007fbaf,
+ 0x0007fdaf, 0x0007ffaf, 0x000801af, 0x000803af,
+ 0x000805af, 0x000807af, 0x000809af, 0x00080baf,
+ 0x00080daf, 0x00080faf, 0x000811af, 0x000813af,
+ 0x000815af, 0x000817af, 0x000819af, 0x00081baf,
+ 0x00081daf, 0x00081faf, 0x000821af, 0x000823af,
+ 0x000825af, 0x000827af, 0x000829af, 0x00082baf,
+ 0x00082daf, 0x00082faf, 0x000831af, 0x000833af,
+ 0x000835af, 0x000837af, 0x000839af, 0x00083baf,
+ 0x00083daf, 0x00083faf, 0x000841af, 0x000843af,
+ 0x000845af, 0x000847af, 0x000849af, 0x00084baf,
+ 0x00084daf, 0x00084faf, 0x000851af, 0x000853af,
+ 0x000855af, 0x000857af, 0x000859af, 0x00085baf,
+ 0x00085daf, 0x00085faf, 0x000861af, 0x000863af,
+ 0x000865af, 0x000867af, 0x000869af, 0x00086baf,
+ 0x00086daf, 0x00086faf, 0x000871af, 0x000873af,
+ 0x000875af, 0x000877af, 0x000879af, 0x00087baf,
+ 0x00087daf, 0x00087faf, 0x000881af, 0x000883af,
+ 0x000885af, 0x000887af, 0x000889af, 0x00088baf,
+ 0x00088daf, 0x00088faf, 0x000891af, 0x000893af,
+ 0x000895af, 0x000897af, 0x000899af, 0x00089baf,
+ 0x00089daf, 0x00089faf, 0x0008a1af, 0x0008a3af,
+ 0x0008a5af, 0x0008a7af, 0x0008a9af, 0x0008abaf,
+ 0x0008adaf, 0x0008afaf, 0x0008b1af, 0x0008b3af,
+ 0x0008b5af, 0x0008b7af, 0x0008b9af, 0x0008bbaf,
+ 0x0008bdaf, 0x0008bfaf, 0x0008c1af, 0x0008c3af,
+ 0x0008c5af, 0x0008c7af, 0x0008c9af, 0x0008cbaf,
+ 0x0008cdaf, 0x0008cfaf, 0x0008d1af, 0x0008d3af,
+ 0x0008d5af, 0x0008d7af, 0x0008d9af, 0x0008dbaf,
+ 0x0008ddaf, 0x0008dfaf, 0x0008e1af, 0x0008e3af,
+ 0x0008e5af, 0x0008e7af, 0x0008e9af, 0x0008ebaf,
+ 0x0008edaf, 0x0008efaf, 0x0008f1af, 0x0008f3af,
+ 0x0008f5af, 0x0008f7af, 0x0008f9af, 0x0008fbaf,
+ 0x0008fdaf, 0x0008ffaf, 0x000901af, 0x000903af,
+ 0x000905af, 0x000907af, 0x000909af, 0x00090baf,
+ 0x00090daf, 0x00090faf, 0x000911af, 0x000913af,
+ 0x000915af, 0x000917af, 0x000919af, 0x00091baf,
+ 0x00091daf, 0x00091faf, 0x000921af, 0x000923af,
+ 0x000925af, 0x000927af, 0x000929af, 0x00092baf,
+ 0x00092daf, 0x00092faf, 0x000931af, 0x000933af,
+ 0x000935af, 0x000937af, 0x000939af, 0x00093baf,
+ 0x00093daf, 0x00093faf, 0x000941af, 0x000943af,
+ 0x000945af, 0x000947af, 0x000949af, 0x00094baf,
+ 0x00094daf, 0x00094faf, 0x000951af, 0x000953af,
+ 0x000955af, 0x000957af, 0x000959af, 0x00095baf,
+ 0x00095daf, 0x00095faf, 0x000961af, 0x000963af,
+ 0x000965af, 0x000967af, 0x000969af, 0x00096baf,
+ 0x00096daf, 0x00096faf, 0x000971af, 0x000973af,
+ 0x000975af, 0x000977af, 0x000979af, 0x00097baf,
+ 0x00097daf, 0x00097faf, 0x000981af, 0x000983af,
+ 0x000985af, 0x000987af, 0x000989af, 0x00098baf,
+ 0x00098daf, 0x00098faf, 0x000991af, 0x000993af,
+ 0x000995af, 0x000997af, 0x000999af, 0x00099baf,
+ 0x00099daf, 0x00099faf, 0x0009a1af, 0x0009a3af,
+ 0x0009a5af, 0x0009a7af, 0x0009a9af, 0x0009abaf,
+ 0x0009adaf, 0x0009afaf, 0x0009b1af, 0x0009b3af,
+ 0x0009b5af, 0x0009b7af, 0x0009b9af, 0x0009bbaf,
+ 0x0009bdaf, 0x0009bfaf, 0x0009c1af, 0x0009c3af,
+ 0x0009c5af, 0x0009c7af, 0x0009c9af, 0x0009cbaf,
+ 0x0009cdaf, 0x0009cfaf, 0x0009d1af, 0x0009d3af,
+ 0x0009d5af, 0x0009d7af, 0x0009d9af, 0x0009dbaf,
+ 0x0009ddaf, 0x0009dfaf, 0x0009e1af, 0x0009e3af,
+ 0x0009e5af, 0x0009e7af, 0x0009e9af, 0x0009ebaf,
+ 0x0009edaf, 0x0009efaf, 0x0009f1af, 0x0009f3af,
+ 0x0009f5af, 0x0009f7af, 0x0009f9af, 0x0009fbaf,
+ 0x0009fdaf, 0x0009ffaf, 0x000a01af, 0x000a03af,
+ 0x000a05af, 0x000a07af, 0x000a09af, 0x000a0baf,
+ 0x000a0daf, 0x000a0faf, 0x000a11af, 0x000a13af,
+ 0x000a15af, 0x000a17af, 0x000a19af, 0x000a1baf,
+ 0x000a1daf, 0x000a1faf, 0x000a21af, 0x000a23af,
+ 0x000a25af, 0x000a27af, 0x000a29af, 0x000a2baf,
+ 0x000a2daf, 0x000a2faf, 0x000a31af, 0x000a33af,
+ 0x000a35af, 0x000a37af, 0x000a39af, 0x000a3baf,
+ 0x000a3daf, 0x000a3faf, 0x000a41af, 0x000a43af,
+ 0x000a45af, 0x000a47af, 0x000a49af, 0x000a4baf,
+ 0x000a4daf, 0x000a4faf, 0x000a51af, 0x000a53af,
+ 0x000a55af, 0x000a57af, 0x000a59af, 0x000a5baf,
+ 0x000a5daf, 0x000a5faf, 0x000a61af, 0x000a63af,
+ 0x000a65af, 0x000a67af, 0x000a69af, 0x000a6baf,
+ 0x000a6daf, 0x000a6faf, 0x000a71af, 0x000a73af,
+ 0x000a75af, 0x000a77af, 0x000a79af, 0x000a7baf,
+ 0x000a7daf, 0x000a7faf, 0x000a81af, 0x000a83af,
+ 0x000a85af, 0x000a87af, 0x000a89af, 0x000a8baf,
+ 0x000a8daf, 0x000a8faf, 0x000a91af, 0x000a93af,
+ 0x000a95af, 0x000a97af, 0x000a99af, 0x000a9baf,
+ 0x000a9daf, 0x000a9faf, 0x000aa1af, 0x000aa3af,
+ 0x000aa5af, 0x000aa7af, 0x000aa9af, 0x000aabaf,
+ 0x000aadaf, 0x000aafaf, 0x000ab1af, 0x000ab3af,
+ 0x000ab5af, 0x000ab7af, 0x000ab9af, 0x000abbaf,
+ 0x000abdaf, 0x000abfaf, 0x000ac1af, 0x000ac3af,
+ 0x000ac5af, 0x000ac7af, 0x000ac9af, 0x000acbaf,
+ 0x000acdaf, 0x000acfaf, 0x000ad1af, 0x000ad3af,
+ 0x000ad5af, 0x000ad7af, 0x000ad9af, 0x000adbaf,
+ 0x000addaf, 0x000adfaf, 0x000ae1af, 0x000ae3af,
+ 0x000ae5af, 0x000ae7af, 0x000ae9af, 0x000aebaf,
+ 0x000aedaf, 0x000aefaf, 0x000af1af, 0x000af3af,
+ 0x000af5af, 0x000af7af, 0x000af9af, 0x000afbaf,
+ 0x000afdaf, 0x000affaf, 0x000b01af, 0x000b03af,
+ 0x000b05af, 0x000b07af, 0x000b09af, 0x000b0baf,
+ 0x000b0daf, 0x000b0faf, 0x000b11af, 0x000b13af,
+ 0x000b15af, 0x000b17af, 0x000b19af, 0x000b1baf,
+ 0x000b1daf, 0x000b1faf, 0x000b21af, 0x000b23af,
+ 0x000b25af, 0x000b27af, 0x000b29af, 0x000b2baf,
+ 0x000b2daf, 0x000b2faf, 0x000b31af, 0x000b33af,
+ 0x000b35af, 0x000b37af, 0x000b39af, 0x000b3baf,
+ 0x000b3daf, 0x000b3faf, 0x000b41af, 0x000b43af,
+ 0x000b45af, 0x000b47af, 0x000b49af, 0x000b4baf,
+ 0x000b4daf, 0x000b4faf, 0x000b51af, 0x000b53af,
+ 0x000b55af, 0x000b57af, 0x000b59af, 0x000b5baf,
+ 0x000b5daf, 0x000b5faf, 0x000b61af, 0x000b63af,
+ 0x000b65af, 0x000b67af, 0x000b69af, 0x000b6baf,
+ 0x000b6daf, 0x000b6faf, 0x000b71af, 0x000b73af,
+ 0x000b75af, 0x000b77af, 0x000b79af, 0x000b7baf,
+ 0x000b7daf, 0x000b7faf, 0x000b81af, 0x000b83af,
+ 0x000b85af, 0x000b87af, 0x000b89af, 0x000b8baf,
+ 0x000b8daf, 0x000b8faf, 0x000b91af, 0x000b93af,
+ 0x000b95af, 0x000b97af, 0x000b99af, 0x000b9baf,
+ 0x000b9daf, 0x000b9faf, 0x000ba1af, 0x000ba3af,
+ 0x000ba5af, 0x000ba7af, 0x000ba9af, 0x000babaf,
+ 0x000badaf, 0x000bafaf, 0x000bb1af, 0x000bb3af,
+ 0x000bb5af, 0x000bb7af, 0x000bb9af, 0x000bbbaf,
+ 0x000bbdaf, 0x000bbfaf, 0x000bc1af, 0x000bc3af,
+ 0x000bc5af, 0x000bc7af, 0x000bc9af, 0x000bcbaf,
+ 0x000bcdaf, 0x000bcfaf, 0x000bd1af, 0x000bd3af,
+ 0x000bd5af, 0x000bd7af, 0x000bd9af, 0x000bdbaf,
+ 0x000bddaf, 0x000bdfaf, 0x000be1af, 0x000be3af,
+ 0x000be5af, 0x000be7af, 0x000be9af, 0x000bebaf,
+ 0x000bedaf, 0x000befaf, 0x000bf1af, 0x000bf3af,
+ 0x000bf5af, 0x000bf7af, 0x000bf9af, 0x000bfbaf,
+ 0x000bfdaf, 0x000bffaf, 0x000c01af, 0x000c03af,
+ 0x000c05af, 0x000c07af, 0x000c09af, 0x000c0baf,
+ 0x000c0daf, 0x000c0faf, 0x000c11af, 0x000c13af,
+ 0x000c15af, 0x000c17af, 0x000c19af, 0x000c1baf,
+ 0x000c1daf, 0x000c1faf, 0x000c21af, 0x000c23af,
+ 0x000c25af, 0x000c27af, 0x000c29af, 0x000c2baf,
+ 0x000c2daf, 0x000c2faf, 0x000c31af, 0x000c33af,
+ 0x000c35af, 0x000c37af, 0x000c39af, 0x000c3baf,
+ 0x000c3daf, 0x000c3faf, 0x000c41af, 0x000c43af,
+ 0x000c45af, 0x000c47af, 0x000c49af, 0x000c4baf,
+ 0x000c4daf, 0x000c4faf, 0x000c51af, 0x000c53af,
+ 0x000c55af, 0x000c57af, 0x000c59af, 0x000c5baf,
+ 0x000c5daf, 0x000c5faf, 0x000c61af, 0x000c63af,
+ 0x000c65af, 0x000c67af, 0x000c69af, 0x000c6baf,
+ 0x000c6daf, 0x000c6faf, 0x000c71af, 0x000c73af,
+ 0x000c75af, 0x000c77af, 0x000c79af, 0x000c7baf,
+ 0x000c7daf, 0x000c7faf, 0x000c81af, 0x000c83af,
+ 0x000c85af, 0x000c87af, 0x000c89af, 0x000c8baf,
+ 0x000c8daf, 0x000c8faf, 0x000c91af, 0x000c93af,
+ 0x000c95af, 0x000c97af, 0x000c99af, 0x000c9baf,
+ 0x000c9daf, 0x000c9faf, 0x000ca1af, 0x000ca3af,
+ 0x000ca5af, 0x000ca7af, 0x000ca9af, 0x000cabaf,
+ 0x000cadaf, 0x000cafaf, 0x000cb1af, 0x000cb3af,
+ 0x000cb5af, 0x000cb7af, 0x000cb9af, 0x000cbbaf,
+ 0x000cbdaf, 0x000cbfaf, 0x000cc1af, 0x000cc3af,
+ 0x000cc5af, 0x000cc7af, 0x000cc9af, 0x000ccbaf,
+ 0x000ccdaf, 0x000ccfaf, 0x000cd1af, 0x000cd3af,
+ 0x000cd5af, 0x000cd7af, 0x000cd9af, 0x000cdbaf,
+ 0x000cddaf, 0x000cdfaf, 0x000ce1af, 0x000ce3af,
+ 0x000ce5af, 0x000ce7af, 0x000ce9af, 0x000cebaf,
+ 0x000cedaf, 0x000cefaf, 0x000cf1af, 0x000cf3af,
+ 0x000cf5af, 0x000cf7af, 0x000cf9af, 0x000cfbaf,
+ 0x000cfdaf, 0x000cffaf, 0x000d01af, 0x000d03af,
+ 0x000d05af, 0x000d07af, 0x000d09af, 0x000d0baf,
+ 0x000d0daf, 0x000d0faf, 0x000d11af, 0x000d13af,
+ 0x000d15af, 0x000d17af, 0x000d19af, 0x000d1baf,
+ 0x000d1daf, 0x000d1faf, 0x000d21af, 0x000d23af,
+ 0x000d25af, 0x000d27af, 0x000d29af, 0x000d2baf,
+ 0x000d2daf, 0x000d2faf, 0x000d31af, 0x000d33af,
+ 0x000d35af, 0x000d37af, 0x000d39af, 0x000d3baf,
+ 0x000d3daf, 0x000d3faf, 0x000d41af, 0x000d43af,
+ 0x000d45af, 0x000d47af, 0x000d49af, 0x000d4baf,
+ 0x000d4daf, 0x000d4faf, 0x000d51af, 0x000d53af,
+ 0x000d55af, 0x000d57af, 0x000d59af, 0x000d5baf,
+ 0x000d5daf, 0x000d5faf, 0x000d61af, 0x000d63af,
+ 0x000d65af, 0x000d67af, 0x000d69af, 0x000d6baf,
+ 0x000d6daf, 0x000d6faf, 0x000d71af, 0x000d73af,
+ 0x000d75af, 0x000d77af, 0x000d79af, 0x000d7baf,
+ 0x000d7daf, 0x000d7faf, 0x000d81af, 0x000d83af,
+ 0x000d85af, 0x000d87af, 0x000d89af, 0x000d8baf,
+ 0x000d8daf, 0x000d8faf, 0x000d91af, 0x000d93af,
+ 0x000d95af, 0x000d97af, 0x000d99af, 0x000d9baf,
+ 0x000d9daf, 0x000d9faf, 0x000da1af, 0x000da3af,
+ 0x000da5af, 0x000da7af, 0x000da9af, 0x000dabaf,
+ 0x000dadaf, 0x000dafaf, 0x000db1af, 0x000db3af,
+ 0x000db5af, 0x000db7af, 0x000db9af, 0x000dbbaf,
+ 0x000dbdaf, 0x000dbfaf, 0x000dc1af, 0x000dc3af,
+ 0x000dc5af, 0x000dc7af, 0x000dc9af, 0x000dcbaf,
+ 0x000dcdaf, 0x000dcfaf, 0x000dd1af, 0x000dd3af,
+ 0x000dd5af, 0x000dd7af, 0x000dd9af, 0x000ddbaf,
+ 0x000dddaf, 0x000ddfaf, 0x000de1af, 0x000de3af,
+ 0x000de5af, 0x000de7af, 0x000de9af, 0x000debaf,
+ 0x000dedaf, 0x000defaf, 0x000df1af, 0x000df3af,
+ 0x000df5af, 0x000df7af, 0x000df9af, 0x000dfbaf,
+ 0x000dfdaf, 0x000dffaf, 0x000e01af, 0x000e03af,
+ 0x000e05af, 0x000e07af, 0x000e09af, 0x000e0baf,
+ 0x000e0daf, 0x000e0faf, 0x000e11af, 0x000e13af,
+ 0x000e15af, 0x000e17af, 0x000e19af, 0x000e1baf,
+ 0x000e1daf, 0x000e1faf, 0x000e21af, 0x000e23af,
+ 0x000e25af, 0x000e27af, 0x000e29af, 0x000e2baf,
+ 0x000e2daf, 0x000e2faf, 0x000e31af, 0x000e33af,
+ 0x000e35af, 0x000e37af, 0x000e39af, 0x000e3baf,
+ 0x000e3daf, 0x000e3faf, 0x000e41af, 0x000e43af,
+ 0x000e45af, 0x000e47af, 0x000e49af, 0x000e4baf,
+ 0x000e4daf, 0x000e4faf, 0x000e51af, 0x000e53af,
+ 0x000e55af, 0x000e57af, 0x000e59af, 0x000e5baf,
+ 0x000e5daf, 0x000e5faf, 0x000e61af, 0x000e63af,
+ 0x000e65af, 0x000e67af, 0x000e69af, 0x000e6baf,
+ 0x000e6daf, 0x000e6faf, 0x000e71af, 0x000e73af,
+ 0x000e75af, 0x000e77af, 0x000e79af, 0x000e7baf,
+ 0x000e7daf, 0x000e7faf, 0x000e81af, 0x000e83af,
+ 0x000e85af, 0x000e87af, 0x000e89af, 0x000e8baf,
+ 0x000e8daf, 0x000e8faf, 0x000e91af, 0x000e93af,
+ 0x000e95af, 0x000e97af, 0x000e99af, 0x000e9baf,
+ 0x000e9daf, 0x000e9faf, 0x000ea1af, 0x000ea3af,
+ 0x000ea5af, 0x000ea7af, 0x000ea9af, 0x000eabaf,
+ 0x000eadaf, 0x000eafaf, 0x000eb1af, 0x000eb3af,
+ 0x000eb5af, 0x000eb7af, 0x000eb9af, 0x000ebbaf,
+ 0x000ebdaf, 0x000ebfaf, 0x000ec1af, 0x000ec3af,
+ 0x000ec5af, 0x000ec7af, 0x000ec9af, 0x000ecbaf,
+ 0x000ecdaf, 0x000ecfaf, 0x000ed1af, 0x000ed3af,
+ 0x000ed5af, 0x000ed7af, 0x000ed9af, 0x000edbaf,
+ 0x000eddaf, 0x000edfaf, 0x000ee1af, 0x000ee3af,
+ 0x000ee5af, 0x000ee7af, 0x000ee9af, 0x000eebaf,
+ 0x000eedaf, 0x000eefaf, 0x000ef1af, 0x000ef3af,
+ 0x000ef5af, 0x000ef7af, 0x000ef9af, 0x000efbaf,
+ 0x000efdaf, 0x000effaf, 0x000f01af, 0x000f03af,
+ 0x000f05af, 0x000f07af, 0x000f09af, 0x000f0baf,
+ 0x000f0daf, 0x000f0faf, 0x000f11af, 0x000f13af,
+ 0x000f15af, 0x000f17af, 0x000f19af, 0x000f1baf,
+ 0x000f1daf, 0x000f1faf, 0x000f21af, 0x000f23af,
+ 0x000f25af, 0x000f27af, 0x000f29af, 0x000f2baf,
+ 0x000f2daf, 0x000f2faf, 0x000f31af, 0x000f33af,
+ 0x000f35af, 0x000f37af, 0x000f39af, 0x000f3baf,
+ 0x000f3daf, 0x000f3faf, 0x000f41af, 0x000f43af,
+ 0x000f45af, 0x000f47af, 0x000f49af, 0x000f4baf,
+ 0x000f4daf, 0x000f4faf, 0x000f51af, 0x000f53af,
+ 0x000f55af, 0x000f57af, 0x000f59af, 0x000f5baf,
+ 0x000f5daf, 0x000f5faf, 0x000f61af, 0x000f63af,
+ 0x000f65af, 0x000f67af, 0x000f69af, 0x000f6baf,
+ 0x000f6daf, 0x000f6faf, 0x000f71af, 0x000f73af,
+ 0x000f75af, 0x000f77af, 0x000f79af, 0x000f7baf,
+ 0x000f7daf, 0x000f7faf, 0x000f81af, 0x000f83af,
+ 0x000f85af, 0x000f87af, 0x000f89af, 0x000f8baf,
+ 0x000f8daf, 0x000f8faf, 0x000f91af, 0x000f93af,
+ 0x000f95af, 0x000f97af, 0x000f99af, 0x000f9baf,
+ 0x000f9daf, 0x000f9faf, 0x000fa1af, 0x000fa3af,
+ 0x000fa5af, 0x000fa7af, 0x000fa9af, 0x000fabaf,
+ 0x000fadaf, 0x000fafaf, 0x000fb1af, 0x000fb3af,
+ 0x000fb5af, 0x000fb7af, 0x000fb9af, 0x000fbbaf,
+ 0x000fbdaf, 0x000fbfaf, 0x000fc1af, 0x000fc3af,
+ 0x000fc5af, 0x000fc7af, 0x000fc9af, 0x000fcbaf,
+ 0x000fcdaf, 0x000fcfaf, 0x000fd1af, 0x000fd3af,
+ 0x000fd5af, 0x000fd7af, 0x000fd9af, 0x000fdbaf,
+ 0x000fddaf, 0x000fdfaf, 0x000fe1af, 0x000fe3af,
+ 0x000fe5af, 0x000fe7af, 0x000fe9af, 0x000febaf,
+ 0x000fedaf, 0x000fefaf, 0x000ff1af, 0x000ff3af,
+ 0x000ff5af, 0x000ff7af, 0x000ff9af, 0x000ffbaf,
+ 0x000ffdaf, 0x000fffaf, 0x0000006f, 0x0000026f,
+ 0x0000046f, 0x0000066f, 0x0000086f, 0x00000a6f,
+ 0x00000c6f, 0x00000e6f, 0x0000106f, 0x0000126f,
+ 0x0000146f, 0x0000166f, 0x0000186f, 0x00001a6f,
+ 0x00001c6f, 0x00001e6f, 0x0000206f, 0x0000226f,
+ 0x0000246f, 0x0000266f, 0x0000286f, 0x00002a6f,
+ 0x00002c6f, 0x00002e6f, 0x0000306f, 0x0000326f,
+ 0x0000346f, 0x0000366f, 0x0000386f, 0x00003a6f,
+ 0x00003c6f, 0x00003e6f, 0x0000406f, 0x0000426f,
+ 0x0000446f, 0x0000466f, 0x0000486f, 0x00004a6f,
+ 0x00004c6f, 0x00004e6f, 0x0000506f, 0x0000526f,
+ 0x0000546f, 0x0000566f, 0x0000586f, 0x00005a6f,
+ 0x00005c6f, 0x00005e6f, 0x0000606f, 0x0000626f,
+ 0x0000646f, 0x0000666f, 0x0000686f, 0x00006a6f,
+ 0x00006c6f, 0x00006e6f, 0x0000706f, 0x0000726f,
+ 0x0000746f, 0x0000766f, 0x0000786f, 0x00007a6f,
+ 0x00007c6f, 0x00007e6f, 0x0000806f, 0x0000826f,
+ 0x0000846f, 0x0000866f, 0x0000886f, 0x00008a6f,
+ 0x00008c6f, 0x00008e6f, 0x0000906f, 0x0000926f,
+ 0x0000946f, 0x0000966f, 0x0000986f, 0x00009a6f,
+ 0x00009c6f, 0x00009e6f, 0x0000a06f, 0x0000a26f,
+ 0x0000a46f, 0x0000a66f, 0x0000a86f, 0x0000aa6f,
+ 0x0000ac6f, 0x0000ae6f, 0x0000b06f, 0x0000b26f,
+ 0x0000b46f, 0x0000b66f, 0x0000b86f, 0x0000ba6f,
+ 0x0000bc6f, 0x0000be6f, 0x0000c06f, 0x0000c26f,
+ 0x0000c46f, 0x0000c66f, 0x0000c86f, 0x0000ca6f,
+ 0x0000cc6f, 0x0000ce6f, 0x0000d06f, 0x0000d26f,
+ 0x0000d46f, 0x0000d66f, 0x0000d86f, 0x0000da6f,
+ 0x0000dc6f, 0x0000de6f, 0x0000e06f, 0x0000e26f,
+ 0x0000e46f, 0x0000e66f, 0x0000e86f, 0x0000ea6f,
+ 0x0000ec6f, 0x0000ee6f, 0x0000f06f, 0x0000f26f,
+ 0x0000f46f, 0x0000f66f, 0x0000f86f, 0x0000fa6f,
+ 0x0000fc6f, 0x0000fe6f, 0x0001006f, 0x0001026f,
+ 0x0001046f, 0x0001066f, 0x0001086f, 0x00010a6f,
+ 0x00010c6f, 0x00010e6f, 0x0001106f, 0x0001126f,
+ 0x0001146f, 0x0001166f, 0x0001186f, 0x00011a6f,
+ 0x00011c6f, 0x00011e6f, 0x0001206f, 0x0001226f,
+ 0x0001246f, 0x0001266f, 0x0001286f, 0x00012a6f,
+ 0x00012c6f, 0x00012e6f, 0x0001306f, 0x0001326f,
+ 0x0001346f, 0x0001366f, 0x0001386f, 0x00013a6f,
+ 0x00013c6f, 0x00013e6f, 0x0001406f, 0x0001426f,
+ 0x0001446f, 0x0001466f, 0x0001486f, 0x00014a6f,
+ 0x00014c6f, 0x00014e6f, 0x0001506f, 0x0001526f,
+ 0x0001546f, 0x0001566f, 0x0001586f, 0x00015a6f,
+ 0x00015c6f, 0x00015e6f, 0x0001606f, 0x0001626f,
+ 0x0001646f, 0x0001666f, 0x0001686f, 0x00016a6f,
+ 0x00016c6f, 0x00016e6f, 0x0001706f, 0x0001726f,
+ 0x0001746f, 0x0001766f, 0x0001786f, 0x00017a6f,
+ 0x00017c6f, 0x00017e6f, 0x0001806f, 0x0001826f,
+ 0x0001846f, 0x0001866f, 0x0001886f, 0x00018a6f,
+ 0x00018c6f, 0x00018e6f, 0x0001906f, 0x0001926f,
+ 0x0001946f, 0x0001966f, 0x0001986f, 0x00019a6f,
+ 0x00019c6f, 0x00019e6f, 0x0001a06f, 0x0001a26f,
+ 0x0001a46f, 0x0001a66f, 0x0001a86f, 0x0001aa6f,
+ 0x0001ac6f, 0x0001ae6f, 0x0001b06f, 0x0001b26f,
+ 0x0001b46f, 0x0001b66f, 0x0001b86f, 0x0001ba6f,
+ 0x0001bc6f, 0x0001be6f, 0x0001c06f, 0x0001c26f,
+ 0x0001c46f, 0x0001c66f, 0x0001c86f, 0x0001ca6f,
+ 0x0001cc6f, 0x0001ce6f, 0x0001d06f, 0x0001d26f,
+ 0x0001d46f, 0x0001d66f, 0x0001d86f, 0x0001da6f,
+ 0x0001dc6f, 0x0001de6f, 0x0001e06f, 0x0001e26f,
+ 0x0001e46f, 0x0001e66f, 0x0001e86f, 0x0001ea6f,
+ 0x0001ec6f, 0x0001ee6f, 0x0001f06f, 0x0001f26f,
+ 0x0001f46f, 0x0001f66f, 0x0001f86f, 0x0001fa6f,
+ 0x0001fc6f, 0x0001fe6f, 0x0002006f, 0x0002026f,
+ 0x0002046f, 0x0002066f, 0x0002086f, 0x00020a6f,
+ 0x00020c6f, 0x00020e6f, 0x0002106f, 0x0002126f,
+ 0x0002146f, 0x0002166f, 0x0002186f, 0x00021a6f,
+ 0x00021c6f, 0x00021e6f, 0x0002206f, 0x0002226f,
+ 0x0002246f, 0x0002266f, 0x0002286f, 0x00022a6f,
+ 0x00022c6f, 0x00022e6f, 0x0002306f, 0x0002326f,
+ 0x0002346f, 0x0002366f, 0x0002386f, 0x00023a6f,
+ 0x00023c6f, 0x00023e6f, 0x0002406f, 0x0002426f,
+ 0x0002446f, 0x0002466f, 0x0002486f, 0x00024a6f,
+ 0x00024c6f, 0x00024e6f, 0x0002506f, 0x0002526f,
+ 0x0002546f, 0x0002566f, 0x0002586f, 0x00025a6f,
+ 0x00025c6f, 0x00025e6f, 0x0002606f, 0x0002626f,
+ 0x0002646f, 0x0002666f, 0x0002686f, 0x00026a6f,
+ 0x00026c6f, 0x00026e6f, 0x0002706f, 0x0002726f,
+ 0x0002746f, 0x0002766f, 0x0002786f, 0x00027a6f,
+ 0x00027c6f, 0x00027e6f, 0x0002806f, 0x0002826f,
+ 0x0002846f, 0x0002866f, 0x0002886f, 0x00028a6f,
+ 0x00028c6f, 0x00028e6f, 0x0002906f, 0x0002926f,
+ 0x0002946f, 0x0002966f, 0x0002986f, 0x00029a6f,
+ 0x00029c6f, 0x00029e6f, 0x0002a06f, 0x0002a26f,
+ 0x0002a46f, 0x0002a66f, 0x0002a86f, 0x0002aa6f,
+ 0x0002ac6f, 0x0002ae6f, 0x0002b06f, 0x0002b26f,
+ 0x0002b46f, 0x0002b66f, 0x0002b86f, 0x0002ba6f,
+ 0x0002bc6f, 0x0002be6f, 0x0002c06f, 0x0002c26f,
+ 0x0002c46f, 0x0002c66f, 0x0002c86f, 0x0002ca6f,
+ 0x0002cc6f, 0x0002ce6f, 0x0002d06f, 0x0002d26f,
+ 0x0002d46f, 0x0002d66f, 0x0002d86f, 0x0002da6f,
+ 0x0002dc6f, 0x0002de6f, 0x0002e06f, 0x0002e26f,
+ 0x0002e46f, 0x0002e66f, 0x0002e86f, 0x0002ea6f,
+ 0x0002ec6f, 0x0002ee6f, 0x0002f06f, 0x0002f26f,
+ 0x0002f46f, 0x0002f66f, 0x0002f86f, 0x0002fa6f,
+ 0x0002fc6f, 0x0002fe6f, 0x0003006f, 0x0003026f,
+ 0x0003046f, 0x0003066f, 0x0003086f, 0x00030a6f,
+ 0x00030c6f, 0x00030e6f, 0x0003106f, 0x0003126f,
+ 0x0003146f, 0x0003166f, 0x0003186f, 0x00031a6f,
+ 0x00031c6f, 0x00031e6f, 0x0003206f, 0x0003226f,
+ 0x0003246f, 0x0003266f, 0x0003286f, 0x00032a6f,
+ 0x00032c6f, 0x00032e6f, 0x0003306f, 0x0003326f,
+ 0x0003346f, 0x0003366f, 0x0003386f, 0x00033a6f,
+ 0x00033c6f, 0x00033e6f, 0x0003406f, 0x0003426f,
+ 0x0003446f, 0x0003466f, 0x0003486f, 0x00034a6f,
+ 0x00034c6f, 0x00034e6f, 0x0003506f, 0x0003526f,
+ 0x0003546f, 0x0003566f, 0x0003586f, 0x00035a6f,
+ 0x00035c6f, 0x00035e6f, 0x0003606f, 0x0003626f,
+ 0x0003646f, 0x0003666f, 0x0003686f, 0x00036a6f,
+ 0x00036c6f, 0x00036e6f, 0x0003706f, 0x0003726f,
+ 0x0003746f, 0x0003766f, 0x0003786f, 0x00037a6f,
+ 0x00037c6f, 0x00037e6f, 0x0003806f, 0x0003826f,
+ 0x0003846f, 0x0003866f, 0x0003886f, 0x00038a6f,
+ 0x00038c6f, 0x00038e6f, 0x0003906f, 0x0003926f,
+ 0x0003946f, 0x0003966f, 0x0003986f, 0x00039a6f,
+ 0x00039c6f, 0x00039e6f, 0x0003a06f, 0x0003a26f,
+ 0x0003a46f, 0x0003a66f, 0x0003a86f, 0x0003aa6f,
+ 0x0003ac6f, 0x0003ae6f, 0x0003b06f, 0x0003b26f,
+ 0x0003b46f, 0x0003b66f, 0x0003b86f, 0x0003ba6f,
+ 0x0003bc6f, 0x0003be6f, 0x0003c06f, 0x0003c26f,
+ 0x0003c46f, 0x0003c66f, 0x0003c86f, 0x0003ca6f,
+ 0x0003cc6f, 0x0003ce6f, 0x0003d06f, 0x0003d26f,
+ 0x0003d46f, 0x0003d66f, 0x0003d86f, 0x0003da6f,
+ 0x0003dc6f, 0x0003de6f, 0x0003e06f, 0x0003e26f,
+ 0x0003e46f, 0x0003e66f, 0x0003e86f, 0x0003ea6f,
+ 0x0003ec6f, 0x0003ee6f, 0x0003f06f, 0x0003f26f,
+ 0x0003f46f, 0x0003f66f, 0x0003f86f, 0x0003fa6f,
+ 0x0003fc6f, 0x0003fe6f, 0x0004006f, 0x0004026f,
+ 0x0004046f, 0x0004066f, 0x0004086f, 0x00040a6f,
+ 0x00040c6f, 0x00040e6f, 0x0004106f, 0x0004126f,
+ 0x0004146f, 0x0004166f, 0x0004186f, 0x00041a6f,
+ 0x00041c6f, 0x00041e6f, 0x0004206f, 0x0004226f,
+ 0x0004246f, 0x0004266f, 0x0004286f, 0x00042a6f,
+ 0x00042c6f, 0x00042e6f, 0x0004306f, 0x0004326f,
+ 0x0004346f, 0x0004366f, 0x0004386f, 0x00043a6f,
+ 0x00043c6f, 0x00043e6f, 0x0004406f, 0x0004426f,
+ 0x0004446f, 0x0004466f, 0x0004486f, 0x00044a6f,
+ 0x00044c6f, 0x00044e6f, 0x0004506f, 0x0004526f,
+ 0x0004546f, 0x0004566f, 0x0004586f, 0x00045a6f,
+ 0x00045c6f, 0x00045e6f, 0x0004606f, 0x0004626f,
+ 0x0004646f, 0x0004666f, 0x0004686f, 0x00046a6f,
+ 0x00046c6f, 0x00046e6f, 0x0004706f, 0x0004726f,
+ 0x0004746f, 0x0004766f, 0x0004786f, 0x00047a6f,
+ 0x00047c6f, 0x00047e6f, 0x0004806f, 0x0004826f,
+ 0x0004846f, 0x0004866f, 0x0004886f, 0x00048a6f,
+ 0x00048c6f, 0x00048e6f, 0x0004906f, 0x0004926f,
+ 0x0004946f, 0x0004966f, 0x0004986f, 0x00049a6f,
+ 0x00049c6f, 0x00049e6f, 0x0004a06f, 0x0004a26f,
+ 0x0004a46f, 0x0004a66f, 0x0004a86f, 0x0004aa6f,
+ 0x0004ac6f, 0x0004ae6f, 0x0004b06f, 0x0004b26f,
+ 0x0004b46f, 0x0004b66f, 0x0004b86f, 0x0004ba6f,
+ 0x0004bc6f, 0x0004be6f, 0x0004c06f, 0x0004c26f,
+ 0x0004c46f, 0x0004c66f, 0x0004c86f, 0x0004ca6f,
+ 0x0004cc6f, 0x0004ce6f, 0x0004d06f, 0x0004d26f,
+ 0x0004d46f, 0x0004d66f, 0x0004d86f, 0x0004da6f,
+ 0x0004dc6f, 0x0004de6f, 0x0004e06f, 0x0004e26f,
+ 0x0004e46f, 0x0004e66f, 0x0004e86f, 0x0004ea6f,
+ 0x0004ec6f, 0x0004ee6f, 0x0004f06f, 0x0004f26f,
+ 0x0004f46f, 0x0004f66f, 0x0004f86f, 0x0004fa6f,
+ 0x0004fc6f, 0x0004fe6f, 0x0005006f, 0x0005026f,
+ 0x0005046f, 0x0005066f, 0x0005086f, 0x00050a6f,
+ 0x00050c6f, 0x00050e6f, 0x0005106f, 0x0005126f,
+ 0x0005146f, 0x0005166f, 0x0005186f, 0x00051a6f,
+ 0x00051c6f, 0x00051e6f, 0x0005206f, 0x0005226f,
+ 0x0005246f, 0x0005266f, 0x0005286f, 0x00052a6f,
+ 0x00052c6f, 0x00052e6f, 0x0005306f, 0x0005326f,
+ 0x0005346f, 0x0005366f, 0x0005386f, 0x00053a6f,
+ 0x00053c6f, 0x00053e6f, 0x0005406f, 0x0005426f,
+ 0x0005446f, 0x0005466f, 0x0005486f, 0x00054a6f,
+ 0x00054c6f, 0x00054e6f, 0x0005506f, 0x0005526f,
+ 0x0005546f, 0x0005566f, 0x0005586f, 0x00055a6f,
+ 0x00055c6f, 0x00055e6f, 0x0005606f, 0x0005626f,
+ 0x0005646f, 0x0005666f, 0x0005686f, 0x00056a6f,
+ 0x00056c6f, 0x00056e6f, 0x0005706f, 0x0005726f,
+ 0x0005746f, 0x0005766f, 0x0005786f, 0x00057a6f,
+ 0x00057c6f, 0x00057e6f, 0x0005806f, 0x0005826f,
+ 0x0005846f, 0x0005866f, 0x0005886f, 0x00058a6f,
+ 0x00058c6f, 0x00058e6f, 0x0005906f, 0x0005926f,
+ 0x0005946f, 0x0005966f, 0x0005986f, 0x00059a6f,
+ 0x00059c6f, 0x00059e6f, 0x0005a06f, 0x0005a26f,
+ 0x0005a46f, 0x0005a66f, 0x0005a86f, 0x0005aa6f,
+ 0x0005ac6f, 0x0005ae6f, 0x0005b06f, 0x0005b26f,
+ 0x0005b46f, 0x0005b66f, 0x0005b86f, 0x0005ba6f,
+ 0x0005bc6f, 0x0005be6f, 0x0005c06f, 0x0005c26f,
+ 0x0005c46f, 0x0005c66f, 0x0005c86f, 0x0005ca6f,
+ 0x0005cc6f, 0x0005ce6f, 0x0005d06f, 0x0005d26f,
+ 0x0005d46f, 0x0005d66f, 0x0005d86f, 0x0005da6f,
+ 0x0005dc6f, 0x0005de6f, 0x0005e06f, 0x0005e26f,
+ 0x0005e46f, 0x0005e66f, 0x0005e86f, 0x0005ea6f,
+ 0x0005ec6f, 0x0005ee6f, 0x0005f06f, 0x0005f26f,
+ 0x0005f46f, 0x0005f66f, 0x0005f86f, 0x0005fa6f,
+ 0x0005fc6f, 0x0005fe6f, 0x0006006f, 0x0006026f,
+ 0x0006046f, 0x0006066f, 0x0006086f, 0x00060a6f,
+ 0x00060c6f, 0x00060e6f, 0x0006106f, 0x0006126f,
+ 0x0006146f, 0x0006166f, 0x0006186f, 0x00061a6f,
+ 0x00061c6f, 0x00061e6f, 0x0006206f, 0x0006226f,
+ 0x0006246f, 0x0006266f, 0x0006286f, 0x00062a6f,
+ 0x00062c6f, 0x00062e6f, 0x0006306f, 0x0006326f,
+ 0x0006346f, 0x0006366f, 0x0006386f, 0x00063a6f,
+ 0x00063c6f, 0x00063e6f, 0x0006406f, 0x0006426f,
+ 0x0006446f, 0x0006466f, 0x0006486f, 0x00064a6f,
+ 0x00064c6f, 0x00064e6f, 0x0006506f, 0x0006526f,
+ 0x0006546f, 0x0006566f, 0x0006586f, 0x00065a6f,
+ 0x00065c6f, 0x00065e6f, 0x0006606f, 0x0006626f,
+ 0x0006646f, 0x0006666f, 0x0006686f, 0x00066a6f,
+ 0x00066c6f, 0x00066e6f, 0x0006706f, 0x0006726f,
+ 0x0006746f, 0x0006766f, 0x0006786f, 0x00067a6f,
+ 0x00067c6f, 0x00067e6f, 0x0006806f, 0x0006826f,
+ 0x0006846f, 0x0006866f, 0x0006886f, 0x00068a6f,
+ 0x00068c6f, 0x00068e6f, 0x0006906f, 0x0006926f,
+ 0x0006946f, 0x0006966f, 0x0006986f, 0x00069a6f,
+ 0x00069c6f, 0x00069e6f, 0x0006a06f, 0x0006a26f,
+ 0x0006a46f, 0x0006a66f, 0x0006a86f, 0x0006aa6f,
+ 0x0006ac6f, 0x0006ae6f, 0x0006b06f, 0x0006b26f,
+ 0x0006b46f, 0x0006b66f, 0x0006b86f, 0x0006ba6f,
+ 0x0006bc6f, 0x0006be6f, 0x0006c06f, 0x0006c26f,
+ 0x0006c46f, 0x0006c66f, 0x0006c86f, 0x0006ca6f,
+ 0x0006cc6f, 0x0006ce6f, 0x0006d06f, 0x0006d26f,
+ 0x0006d46f, 0x0006d66f, 0x0006d86f, 0x0006da6f,
+ 0x0006dc6f, 0x0006de6f, 0x0006e06f, 0x0006e26f,
+ 0x0006e46f, 0x0006e66f, 0x0006e86f, 0x0006ea6f,
+ 0x0006ec6f, 0x0006ee6f, 0x0006f06f, 0x0006f26f,
+ 0x0006f46f, 0x0006f66f, 0x0006f86f, 0x0006fa6f,
+ 0x0006fc6f, 0x0006fe6f, 0x0007006f, 0x0007026f,
+ 0x0007046f, 0x0007066f, 0x0007086f, 0x00070a6f,
+ 0x00070c6f, 0x00070e6f, 0x0007106f, 0x0007126f,
+ 0x0007146f, 0x0007166f, 0x0007186f, 0x00071a6f,
+ 0x00071c6f, 0x00071e6f, 0x0007206f, 0x0007226f,
+ 0x0007246f, 0x0007266f, 0x0007286f, 0x00072a6f,
+ 0x00072c6f, 0x00072e6f, 0x0007306f, 0x0007326f,
+ 0x0007346f, 0x0007366f, 0x0007386f, 0x00073a6f,
+ 0x00073c6f, 0x00073e6f, 0x0007406f, 0x0007426f,
+ 0x0007446f, 0x0007466f, 0x0007486f, 0x00074a6f,
+ 0x00074c6f, 0x00074e6f, 0x0007506f, 0x0007526f,
+ 0x0007546f, 0x0007566f, 0x0007586f, 0x00075a6f,
+ 0x00075c6f, 0x00075e6f, 0x0007606f, 0x0007626f,
+ 0x0007646f, 0x0007666f, 0x0007686f, 0x00076a6f,
+ 0x00076c6f, 0x00076e6f, 0x0007706f, 0x0007726f,
+ 0x0007746f, 0x0007766f, 0x0007786f, 0x00077a6f,
+ 0x00077c6f, 0x00077e6f, 0x0007806f, 0x0007826f,
+ 0x0007846f, 0x0007866f, 0x0007886f, 0x00078a6f,
+ 0x00078c6f, 0x00078e6f, 0x0007906f, 0x0007926f,
+ 0x0007946f, 0x0007966f, 0x0007986f, 0x00079a6f,
+ 0x00079c6f, 0x00079e6f, 0x0007a06f, 0x0007a26f,
+ 0x0007a46f, 0x0007a66f, 0x0007a86f, 0x0007aa6f,
+ 0x0007ac6f, 0x0007ae6f, 0x0007b06f, 0x0007b26f,
+ 0x0007b46f, 0x0007b66f, 0x0007b86f, 0x0007ba6f,
+ 0x0007bc6f, 0x0007be6f, 0x0007c06f, 0x0007c26f,
+ 0x0007c46f, 0x0007c66f, 0x0007c86f, 0x0007ca6f,
+ 0x0007cc6f, 0x0007ce6f, 0x0007d06f, 0x0007d26f,
+ 0x0007d46f, 0x0007d66f, 0x0007d86f, 0x0007da6f,
+ 0x0007dc6f, 0x0007de6f, 0x0007e06f, 0x0007e26f,
+ 0x0007e46f, 0x0007e66f, 0x0007e86f, 0x0007ea6f,
+ 0x0007ec6f, 0x0007ee6f, 0x0007f06f, 0x0007f26f,
+ 0x0007f46f, 0x0007f66f, 0x0007f86f, 0x0007fa6f,
+ 0x0007fc6f, 0x0007fe6f, 0x0008006f, 0x0008026f,
+ 0x0008046f, 0x0008066f, 0x0008086f, 0x00080a6f,
+ 0x00080c6f, 0x00080e6f, 0x0008106f, 0x0008126f,
+ 0x0008146f, 0x0008166f, 0x0008186f, 0x00081a6f,
+ 0x00081c6f, 0x00081e6f, 0x0008206f, 0x0008226f,
+ 0x0008246f, 0x0008266f, 0x0008286f, 0x00082a6f,
+ 0x00082c6f, 0x00082e6f, 0x0008306f, 0x0008326f,
+ 0x0008346f, 0x0008366f, 0x0008386f, 0x00083a6f,
+ 0x00083c6f, 0x00083e6f, 0x0008406f, 0x0008426f,
+ 0x0008446f, 0x0008466f, 0x0008486f, 0x00084a6f,
+ 0x00084c6f, 0x00084e6f, 0x0008506f, 0x0008526f,
+ 0x0008546f, 0x0008566f, 0x0008586f, 0x00085a6f,
+ 0x00085c6f, 0x00085e6f, 0x0008606f, 0x0008626f,
+ 0x0008646f, 0x0008666f, 0x0008686f, 0x00086a6f,
+ 0x00086c6f, 0x00086e6f, 0x0008706f, 0x0008726f,
+ 0x0008746f, 0x0008766f, 0x0008786f, 0x00087a6f,
+ 0x00087c6f, 0x00087e6f, 0x0008806f, 0x0008826f,
+ 0x0008846f, 0x0008866f, 0x0008886f, 0x00088a6f,
+ 0x00088c6f, 0x00088e6f, 0x0008906f, 0x0008926f,
+ 0x0008946f, 0x0008966f, 0x0008986f, 0x00089a6f,
+ 0x00089c6f, 0x00089e6f, 0x0008a06f, 0x0008a26f,
+ 0x0008a46f, 0x0008a66f, 0x0008a86f, 0x0008aa6f,
+ 0x0008ac6f, 0x0008ae6f, 0x0008b06f, 0x0008b26f,
+ 0x0008b46f, 0x0008b66f, 0x0008b86f, 0x0008ba6f,
+ 0x0008bc6f, 0x0008be6f, 0x0008c06f, 0x0008c26f,
+ 0x0008c46f, 0x0008c66f, 0x0008c86f, 0x0008ca6f,
+ 0x0008cc6f, 0x0008ce6f, 0x0008d06f, 0x0008d26f,
+ 0x0008d46f, 0x0008d66f, 0x0008d86f, 0x0008da6f,
+ 0x0008dc6f, 0x0008de6f, 0x0008e06f, 0x0008e26f,
+ 0x0008e46f, 0x0008e66f, 0x0008e86f, 0x0008ea6f,
+ 0x0008ec6f, 0x0008ee6f, 0x0008f06f, 0x0008f26f,
+ 0x0008f46f, 0x0008f66f, 0x0008f86f, 0x0008fa6f,
+ 0x0008fc6f, 0x0008fe6f, 0x0009006f, 0x0009026f,
+ 0x0009046f, 0x0009066f, 0x0009086f, 0x00090a6f,
+ 0x00090c6f, 0x00090e6f, 0x0009106f, 0x0009126f,
+ 0x0009146f, 0x0009166f, 0x0009186f, 0x00091a6f,
+ 0x00091c6f, 0x00091e6f, 0x0009206f, 0x0009226f,
+ 0x0009246f, 0x0009266f, 0x0009286f, 0x00092a6f,
+ 0x00092c6f, 0x00092e6f, 0x0009306f, 0x0009326f,
+ 0x0009346f, 0x0009366f, 0x0009386f, 0x00093a6f,
+ 0x00093c6f, 0x00093e6f, 0x0009406f, 0x0009426f,
+ 0x0009446f, 0x0009466f, 0x0009486f, 0x00094a6f,
+ 0x00094c6f, 0x00094e6f, 0x0009506f, 0x0009526f,
+ 0x0009546f, 0x0009566f, 0x0009586f, 0x00095a6f,
+ 0x00095c6f, 0x00095e6f, 0x0009606f, 0x0009626f,
+ 0x0009646f, 0x0009666f, 0x0009686f, 0x00096a6f,
+ 0x00096c6f, 0x00096e6f, 0x0009706f, 0x0009726f,
+ 0x0009746f, 0x0009766f, 0x0009786f, 0x00097a6f,
+ 0x00097c6f, 0x00097e6f, 0x0009806f, 0x0009826f,
+ 0x0009846f, 0x0009866f, 0x0009886f, 0x00098a6f,
+ 0x00098c6f, 0x00098e6f, 0x0009906f, 0x0009926f,
+ 0x0009946f, 0x0009966f, 0x0009986f, 0x00099a6f,
+ 0x00099c6f, 0x00099e6f, 0x0009a06f, 0x0009a26f,
+ 0x0009a46f, 0x0009a66f, 0x0009a86f, 0x0009aa6f,
+ 0x0009ac6f, 0x0009ae6f, 0x0009b06f, 0x0009b26f,
+ 0x0009b46f, 0x0009b66f, 0x0009b86f, 0x0009ba6f,
+ 0x0009bc6f, 0x0009be6f, 0x0009c06f, 0x0009c26f,
+ 0x0009c46f, 0x0009c66f, 0x0009c86f, 0x0009ca6f,
+ 0x0009cc6f, 0x0009ce6f, 0x0009d06f, 0x0009d26f,
+ 0x0009d46f, 0x0009d66f, 0x0009d86f, 0x0009da6f,
+ 0x0009dc6f, 0x0009de6f, 0x0009e06f, 0x0009e26f,
+ 0x0009e46f, 0x0009e66f, 0x0009e86f, 0x0009ea6f,
+ 0x0009ec6f, 0x0009ee6f, 0x0009f06f, 0x0009f26f,
+ 0x0009f46f, 0x0009f66f, 0x0009f86f, 0x0009fa6f,
+ 0x0009fc6f, 0x0009fe6f, 0x000a006f, 0x000a026f,
+ 0x000a046f, 0x000a066f, 0x000a086f, 0x000a0a6f,
+ 0x000a0c6f, 0x000a0e6f, 0x000a106f, 0x000a126f,
+ 0x000a146f, 0x000a166f, 0x000a186f, 0x000a1a6f,
+ 0x000a1c6f, 0x000a1e6f, 0x000a206f, 0x000a226f,
+ 0x000a246f, 0x000a266f, 0x000a286f, 0x000a2a6f,
+ 0x000a2c6f, 0x000a2e6f, 0x000a306f, 0x000a326f,
+ 0x000a346f, 0x000a366f, 0x000a386f, 0x000a3a6f,
+ 0x000a3c6f, 0x000a3e6f, 0x000a406f, 0x000a426f,
+ 0x000a446f, 0x000a466f, 0x000a486f, 0x000a4a6f,
+ 0x000a4c6f, 0x000a4e6f, 0x000a506f, 0x000a526f,
+ 0x000a546f, 0x000a566f, 0x000a586f, 0x000a5a6f,
+ 0x000a5c6f, 0x000a5e6f, 0x000a606f, 0x000a626f,
+ 0x000a646f, 0x000a666f, 0x000a686f, 0x000a6a6f,
+ 0x000a6c6f, 0x000a6e6f, 0x000a706f, 0x000a726f,
+ 0x000a746f, 0x000a766f, 0x000a786f, 0x000a7a6f,
+ 0x000a7c6f, 0x000a7e6f, 0x000a806f, 0x000a826f,
+ 0x000a846f, 0x000a866f, 0x000a886f, 0x000a8a6f,
+ 0x000a8c6f, 0x000a8e6f, 0x000a906f, 0x000a926f,
+ 0x000a946f, 0x000a966f, 0x000a986f, 0x000a9a6f,
+ 0x000a9c6f, 0x000a9e6f, 0x000aa06f, 0x000aa26f,
+ 0x000aa46f, 0x000aa66f, 0x000aa86f, 0x000aaa6f,
+ 0x000aac6f, 0x000aae6f, 0x000ab06f, 0x000ab26f,
+ 0x000ab46f, 0x000ab66f, 0x000ab86f, 0x000aba6f,
+ 0x000abc6f, 0x000abe6f, 0x000ac06f, 0x000ac26f,
+ 0x000ac46f, 0x000ac66f, 0x000ac86f, 0x000aca6f,
+ 0x000acc6f, 0x000ace6f, 0x000ad06f, 0x000ad26f,
+ 0x000ad46f, 0x000ad66f, 0x000ad86f, 0x000ada6f,
+ 0x000adc6f, 0x000ade6f, 0x000ae06f, 0x000ae26f,
+ 0x000ae46f, 0x000ae66f, 0x000ae86f, 0x000aea6f,
+ 0x000aec6f, 0x000aee6f, 0x000af06f, 0x000af26f,
+ 0x000af46f, 0x000af66f, 0x000af86f, 0x000afa6f,
+ 0x000afc6f, 0x000afe6f, 0x000b006f, 0x000b026f,
+ 0x000b046f, 0x000b066f, 0x000b086f, 0x000b0a6f,
+ 0x000b0c6f, 0x000b0e6f, 0x000b106f, 0x000b126f,
+ 0x000b146f, 0x000b166f, 0x000b186f, 0x000b1a6f,
+ 0x000b1c6f, 0x000b1e6f, 0x000b206f, 0x000b226f,
+ 0x000b246f, 0x000b266f, 0x000b286f, 0x000b2a6f,
+ 0x000b2c6f, 0x000b2e6f, 0x000b306f, 0x000b326f,
+ 0x000b346f, 0x000b366f, 0x000b386f, 0x000b3a6f,
+ 0x000b3c6f, 0x000b3e6f, 0x000b406f, 0x000b426f,
+ 0x000b446f, 0x000b466f, 0x000b486f, 0x000b4a6f,
+ 0x000b4c6f, 0x000b4e6f, 0x000b506f, 0x000b526f,
+ 0x000b546f, 0x000b566f, 0x000b586f, 0x000b5a6f,
+ 0x000b5c6f, 0x000b5e6f, 0x000b606f, 0x000b626f,
+ 0x000b646f, 0x000b666f, 0x000b686f, 0x000b6a6f,
+ 0x000b6c6f, 0x000b6e6f, 0x000b706f, 0x000b726f,
+ 0x000b746f, 0x000b766f, 0x000b786f, 0x000b7a6f,
+ 0x000b7c6f, 0x000b7e6f, 0x000b806f, 0x000b826f,
+ 0x000b846f, 0x000b866f, 0x000b886f, 0x000b8a6f,
+ 0x000b8c6f, 0x000b8e6f, 0x000b906f, 0x000b926f,
+ 0x000b946f, 0x000b966f, 0x000b986f, 0x000b9a6f,
+ 0x000b9c6f, 0x000b9e6f, 0x000ba06f, 0x000ba26f,
+ 0x000ba46f, 0x000ba66f, 0x000ba86f, 0x000baa6f,
+ 0x000bac6f, 0x000bae6f, 0x000bb06f, 0x000bb26f,
+ 0x000bb46f, 0x000bb66f, 0x000bb86f, 0x000bba6f,
+ 0x000bbc6f, 0x000bbe6f, 0x000bc06f, 0x000bc26f,
+ 0x000bc46f, 0x000bc66f, 0x000bc86f, 0x000bca6f,
+ 0x000bcc6f, 0x000bce6f, 0x000bd06f, 0x000bd26f,
+ 0x000bd46f, 0x000bd66f, 0x000bd86f, 0x000bda6f,
+ 0x000bdc6f, 0x000bde6f, 0x000be06f, 0x000be26f,
+ 0x000be46f, 0x000be66f, 0x000be86f, 0x000bea6f,
+ 0x000bec6f, 0x000bee6f, 0x000bf06f, 0x000bf26f,
+ 0x000bf46f, 0x000bf66f, 0x000bf86f, 0x000bfa6f,
+ 0x000bfc6f, 0x000bfe6f, 0x000c006f, 0x000c026f,
+ 0x000c046f, 0x000c066f, 0x000c086f, 0x000c0a6f,
+ 0x000c0c6f, 0x000c0e6f, 0x000c106f, 0x000c126f,
+ 0x000c146f, 0x000c166f, 0x000c186f, 0x000c1a6f,
+ 0x000c1c6f, 0x000c1e6f, 0x000c206f, 0x000c226f,
+ 0x000c246f, 0x000c266f, 0x000c286f, 0x000c2a6f,
+ 0x000c2c6f, 0x000c2e6f, 0x000c306f, 0x000c326f,
+ 0x000c346f, 0x000c366f, 0x000c386f, 0x000c3a6f,
+ 0x000c3c6f, 0x000c3e6f, 0x000c406f, 0x000c426f,
+ 0x000c446f, 0x000c466f, 0x000c486f, 0x000c4a6f,
+ 0x000c4c6f, 0x000c4e6f, 0x000c506f, 0x000c526f,
+ 0x000c546f, 0x000c566f, 0x000c586f, 0x000c5a6f,
+ 0x000c5c6f, 0x000c5e6f, 0x000c606f, 0x000c626f,
+ 0x000c646f, 0x000c666f, 0x000c686f, 0x000c6a6f,
+ 0x000c6c6f, 0x000c6e6f, 0x000c706f, 0x000c726f,
+ 0x000c746f, 0x000c766f, 0x000c786f, 0x000c7a6f,
+ 0x000c7c6f, 0x000c7e6f, 0x000c806f, 0x000c826f,
+ 0x000c846f, 0x000c866f, 0x000c886f, 0x000c8a6f,
+ 0x000c8c6f, 0x000c8e6f, 0x000c906f, 0x000c926f,
+ 0x000c946f, 0x000c966f, 0x000c986f, 0x000c9a6f,
+ 0x000c9c6f, 0x000c9e6f, 0x000ca06f, 0x000ca26f,
+ 0x000ca46f, 0x000ca66f, 0x000ca86f, 0x000caa6f,
+ 0x000cac6f, 0x000cae6f, 0x000cb06f, 0x000cb26f,
+ 0x000cb46f, 0x000cb66f, 0x000cb86f, 0x000cba6f,
+ 0x000cbc6f, 0x000cbe6f, 0x000cc06f, 0x000cc26f,
+ 0x000cc46f, 0x000cc66f, 0x000cc86f, 0x000cca6f,
+ 0x000ccc6f, 0x000cce6f, 0x000cd06f, 0x000cd26f,
+ 0x000cd46f, 0x000cd66f, 0x000cd86f, 0x000cda6f,
+ 0x000cdc6f, 0x000cde6f, 0x000ce06f, 0x000ce26f,
+ 0x000ce46f, 0x000ce66f, 0x000ce86f, 0x000cea6f,
+ 0x000cec6f, 0x000cee6f, 0x000cf06f, 0x000cf26f,
+ 0x000cf46f, 0x000cf66f, 0x000cf86f, 0x000cfa6f,
+ 0x000cfc6f, 0x000cfe6f, 0x000d006f, 0x000d026f,
+ 0x000d046f, 0x000d066f, 0x000d086f, 0x000d0a6f,
+ 0x000d0c6f, 0x000d0e6f, 0x000d106f, 0x000d126f,
+ 0x000d146f, 0x000d166f, 0x000d186f, 0x000d1a6f,
+ 0x000d1c6f, 0x000d1e6f, 0x000d206f, 0x000d226f,
+ 0x000d246f, 0x000d266f, 0x000d286f, 0x000d2a6f,
+ 0x000d2c6f, 0x000d2e6f, 0x000d306f, 0x000d326f,
+ 0x000d346f, 0x000d366f, 0x000d386f, 0x000d3a6f,
+ 0x000d3c6f, 0x000d3e6f, 0x000d406f, 0x000d426f,
+ 0x000d446f, 0x000d466f, 0x000d486f, 0x000d4a6f,
+ 0x000d4c6f, 0x000d4e6f, 0x000d506f, 0x000d526f,
+ 0x000d546f, 0x000d566f, 0x000d586f, 0x000d5a6f,
+ 0x000d5c6f, 0x000d5e6f, 0x000d606f, 0x000d626f,
+ 0x000d646f, 0x000d666f, 0x000d686f, 0x000d6a6f,
+ 0x000d6c6f, 0x000d6e6f, 0x000d706f, 0x000d726f,
+ 0x000d746f, 0x000d766f, 0x000d786f, 0x000d7a6f,
+ 0x000d7c6f, 0x000d7e6f, 0x000d806f, 0x000d826f,
+ 0x000d846f, 0x000d866f, 0x000d886f, 0x000d8a6f,
+ 0x000d8c6f, 0x000d8e6f, 0x000d906f, 0x000d926f,
+ 0x000d946f, 0x000d966f, 0x000d986f, 0x000d9a6f,
+ 0x000d9c6f, 0x000d9e6f, 0x000da06f, 0x000da26f,
+ 0x000da46f, 0x000da66f, 0x000da86f, 0x000daa6f,
+ 0x000dac6f, 0x000dae6f, 0x000db06f, 0x000db26f,
+ 0x000db46f, 0x000db66f, 0x000db86f, 0x000dba6f,
+ 0x000dbc6f, 0x000dbe6f, 0x000dc06f, 0x000dc26f,
+ 0x000dc46f, 0x000dc66f, 0x000dc86f, 0x000dca6f,
+ 0x000dcc6f, 0x000dce6f, 0x000dd06f, 0x000dd26f,
+ 0x000dd46f, 0x000dd66f, 0x000dd86f, 0x000dda6f,
+ 0x000ddc6f, 0x000dde6f, 0x000de06f, 0x000de26f,
+ 0x000de46f, 0x000de66f, 0x000de86f, 0x000dea6f,
+ 0x000dec6f, 0x000dee6f, 0x000df06f, 0x000df26f,
+ 0x000df46f, 0x000df66f, 0x000df86f, 0x000dfa6f,
+ 0x000dfc6f, 0x000dfe6f, 0x000e006f, 0x000e026f,
+ 0x000e046f, 0x000e066f, 0x000e086f, 0x000e0a6f,
+ 0x000e0c6f, 0x000e0e6f, 0x000e106f, 0x000e126f,
+ 0x000e146f, 0x000e166f, 0x000e186f, 0x000e1a6f,
+ 0x000e1c6f, 0x000e1e6f, 0x000e206f, 0x000e226f,
+ 0x000e246f, 0x000e266f, 0x000e286f, 0x000e2a6f,
+ 0x000e2c6f, 0x000e2e6f, 0x000e306f, 0x000e326f,
+ 0x000e346f, 0x000e366f, 0x000e386f, 0x000e3a6f,
+ 0x000e3c6f, 0x000e3e6f, 0x000e406f, 0x000e426f,
+ 0x000e446f, 0x000e466f, 0x000e486f, 0x000e4a6f,
+ 0x000e4c6f, 0x000e4e6f, 0x000e506f, 0x000e526f,
+ 0x000e546f, 0x000e566f, 0x000e586f, 0x000e5a6f,
+ 0x000e5c6f, 0x000e5e6f, 0x000e606f, 0x000e626f,
+ 0x000e646f, 0x000e666f, 0x000e686f, 0x000e6a6f,
+ 0x000e6c6f, 0x000e6e6f, 0x000e706f, 0x000e726f,
+ 0x000e746f, 0x000e766f, 0x000e786f, 0x000e7a6f,
+ 0x000e7c6f, 0x000e7e6f, 0x000e806f, 0x000e826f,
+ 0x000e846f, 0x000e866f, 0x000e886f, 0x000e8a6f,
+ 0x000e8c6f, 0x000e8e6f, 0x000e906f, 0x000e926f,
+ 0x000e946f, 0x000e966f, 0x000e986f, 0x000e9a6f,
+ 0x000e9c6f, 0x000e9e6f, 0x000ea06f, 0x000ea26f,
+ 0x000ea46f, 0x000ea66f, 0x000ea86f, 0x000eaa6f,
+ 0x000eac6f, 0x000eae6f, 0x000eb06f, 0x000eb26f,
+ 0x000eb46f, 0x000eb66f, 0x000eb86f, 0x000eba6f,
+ 0x000ebc6f, 0x000ebe6f, 0x000ec06f, 0x000ec26f,
+ 0x000ec46f, 0x000ec66f, 0x000ec86f, 0x000eca6f,
+ 0x000ecc6f, 0x000ece6f, 0x000ed06f, 0x000ed26f,
+ 0x000ed46f, 0x000ed66f, 0x000ed86f, 0x000eda6f,
+ 0x000edc6f, 0x000ede6f, 0x000ee06f, 0x000ee26f,
+ 0x000ee46f, 0x000ee66f, 0x000ee86f, 0x000eea6f,
+ 0x000eec6f, 0x000eee6f, 0x000ef06f, 0x000ef26f,
+ 0x000ef46f, 0x000ef66f, 0x000ef86f, 0x000efa6f,
+ 0x000efc6f, 0x000efe6f, 0x000f006f, 0x000f026f,
+ 0x000f046f, 0x000f066f, 0x000f086f, 0x000f0a6f,
+ 0x000f0c6f, 0x000f0e6f, 0x000f106f, 0x000f126f,
+ 0x000f146f, 0x000f166f, 0x000f186f, 0x000f1a6f,
+ 0x000f1c6f, 0x000f1e6f, 0x000f206f, 0x000f226f,
+ 0x000f246f, 0x000f266f, 0x000f286f, 0x000f2a6f,
+ 0x000f2c6f, 0x000f2e6f, 0x000f306f, 0x000f326f,
+ 0x000f346f, 0x000f366f, 0x000f386f, 0x000f3a6f,
+ 0x000f3c6f, 0x000f3e6f, 0x000f406f, 0x000f426f,
+ 0x000f446f, 0x000f466f, 0x000f486f, 0x000f4a6f,
+ 0x000f4c6f, 0x000f4e6f, 0x000f506f, 0x000f526f,
+ 0x000f546f, 0x000f566f, 0x000f586f, 0x000f5a6f,
+ 0x000f5c6f, 0x000f5e6f, 0x000f606f, 0x000f626f,
+ 0x000f646f, 0x000f666f, 0x000f686f, 0x000f6a6f,
+ 0x000f6c6f, 0x000f6e6f, 0x000f706f, 0x000f726f,
+ 0x000f746f, 0x000f766f, 0x000f786f, 0x000f7a6f,
+ 0x000f7c6f, 0x000f7e6f, 0x000f806f, 0x000f826f,
+ 0x000f846f, 0x000f866f, 0x000f886f, 0x000f8a6f,
+ 0x000f8c6f, 0x000f8e6f, 0x000f906f, 0x000f926f,
+ 0x000f946f, 0x000f966f, 0x000f986f, 0x000f9a6f,
+ 0x000f9c6f, 0x000f9e6f, 0x000fa06f, 0x000fa26f,
+ 0x000fa46f, 0x000fa66f, 0x000fa86f, 0x000faa6f,
+ 0x000fac6f, 0x000fae6f, 0x000fb06f, 0x000fb26f,
+ 0x000fb46f, 0x000fb66f, 0x000fb86f, 0x000fba6f,
+ 0x000fbc6f, 0x000fbe6f, 0x000fc06f, 0x000fc26f,
+ 0x000fc46f, 0x000fc66f, 0x000fc86f, 0x000fca6f,
+ 0x000fcc6f, 0x000fce6f, 0x000fd06f, 0x000fd26f,
+ 0x000fd46f, 0x000fd66f, 0x000fd86f, 0x000fda6f,
+ 0x000fdc6f, 0x000fde6f, 0x000fe06f, 0x000fe26f,
+ 0x000fe46f, 0x000fe66f, 0x000fe86f, 0x000fea6f,
+ 0x000fec6f, 0x000fee6f, 0x000ff06f, 0x000ff26f,
+ 0x000ff46f, 0x000ff66f, 0x000ff86f, 0x000ffa6f,
+ 0x000ffc6f, 0x000ffe6f
+#endif /* LONGER_HUFFTABLE */
+ },
+
+ .len_table = {
+ 0x000bffef, 0x00000003, 0x00000084, 0x00000145,
+ 0x00000345, 0x00000626, 0x000002a7, 0x00000aa7,
+ 0x000000c6, 0x000004c6, 0x00001469, 0x00003469,
+ 0x00000c69, 0x00002c69, 0x00001c69, 0x00003c69,
+ 0x0000026a, 0x0000226a, 0x0000426a, 0x0000626a,
+ 0x000008eb, 0x000048eb, 0x000088eb, 0x0000c8eb,
+ 0x000029ec, 0x0000a9ec, 0x000129ec, 0x0001a9ec,
+ 0x000069ec, 0x0000e9ec, 0x000169ec, 0x0001e9ec,
+ 0x000019ed, 0x000099ed, 0x000119ed, 0x000199ed,
+ 0x000219ed, 0x000299ed, 0x000319ed, 0x000399ed,
+ 0x000059ed, 0x0000d9ed, 0x000159ed, 0x0001d9ed,
+ 0x000259ed, 0x0002d9ed, 0x000359ed, 0x0003d9ed,
+ 0x000039ed, 0x0000b9ed, 0x000139ed, 0x0001b9ed,
+ 0x000239ed, 0x0002b9ed, 0x000339ed, 0x0003b9ed,
+ 0x000079ed, 0x0000f9ed, 0x000179ed, 0x0001f9ed,
+ 0x000279ed, 0x0002f9ed, 0x000379ed, 0x0003f9ed,
+ 0x00003fef, 0x00013fef, 0x00023fef, 0x00033fef,
+ 0x00043fef, 0x00053fef, 0x00063fef, 0x00073fef,
+ 0x00083fef, 0x00093fef, 0x000a3fef, 0x000b3fef,
+ 0x000c3fef, 0x000d3fef, 0x000e3fef, 0x000f3fef,
+ 0x00007ff0, 0x00027ff0, 0x00047ff0, 0x00067ff0,
+ 0x00087ff0, 0x000a7ff0, 0x000c7ff0, 0x000e7ff0,
+ 0x00107ff0, 0x00127ff0, 0x00147ff0, 0x00167ff0,
+ 0x00187ff0, 0x001a7ff0, 0x001c7ff0, 0x001e7ff0,
+ 0x0000fff1, 0x0004fff1, 0x0008fff1, 0x000cfff1,
+ 0x0010fff1, 0x0014fff1, 0x0018fff1, 0x001cfff1,
+ 0x0020fff1, 0x0024fff1, 0x0028fff1, 0x002cfff1,
+ 0x0030fff1, 0x0034fff1, 0x0038fff1, 0x003cfff1,
+ 0x0002fff1, 0x0006fff1, 0x000afff1, 0x000efff1,
+ 0x0012fff1, 0x0016fff1, 0x001afff1, 0x001efff1,
+ 0x0022fff1, 0x0026fff1, 0x002afff1, 0x002efff1,
+ 0x0032fff1, 0x0036fff1, 0x003afff1, 0x003efff1,
+ 0x00017ff1, 0x00037ff1, 0x00057ff1, 0x00077ff1,
+ 0x00097ff1, 0x000b7ff1, 0x000d7ff1, 0x000f7ff1,
+ 0x00117ff1, 0x00137ff1, 0x00157ff1, 0x00177ff1,
+ 0x00197ff1, 0x001b7ff1, 0x001d7ff1, 0x001f7ff1,
+ 0x00217ff1, 0x00237ff1, 0x00257ff1, 0x00277ff1,
+ 0x00297ff1, 0x002b7ff1, 0x002d7ff1, 0x002f7ff1,
+ 0x00317ff1, 0x00337ff1, 0x00357ff1, 0x00377ff1,
+ 0x00397ff1, 0x003b7ff1, 0x003d7ff1, 0x003f7ff1,
+ 0x0001fff2, 0x0005fff2, 0x0009fff2, 0x000dfff2,
+ 0x0011fff2, 0x0015fff2, 0x0019fff2, 0x001dfff2,
+ 0x0021fff2, 0x0025fff2, 0x0029fff2, 0x002dfff2,
+ 0x0031fff2, 0x0035fff2, 0x0039fff2, 0x003dfff2,
+ 0x0041fff2, 0x0045fff2, 0x0049fff2, 0x004dfff2,
+ 0x0051fff2, 0x0055fff2, 0x0059fff2, 0x005dfff2,
+ 0x0061fff2, 0x0065fff2, 0x0069fff2, 0x006dfff2,
+ 0x0071fff2, 0x0075fff2, 0x0079fff2, 0x007dfff2,
+ 0x0007fff4, 0x0017fff4, 0x0027fff4, 0x0037fff4,
+ 0x0047fff4, 0x0057fff4, 0x0067fff4, 0x0077fff4,
+ 0x0087fff4, 0x0097fff4, 0x00a7fff4, 0x00b7fff4,
+ 0x00c7fff4, 0x00d7fff4, 0x00e7fff4, 0x00f7fff4,
+ 0x0107fff4, 0x0117fff4, 0x0127fff4, 0x0137fff4,
+ 0x0147fff4, 0x0157fff4, 0x0167fff4, 0x0177fff4,
+ 0x0187fff4, 0x0197fff4, 0x01a7fff4, 0x01b7fff4,
+ 0x01c7fff4, 0x01d7fff4, 0x01e7fff4, 0x01f7fff4,
+ 0x000ffff4, 0x001ffff4, 0x002ffff4, 0x003ffff4,
+ 0x004ffff4, 0x005ffff4, 0x006ffff4, 0x007ffff4,
+ 0x008ffff4, 0x009ffff4, 0x00affff4, 0x00bffff4,
+ 0x00cffff4, 0x00dffff4, 0x00effff4, 0x00fffff4,
+ 0x010ffff4, 0x011ffff4, 0x012ffff4, 0x013ffff4,
+ 0x014ffff4, 0x015ffff4, 0x016ffff4, 0x017ffff4,
+ 0x018ffff4, 0x019ffff4, 0x01affff4, 0x01bffff4,
+ 0x01cffff4, 0x01dffff4, 0x01effff4, 0x0000bfeb},
+
+ .lit_table = {
+ 0x000c, 0x0035, 0x0093, 0x00b5, 0x0075, 0x00f5, 0x0193, 0x0053,
+ 0x0153, 0x000d, 0x0009, 0x00d3, 0x01d3, 0x008d, 0x0033, 0x0133,
+ 0x00b3, 0x0147, 0x0347, 0x00c7, 0x02c7, 0x01c7, 0x03c7, 0x0027,
+ 0x0227, 0x002f, 0x042f, 0x022f, 0x0127, 0x062f, 0x01b3, 0x0073,
+ 0x001c, 0x0327, 0x0173, 0x00a7, 0x00f3, 0x02a7, 0x01a7, 0x01f3,
+ 0x004d, 0x000b, 0x03a7, 0x0067, 0x0049, 0x00cd, 0x0029, 0x0267,
+ 0x002d, 0x00ad, 0x006d, 0x00ed, 0x001d, 0x009d, 0x010b, 0x008b,
+ 0x005d, 0x018b, 0x004b, 0x014b, 0x00cb, 0x0167, 0x01cb, 0x002b,
+ 0x00dd, 0x003d, 0x00bd, 0x007d, 0x012b, 0x00ab, 0x01ab, 0x006b,
+ 0x016b, 0x00fd, 0x00eb, 0x0367, 0x01eb, 0x001b, 0x011b, 0x009b,
+ 0x0003, 0x00e7, 0x019b, 0x0083, 0x005b, 0x015b, 0x02e7, 0x00db,
+ 0x01e7, 0x03e7, 0x0017, 0x0217, 0x0117, 0x0317, 0x0097, 0x0297,
+ 0x01db, 0x0002, 0x0069, 0x0019, 0x0016, 0x0012, 0x0059, 0x0039,
+ 0x0079, 0x0036, 0x003b, 0x0043, 0x000e, 0x0005, 0x002e, 0x001e,
+ 0x0045, 0x0197, 0x003e, 0x0001, 0x0021, 0x0011, 0x00c3, 0x0025,
+ 0x013b, 0x0065, 0x00bb, 0x012f, 0x0397, 0x0057, 0x0257, 0x0157,
+ 0x01bb, 0x052f, 0x032f, 0x0357, 0x00d7, 0x072f, 0x00af, 0x02d7,
+ 0x01d7, 0x04af, 0x02af, 0x03d7, 0x06af, 0x01af, 0x05af, 0x0037,
+ 0x0237, 0x03af, 0x07af, 0x006f, 0x046f, 0x026f, 0x066f, 0x016f,
+ 0x056f, 0x036f, 0x076f, 0x00ef, 0x04ef, 0x02ef, 0x06ef, 0x01ef,
+ 0x0137, 0x05ef, 0x03ef, 0x07ef, 0x0337, 0x001f, 0x00b7, 0x041f,
+ 0x02b7, 0x021f, 0x061f, 0x011f, 0x051f, 0x031f, 0x071f, 0x009f,
+ 0x01b7, 0x049f, 0x029f, 0x069f, 0x03b7, 0x019f, 0x059f, 0x039f,
+ 0x079f, 0x005f, 0x045f, 0x025f, 0x065f, 0x0077, 0x015f, 0x0277,
+ 0x007b, 0x0177, 0x017b, 0x00fb, 0x055f, 0x035f, 0x075f, 0x0377,
+ 0x00f7, 0x00df, 0x04df, 0x02df, 0x06df, 0x01df, 0x05df, 0x02f7,
+ 0x01f7, 0x03df, 0x07df, 0x003f, 0x043f, 0x023f, 0x063f, 0x013f,
+ 0x053f, 0x033f, 0x073f, 0x00bf, 0x04bf, 0x02bf, 0x06bf, 0x01bf,
+ 0x01fb, 0x03f7, 0x05bf, 0x000f, 0x020f, 0x03bf, 0x07bf, 0x010f,
+ 0x030f, 0x007f, 0x047f, 0x027f, 0x067f, 0x017f, 0x057f, 0x008f,
+ 0x0007, 0x028f, 0x037f, 0x018f, 0x038f, 0x077f, 0x00ff, 0x04ff,
+ 0x0107, 0x004f, 0x02ff, 0x06ff, 0x0087, 0x024f, 0x0187, 0x0023,
+ 0x1fff},
+
+ .lit_table_sizes = {
+ 0x05, 0x08, 0x09, 0x08, 0x08, 0x08, 0x09, 0x09,
+ 0x09, 0x08, 0x07, 0x09, 0x09, 0x08, 0x09, 0x09,
+ 0x09, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x09, 0x09,
+ 0x05, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x09,
+ 0x08, 0x09, 0x0a, 0x0a, 0x07, 0x08, 0x07, 0x0a,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x09, 0x09,
+ 0x08, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09,
+ 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x08, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09,
+ 0x08, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x09,
+ 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
+ 0x09, 0x05, 0x07, 0x07, 0x06, 0x05, 0x07, 0x07,
+ 0x07, 0x06, 0x09, 0x08, 0x06, 0x07, 0x06, 0x06,
+ 0x07, 0x0a, 0x06, 0x06, 0x06, 0x06, 0x08, 0x07,
+ 0x09, 0x07, 0x09, 0x0b, 0x0a, 0x0a, 0x0a, 0x0a,
+ 0x09, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a,
+ 0x09, 0x0a, 0x09, 0x09, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x09, 0x0a, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x09, 0x0a, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0b,
+ 0x09, 0x0a, 0x0b, 0x0b, 0x09, 0x0a, 0x09, 0x08,
+ 0x0f},
+
+#ifndef LONGER_HUFFTABLE
+ .dcodes = {
+ 0x007f, 0x01ff, 0x017f, 0x03ff, 0x00ff, 0x003f, 0x00bf, 0x000f,
+ 0x002f, 0x001f, 0x000b, 0x001b, 0x0004, 0x0007, 0x000c, 0x0002,
+ 0x000a, 0x0006, 0x000e, 0x0001, 0x0009, 0x0017, 0x0000, 0x0005,
+ 0x000d, 0x0003, 0x0000, 0x0000, 0x0000, 0x0000},
+
+ .dcodes_sizes = {
+ 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x08, 0x08, 0x06,
+ 0x06, 0x06, 0x05, 0x05, 0x04, 0x05, 0x04, 0x04,
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x03, 0x04,
+ 0x04, 0x04, 0x00, 0x00, 0x00, 0x00}
+#else
+ .dcodes = {
+ 0x0000, 0x0000, 0x0000, 0x0000},
+
+ .dcodes_sizes = {
+ 0x00, 0x00, 0x00, 0x00}
+#endif
+};
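
Editor's note: the initializer that closes here is the large-window build's table set (the `#else // LARGE_WINDOW` branch below re-emits the framing constants and `hufftables_default` for the standard window). Within it, the `dcodes`/`dcodes_sizes` pair is compiled in two shapes: without `LONGER_HUFFTABLE` the arrays carry Huffman codes for all 30 DEFLATE distance symbols, while with it the expanded `dist_table` appears to resolve most distances directly, so the fallback arrays collapse to zeroed tails. The 30-entry layout is indexed by the standard RFC 1951 distance symbol. As an illustrative sketch (the helper and table names below are not part of ISA-L), this is how a match distance maps to that symbol and its extra-bit count:

#include <stdint.h>

/* RFC 1951 distance-symbol base values and extra-bit counts, symbols 0..29. */
static const uint32_t dist_base[30] = {
	1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
	257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
	8193, 12289, 16385, 24577
};
static const uint8_t dist_extra[30] = {
	0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
	7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
};

/*
 * Return the DEFLATE distance symbol (0..29) for a match distance in
 * 1..32768, and report how many raw extra bits (holding dist - base)
 * must follow the Huffman code for that symbol. A table such as
 * dcodes[]/dcodes_sizes[] above is indexed by the returned symbol.
 */
static int dist_symbol(uint32_t dist, int *n_extra)
{
	int sym = 29;

	while (dist < dist_base[sym])
		sym--;
	*n_extra = dist_extra[sym];
	return sym;
}
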
+
+#else // LARGE_WINDOW
+
+const uint8_t gzip_hdr[] = {
+ 0x1f, 0x8b, 0x08, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff
+};
+
+const uint32_t gzip_hdr_bytes = 10;
+const uint32_t gzip_trl_bytes = 8;
+
+const uint8_t zlib_hdr[] = { 0x78, 0x01 };
+
+const uint32_t zlib_hdr_bytes = 2;
+const uint32_t zlib_trl_bytes = 4;
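
Editor's note: these constants pin down the container framing around the raw DEFLATE stream. The 10-byte gzip header is the fixed RFC 1952 preamble (magic 1f 8b, CM=8 for deflate, zeroed FLG/MTIME/XFL, OS=0xff for "unknown"), and the 8-byte gzip trailer is CRC32 followed by ISIZE, both little-endian; the 2-byte zlib header 0x78 0x01 is the RFC 1950 CMF/FLG pair (deflate, 32 KB window, fastest-compression hint), trailed by a 4-byte big-endian Adler-32. As a minimal sketch meant to sit alongside these constants (the wrap_gzip helper is hypothetical, not an ISA-L API), a complete gzip member could be assembled like this:

#include <stdint.h>
#include <string.h>

/*
 * Wrap an already-deflated payload in gzip framing. The caller supplies
 * the CRC32 and byte count of the *uncompressed* data; both trailer
 * words are little-endian per RFC 1952. Assumes `out` has room for
 * gzip_hdr_bytes + deflate_len + gzip_trl_bytes.
 */
static size_t wrap_gzip(uint8_t *out, const uint8_t *deflate_data,
			size_t deflate_len, uint32_t crc, uint32_t isize)
{
	size_t n = 0;

	memcpy(out, gzip_hdr, gzip_hdr_bytes);		/* 10-byte fixed header */
	n += gzip_hdr_bytes;
	memcpy(out + n, deflate_data, deflate_len);	/* raw DEFLATE stream */
	n += deflate_len;
	for (int i = 0; i < 4; i++)			/* CRC32, little-endian */
		out[n++] = (uint8_t)(crc >> (8 * i));
	for (int i = 0; i < 4; i++)			/* ISIZE = len mod 2^32, LE */
		out[n++] = (uint8_t)(isize >> (8 * i));
	return n;	/* == gzip_hdr_bytes + deflate_len + gzip_trl_bytes */
}
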
+
+struct isal_hufftables hufftables_default = {
+
+ .deflate_hdr = {
+ 0xed, 0xfd, 0x09, 0x80, 0x1c, 0x45, 0xf9, 0xbf,
+ 0x81, 0xf7, 0x66, 0x37, 0xd7, 0x24, 0x9b, 0x04,
+ 0x40, 0x45, 0x45, 0x52, 0x04, 0x20, 0x09, 0xcc,
+ 0x2e, 0xbb, 0x9b, 0x3b, 0x81, 0x24, 0xbb, 0xb9,
+ 0x21, 0x17, 0x49, 0xb8, 0x04, 0x85, 0xde, 0x99,
+ 0xde, 0x9d, 0x26, 0x33, 0xd3, 0x43, 0xf7, 0x4c,
+ 0x36, 0x8b, 0x08, 0xf1, 0x56, 0x51, 0xc1, 0xfb,
+ 0x56, 0x54, 0xc4, 0x5b, 0x51, 0xf1, 0x16, 0x0d,
+ 0x89, 0x8a, 0x37, 0x78, 0xdf, 0x1a, 0x45, 0x05,
+ 0xef, 0x20, 0xaa, 0xa0, 0x90, 0xfd, 0xff, 0x9f,
+ 0xf7, 0xad, 0x9e, 0xa9, 0xdd, 0xdd, 0x70, 0xfa,
+ 0xfd, 0xfd, 0xbe, 0xbf, 0xff, 0xdf, 0xcd, 0x66,
+ 0x67, 0xba, 0xbb, 0xaa, 0xea, 0x7d, 0xdf, 0x7a,
+ 0xeb, 0xad, 0x4f, 0xbd, 0xf5, 0x56, 0x35},
+
+ .deflate_hdr_count = 110,
+ .deflate_hdr_extra_bits = 6,
+
+ .dist_table = {
+ 0x000007e8, 0x00001fe9,
+#ifdef LONGER_HUFFTABLE
+ 0x000017e8, 0x00003fe9, 0x00000fe9, 0x00002fe9,
+ 0x000003e8, 0x000013e8, 0x00000be9, 0x00001be9,
+ 0x00002be9, 0x00003be9, 0x000002e8, 0x00000ae8,
+ 0x000012e8, 0x00001ae8, 0x000006e9, 0x00000ee9,
+ 0x000016e9, 0x00001ee9, 0x000026e9, 0x00002ee9,
+ 0x000036e9, 0x00003ee9, 0x000001e9, 0x000009e9,
+ 0x000011e9, 0x000019e9, 0x000021e9, 0x000029e9,
+ 0x000031e9, 0x000039e9, 0x00000129, 0x00000529,
+ 0x00000929, 0x00000d29, 0x00001129, 0x00001529,
+ 0x00001929, 0x00001d29, 0x00002129, 0x00002529,
+ 0x00002929, 0x00002d29, 0x00003129, 0x00003529,
+ 0x00003929, 0x00003d29, 0x00000329, 0x00000729,
+ 0x00000b29, 0x00000f29, 0x00001329, 0x00001729,
+ 0x00001b29, 0x00001f29, 0x00002329, 0x00002729,
+ 0x00002b29, 0x00002f29, 0x00003329, 0x00003729,
+ 0x00003b29, 0x00003f29, 0x000000aa, 0x000004aa,
+ 0x000008aa, 0x00000caa, 0x000010aa, 0x000014aa,
+ 0x000018aa, 0x00001caa, 0x000020aa, 0x000024aa,
+ 0x000028aa, 0x00002caa, 0x000030aa, 0x000034aa,
+ 0x000038aa, 0x00003caa, 0x000040aa, 0x000044aa,
+ 0x000048aa, 0x00004caa, 0x000050aa, 0x000054aa,
+ 0x000058aa, 0x00005caa, 0x000060aa, 0x000064aa,
+ 0x000068aa, 0x00006caa, 0x000070aa, 0x000074aa,
+ 0x000078aa, 0x00007caa, 0x000002aa, 0x000006aa,
+ 0x00000aaa, 0x00000eaa, 0x000012aa, 0x000016aa,
+ 0x00001aaa, 0x00001eaa, 0x000022aa, 0x000026aa,
+ 0x00002aaa, 0x00002eaa, 0x000032aa, 0x000036aa,
+ 0x00003aaa, 0x00003eaa, 0x000042aa, 0x000046aa,
+ 0x00004aaa, 0x00004eaa, 0x000052aa, 0x000056aa,
+ 0x00005aaa, 0x00005eaa, 0x000062aa, 0x000066aa,
+ 0x00006aaa, 0x00006eaa, 0x000072aa, 0x000076aa,
+ 0x00007aaa, 0x00007eaa, 0x0000008a, 0x0000028a,
+ 0x0000048a, 0x0000068a, 0x0000088a, 0x00000a8a,
+ 0x00000c8a, 0x00000e8a, 0x0000108a, 0x0000128a,
+ 0x0000148a, 0x0000168a, 0x0000188a, 0x00001a8a,
+ 0x00001c8a, 0x00001e8a, 0x0000208a, 0x0000228a,
+ 0x0000248a, 0x0000268a, 0x0000288a, 0x00002a8a,
+ 0x00002c8a, 0x00002e8a, 0x0000308a, 0x0000328a,
+ 0x0000348a, 0x0000368a, 0x0000388a, 0x00003a8a,
+ 0x00003c8a, 0x00003e8a, 0x0000408a, 0x0000428a,
+ 0x0000448a, 0x0000468a, 0x0000488a, 0x00004a8a,
+ 0x00004c8a, 0x00004e8a, 0x0000508a, 0x0000528a,
+ 0x0000548a, 0x0000568a, 0x0000588a, 0x00005a8a,
+ 0x00005c8a, 0x00005e8a, 0x0000608a, 0x0000628a,
+ 0x0000648a, 0x0000668a, 0x0000688a, 0x00006a8a,
+ 0x00006c8a, 0x00006e8a, 0x0000708a, 0x0000728a,
+ 0x0000748a, 0x0000768a, 0x0000788a, 0x00007a8a,
+ 0x00007c8a, 0x00007e8a, 0x0000018a, 0x0000038a,
+ 0x0000058a, 0x0000078a, 0x0000098a, 0x00000b8a,
+ 0x00000d8a, 0x00000f8a, 0x0000118a, 0x0000138a,
+ 0x0000158a, 0x0000178a, 0x0000198a, 0x00001b8a,
+ 0x00001d8a, 0x00001f8a, 0x0000218a, 0x0000238a,
+ 0x0000258a, 0x0000278a, 0x0000298a, 0x00002b8a,
+ 0x00002d8a, 0x00002f8a, 0x0000318a, 0x0000338a,
+ 0x0000358a, 0x0000378a, 0x0000398a, 0x00003b8a,
+ 0x00003d8a, 0x00003f8a, 0x0000418a, 0x0000438a,
+ 0x0000458a, 0x0000478a, 0x0000498a, 0x00004b8a,
+ 0x00004d8a, 0x00004f8a, 0x0000518a, 0x0000538a,
+ 0x0000558a, 0x0000578a, 0x0000598a, 0x00005b8a,
+ 0x00005d8a, 0x00005f8a, 0x0000618a, 0x0000638a,
+ 0x0000658a, 0x0000678a, 0x0000698a, 0x00006b8a,
+ 0x00006d8a, 0x00006f8a, 0x0000718a, 0x0000738a,
+ 0x0000758a, 0x0000778a, 0x0000798a, 0x00007b8a,
+ 0x00007d8a, 0x00007f8a, 0x0000004b, 0x0000024b,
+ 0x0000044b, 0x0000064b, 0x0000084b, 0x00000a4b,
+ 0x00000c4b, 0x00000e4b, 0x0000104b, 0x0000124b,
+ 0x0000144b, 0x0000164b, 0x0000184b, 0x00001a4b,
+ 0x00001c4b, 0x00001e4b, 0x0000204b, 0x0000224b,
+ 0x0000244b, 0x0000264b, 0x0000284b, 0x00002a4b,
+ 0x00002c4b, 0x00002e4b, 0x0000304b, 0x0000324b,
+ 0x0000344b, 0x0000364b, 0x0000384b, 0x00003a4b,
+ 0x00003c4b, 0x00003e4b, 0x0000404b, 0x0000424b,
+ 0x0000444b, 0x0000464b, 0x0000484b, 0x00004a4b,
+ 0x00004c4b, 0x00004e4b, 0x0000504b, 0x0000524b,
+ 0x0000544b, 0x0000564b, 0x0000584b, 0x00005a4b,
+ 0x00005c4b, 0x00005e4b, 0x0000604b, 0x0000624b,
+ 0x0000644b, 0x0000664b, 0x0000684b, 0x00006a4b,
+ 0x00006c4b, 0x00006e4b, 0x0000704b, 0x0000724b,
+ 0x0000744b, 0x0000764b, 0x0000784b, 0x00007a4b,
+ 0x00007c4b, 0x00007e4b, 0x0000804b, 0x0000824b,
+ 0x0000844b, 0x0000864b, 0x0000884b, 0x00008a4b,
+ 0x00008c4b, 0x00008e4b, 0x0000904b, 0x0000924b,
+ 0x0000944b, 0x0000964b, 0x0000984b, 0x00009a4b,
+ 0x00009c4b, 0x00009e4b, 0x0000a04b, 0x0000a24b,
+ 0x0000a44b, 0x0000a64b, 0x0000a84b, 0x0000aa4b,
+ 0x0000ac4b, 0x0000ae4b, 0x0000b04b, 0x0000b24b,
+ 0x0000b44b, 0x0000b64b, 0x0000b84b, 0x0000ba4b,
+ 0x0000bc4b, 0x0000be4b, 0x0000c04b, 0x0000c24b,
+ 0x0000c44b, 0x0000c64b, 0x0000c84b, 0x0000ca4b,
+ 0x0000cc4b, 0x0000ce4b, 0x0000d04b, 0x0000d24b,
+ 0x0000d44b, 0x0000d64b, 0x0000d84b, 0x0000da4b,
+ 0x0000dc4b, 0x0000de4b, 0x0000e04b, 0x0000e24b,
+ 0x0000e44b, 0x0000e64b, 0x0000e84b, 0x0000ea4b,
+ 0x0000ec4b, 0x0000ee4b, 0x0000f04b, 0x0000f24b,
+ 0x0000f44b, 0x0000f64b, 0x0000f84b, 0x0000fa4b,
+ 0x0000fc4b, 0x0000fe4b, 0x000001ac, 0x000005ac,
+ 0x000009ac, 0x00000dac, 0x000011ac, 0x000015ac,
+ 0x000019ac, 0x00001dac, 0x000021ac, 0x000025ac,
+ 0x000029ac, 0x00002dac, 0x000031ac, 0x000035ac,
+ 0x000039ac, 0x00003dac, 0x000041ac, 0x000045ac,
+ 0x000049ac, 0x00004dac, 0x000051ac, 0x000055ac,
+ 0x000059ac, 0x00005dac, 0x000061ac, 0x000065ac,
+ 0x000069ac, 0x00006dac, 0x000071ac, 0x000075ac,
+ 0x000079ac, 0x00007dac, 0x000081ac, 0x000085ac,
+ 0x000089ac, 0x00008dac, 0x000091ac, 0x000095ac,
+ 0x000099ac, 0x00009dac, 0x0000a1ac, 0x0000a5ac,
+ 0x0000a9ac, 0x0000adac, 0x0000b1ac, 0x0000b5ac,
+ 0x0000b9ac, 0x0000bdac, 0x0000c1ac, 0x0000c5ac,
+ 0x0000c9ac, 0x0000cdac, 0x0000d1ac, 0x0000d5ac,
+ 0x0000d9ac, 0x0000ddac, 0x0000e1ac, 0x0000e5ac,
+ 0x0000e9ac, 0x0000edac, 0x0000f1ac, 0x0000f5ac,
+ 0x0000f9ac, 0x0000fdac, 0x000101ac, 0x000105ac,
+ 0x000109ac, 0x00010dac, 0x000111ac, 0x000115ac,
+ 0x000119ac, 0x00011dac, 0x000121ac, 0x000125ac,
+ 0x000129ac, 0x00012dac, 0x000131ac, 0x000135ac,
+ 0x000139ac, 0x00013dac, 0x000141ac, 0x000145ac,
+ 0x000149ac, 0x00014dac, 0x000151ac, 0x000155ac,
+ 0x000159ac, 0x00015dac, 0x000161ac, 0x000165ac,
+ 0x000169ac, 0x00016dac, 0x000171ac, 0x000175ac,
+ 0x000179ac, 0x00017dac, 0x000181ac, 0x000185ac,
+ 0x000189ac, 0x00018dac, 0x000191ac, 0x000195ac,
+ 0x000199ac, 0x00019dac, 0x0001a1ac, 0x0001a5ac,
+ 0x0001a9ac, 0x0001adac, 0x0001b1ac, 0x0001b5ac,
+ 0x0001b9ac, 0x0001bdac, 0x0001c1ac, 0x0001c5ac,
+ 0x0001c9ac, 0x0001cdac, 0x0001d1ac, 0x0001d5ac,
+ 0x0001d9ac, 0x0001ddac, 0x0001e1ac, 0x0001e5ac,
+ 0x0001e9ac, 0x0001edac, 0x0001f1ac, 0x0001f5ac,
+ 0x0001f9ac, 0x0001fdac, 0x0000014c, 0x0000034c,
+ 0x0000054c, 0x0000074c, 0x0000094c, 0x00000b4c,
+ 0x00000d4c, 0x00000f4c, 0x0000114c, 0x0000134c,
+ 0x0000154c, 0x0000174c, 0x0000194c, 0x00001b4c,
+ 0x00001d4c, 0x00001f4c, 0x0000214c, 0x0000234c,
+ 0x0000254c, 0x0000274c, 0x0000294c, 0x00002b4c,
+ 0x00002d4c, 0x00002f4c, 0x0000314c, 0x0000334c,
+ 0x0000354c, 0x0000374c, 0x0000394c, 0x00003b4c,
+ 0x00003d4c, 0x00003f4c, 0x0000414c, 0x0000434c,
+ 0x0000454c, 0x0000474c, 0x0000494c, 0x00004b4c,
+ 0x00004d4c, 0x00004f4c, 0x0000514c, 0x0000534c,
+ 0x0000554c, 0x0000574c, 0x0000594c, 0x00005b4c,
+ 0x00005d4c, 0x00005f4c, 0x0000614c, 0x0000634c,
+ 0x0000654c, 0x0000674c, 0x0000694c, 0x00006b4c,
+ 0x00006d4c, 0x00006f4c, 0x0000714c, 0x0000734c,
+ 0x0000754c, 0x0000774c, 0x0000794c, 0x00007b4c,
+ 0x00007d4c, 0x00007f4c, 0x0000814c, 0x0000834c,
+ 0x0000854c, 0x0000874c, 0x0000894c, 0x00008b4c,
+ 0x00008d4c, 0x00008f4c, 0x0000914c, 0x0000934c,
+ 0x0000954c, 0x0000974c, 0x0000994c, 0x00009b4c,
+ 0x00009d4c, 0x00009f4c, 0x0000a14c, 0x0000a34c,
+ 0x0000a54c, 0x0000a74c, 0x0000a94c, 0x0000ab4c,
+ 0x0000ad4c, 0x0000af4c, 0x0000b14c, 0x0000b34c,
+ 0x0000b54c, 0x0000b74c, 0x0000b94c, 0x0000bb4c,
+ 0x0000bd4c, 0x0000bf4c, 0x0000c14c, 0x0000c34c,
+ 0x0000c54c, 0x0000c74c, 0x0000c94c, 0x0000cb4c,
+ 0x0000cd4c, 0x0000cf4c, 0x0000d14c, 0x0000d34c,
+ 0x0000d54c, 0x0000d74c, 0x0000d94c, 0x0000db4c,
+ 0x0000dd4c, 0x0000df4c, 0x0000e14c, 0x0000e34c,
+ 0x0000e54c, 0x0000e74c, 0x0000e94c, 0x0000eb4c,
+ 0x0000ed4c, 0x0000ef4c, 0x0000f14c, 0x0000f34c,
+ 0x0000f54c, 0x0000f74c, 0x0000f94c, 0x0000fb4c,
+ 0x0000fd4c, 0x0000ff4c, 0x0001014c, 0x0001034c,
+ 0x0001054c, 0x0001074c, 0x0001094c, 0x00010b4c,
+ 0x00010d4c, 0x00010f4c, 0x0001114c, 0x0001134c,
+ 0x0001154c, 0x0001174c, 0x0001194c, 0x00011b4c,
+ 0x00011d4c, 0x00011f4c, 0x0001214c, 0x0001234c,
+ 0x0001254c, 0x0001274c, 0x0001294c, 0x00012b4c,
+ 0x00012d4c, 0x00012f4c, 0x0001314c, 0x0001334c,
+ 0x0001354c, 0x0001374c, 0x0001394c, 0x00013b4c,
+ 0x00013d4c, 0x00013f4c, 0x0001414c, 0x0001434c,
+ 0x0001454c, 0x0001474c, 0x0001494c, 0x00014b4c,
+ 0x00014d4c, 0x00014f4c, 0x0001514c, 0x0001534c,
+ 0x0001554c, 0x0001574c, 0x0001594c, 0x00015b4c,
+ 0x00015d4c, 0x00015f4c, 0x0001614c, 0x0001634c,
+ 0x0001654c, 0x0001674c, 0x0001694c, 0x00016b4c,
+ 0x00016d4c, 0x00016f4c, 0x0001714c, 0x0001734c,
+ 0x0001754c, 0x0001774c, 0x0001794c, 0x00017b4c,
+ 0x00017d4c, 0x00017f4c, 0x0001814c, 0x0001834c,
+ 0x0001854c, 0x0001874c, 0x0001894c, 0x00018b4c,
+ 0x00018d4c, 0x00018f4c, 0x0001914c, 0x0001934c,
+ 0x0001954c, 0x0001974c, 0x0001994c, 0x00019b4c,
+ 0x00019d4c, 0x00019f4c, 0x0001a14c, 0x0001a34c,
+ 0x0001a54c, 0x0001a74c, 0x0001a94c, 0x0001ab4c,
+ 0x0001ad4c, 0x0001af4c, 0x0001b14c, 0x0001b34c,
+ 0x0001b54c, 0x0001b74c, 0x0001b94c, 0x0001bb4c,
+ 0x0001bd4c, 0x0001bf4c, 0x0001c14c, 0x0001c34c,
+ 0x0001c54c, 0x0001c74c, 0x0001c94c, 0x0001cb4c,
+ 0x0001cd4c, 0x0001cf4c, 0x0001d14c, 0x0001d34c,
+ 0x0001d54c, 0x0001d74c, 0x0001d94c, 0x0001db4c,
+ 0x0001dd4c, 0x0001df4c, 0x0001e14c, 0x0001e34c,
+ 0x0001e54c, 0x0001e74c, 0x0001e94c, 0x0001eb4c,
+ 0x0001ed4c, 0x0001ef4c, 0x0001f14c, 0x0001f34c,
+ 0x0001f54c, 0x0001f74c, 0x0001f94c, 0x0001fb4c,
+ 0x0001fd4c, 0x0001ff4c, 0x000003ad, 0x000007ad,
+ 0x00000bad, 0x00000fad, 0x000013ad, 0x000017ad,
+ 0x00001bad, 0x00001fad, 0x000023ad, 0x000027ad,
+ 0x00002bad, 0x00002fad, 0x000033ad, 0x000037ad,
+ 0x00003bad, 0x00003fad, 0x000043ad, 0x000047ad,
+ 0x00004bad, 0x00004fad, 0x000053ad, 0x000057ad,
+ 0x00005bad, 0x00005fad, 0x000063ad, 0x000067ad,
+ 0x00006bad, 0x00006fad, 0x000073ad, 0x000077ad,
+ 0x00007bad, 0x00007fad, 0x000083ad, 0x000087ad,
+ 0x00008bad, 0x00008fad, 0x000093ad, 0x000097ad,
+ 0x00009bad, 0x00009fad, 0x0000a3ad, 0x0000a7ad,
+ 0x0000abad, 0x0000afad, 0x0000b3ad, 0x0000b7ad,
+ 0x0000bbad, 0x0000bfad, 0x0000c3ad, 0x0000c7ad,
+ 0x0000cbad, 0x0000cfad, 0x0000d3ad, 0x0000d7ad,
+ 0x0000dbad, 0x0000dfad, 0x0000e3ad, 0x0000e7ad,
+ 0x0000ebad, 0x0000efad, 0x0000f3ad, 0x0000f7ad,
+ 0x0000fbad, 0x0000ffad, 0x000103ad, 0x000107ad,
+ 0x00010bad, 0x00010fad, 0x000113ad, 0x000117ad,
+ 0x00011bad, 0x00011fad, 0x000123ad, 0x000127ad,
+ 0x00012bad, 0x00012fad, 0x000133ad, 0x000137ad,
+ 0x00013bad, 0x00013fad, 0x000143ad, 0x000147ad,
+ 0x00014bad, 0x00014fad, 0x000153ad, 0x000157ad,
+ 0x00015bad, 0x00015fad, 0x000163ad, 0x000167ad,
+ 0x00016bad, 0x00016fad, 0x000173ad, 0x000177ad,
+ 0x00017bad, 0x00017fad, 0x000183ad, 0x000187ad,
+ 0x00018bad, 0x00018fad, 0x000193ad, 0x000197ad,
+ 0x00019bad, 0x00019fad, 0x0001a3ad, 0x0001a7ad,
+ 0x0001abad, 0x0001afad, 0x0001b3ad, 0x0001b7ad,
+ 0x0001bbad, 0x0001bfad, 0x0001c3ad, 0x0001c7ad,
+ 0x0001cbad, 0x0001cfad, 0x0001d3ad, 0x0001d7ad,
+ 0x0001dbad, 0x0001dfad, 0x0001e3ad, 0x0001e7ad,
+ 0x0001ebad, 0x0001efad, 0x0001f3ad, 0x0001f7ad,
+ 0x0001fbad, 0x0001ffad, 0x000203ad, 0x000207ad,
+ 0x00020bad, 0x00020fad, 0x000213ad, 0x000217ad,
+ 0x00021bad, 0x00021fad, 0x000223ad, 0x000227ad,
+ 0x00022bad, 0x00022fad, 0x000233ad, 0x000237ad,
+ 0x00023bad, 0x00023fad, 0x000243ad, 0x000247ad,
+ 0x00024bad, 0x00024fad, 0x000253ad, 0x000257ad,
+ 0x00025bad, 0x00025fad, 0x000263ad, 0x000267ad,
+ 0x00026bad, 0x00026fad, 0x000273ad, 0x000277ad,
+ 0x00027bad, 0x00027fad, 0x000283ad, 0x000287ad,
+ 0x00028bad, 0x00028fad, 0x000293ad, 0x000297ad,
+ 0x00029bad, 0x00029fad, 0x0002a3ad, 0x0002a7ad,
+ 0x0002abad, 0x0002afad, 0x0002b3ad, 0x0002b7ad,
+ 0x0002bbad, 0x0002bfad, 0x0002c3ad, 0x0002c7ad,
+ 0x0002cbad, 0x0002cfad, 0x0002d3ad, 0x0002d7ad,
+ 0x0002dbad, 0x0002dfad, 0x0002e3ad, 0x0002e7ad,
+ 0x0002ebad, 0x0002efad, 0x0002f3ad, 0x0002f7ad,
+ 0x0002fbad, 0x0002ffad, 0x000303ad, 0x000307ad,
+ 0x00030bad, 0x00030fad, 0x000313ad, 0x000317ad,
+ 0x00031bad, 0x00031fad, 0x000323ad, 0x000327ad,
+ 0x00032bad, 0x00032fad, 0x000333ad, 0x000337ad,
+ 0x00033bad, 0x00033fad, 0x000343ad, 0x000347ad,
+ 0x00034bad, 0x00034fad, 0x000353ad, 0x000357ad,
+ 0x00035bad, 0x00035fad, 0x000363ad, 0x000367ad,
+ 0x00036bad, 0x00036fad, 0x000373ad, 0x000377ad,
+ 0x00037bad, 0x00037fad, 0x000383ad, 0x000387ad,
+ 0x00038bad, 0x00038fad, 0x000393ad, 0x000397ad,
+ 0x00039bad, 0x00039fad, 0x0003a3ad, 0x0003a7ad,
+ 0x0003abad, 0x0003afad, 0x0003b3ad, 0x0003b7ad,
+ 0x0003bbad, 0x0003bfad, 0x0003c3ad, 0x0003c7ad,
+ 0x0003cbad, 0x0003cfad, 0x0003d3ad, 0x0003d7ad,
+ 0x0003dbad, 0x0003dfad, 0x0003e3ad, 0x0003e7ad,
+ 0x0003ebad, 0x0003efad, 0x0003f3ad, 0x0003f7ad,
+ 0x0003fbad, 0x0003ffad, 0x000000cd, 0x000002cd,
+ 0x000004cd, 0x000006cd, 0x000008cd, 0x00000acd,
+ 0x00000ccd, 0x00000ecd, 0x000010cd, 0x000012cd,
+ 0x000014cd, 0x000016cd, 0x000018cd, 0x00001acd,
+ 0x00001ccd, 0x00001ecd, 0x000020cd, 0x000022cd,
+ 0x000024cd, 0x000026cd, 0x000028cd, 0x00002acd,
+ 0x00002ccd, 0x00002ecd, 0x000030cd, 0x000032cd,
+ 0x000034cd, 0x000036cd, 0x000038cd, 0x00003acd,
+ 0x00003ccd, 0x00003ecd, 0x000040cd, 0x000042cd,
+ 0x000044cd, 0x000046cd, 0x000048cd, 0x00004acd,
+ 0x00004ccd, 0x00004ecd, 0x000050cd, 0x000052cd,
+ 0x000054cd, 0x000056cd, 0x000058cd, 0x00005acd,
+ 0x00005ccd, 0x00005ecd, 0x000060cd, 0x000062cd,
+ 0x000064cd, 0x000066cd, 0x000068cd, 0x00006acd,
+ 0x00006ccd, 0x00006ecd, 0x000070cd, 0x000072cd,
+ 0x000074cd, 0x000076cd, 0x000078cd, 0x00007acd,
+ 0x00007ccd, 0x00007ecd, 0x000080cd, 0x000082cd,
+ 0x000084cd, 0x000086cd, 0x000088cd, 0x00008acd,
+ 0x00008ccd, 0x00008ecd, 0x000090cd, 0x000092cd,
+ 0x000094cd, 0x000096cd, 0x000098cd, 0x00009acd,
+ 0x00009ccd, 0x00009ecd, 0x0000a0cd, 0x0000a2cd,
+ 0x0000a4cd, 0x0000a6cd, 0x0000a8cd, 0x0000aacd,
+ 0x0000accd, 0x0000aecd, 0x0000b0cd, 0x0000b2cd,
+ 0x0000b4cd, 0x0000b6cd, 0x0000b8cd, 0x0000bacd,
+ 0x0000bccd, 0x0000becd, 0x0000c0cd, 0x0000c2cd,
+ 0x0000c4cd, 0x0000c6cd, 0x0000c8cd, 0x0000cacd,
+ 0x0000cccd, 0x0000cecd, 0x0000d0cd, 0x0000d2cd,
+ 0x0000d4cd, 0x0000d6cd, 0x0000d8cd, 0x0000dacd,
+ 0x0000dccd, 0x0000decd, 0x0000e0cd, 0x0000e2cd,
+ 0x0000e4cd, 0x0000e6cd, 0x0000e8cd, 0x0000eacd,
+ 0x0000eccd, 0x0000eecd, 0x0000f0cd, 0x0000f2cd,
+ 0x0000f4cd, 0x0000f6cd, 0x0000f8cd, 0x0000facd,
+ 0x0000fccd, 0x0000fecd, 0x000100cd, 0x000102cd,
+ 0x000104cd, 0x000106cd, 0x000108cd, 0x00010acd,
+ 0x00010ccd, 0x00010ecd, 0x000110cd, 0x000112cd,
+ 0x000114cd, 0x000116cd, 0x000118cd, 0x00011acd,
+ 0x00011ccd, 0x00011ecd, 0x000120cd, 0x000122cd,
+ 0x000124cd, 0x000126cd, 0x000128cd, 0x00012acd,
+ 0x00012ccd, 0x00012ecd, 0x000130cd, 0x000132cd,
+ 0x000134cd, 0x000136cd, 0x000138cd, 0x00013acd,
+ 0x00013ccd, 0x00013ecd, 0x000140cd, 0x000142cd,
+ 0x000144cd, 0x000146cd, 0x000148cd, 0x00014acd,
+ 0x00014ccd, 0x00014ecd, 0x000150cd, 0x000152cd,
+ 0x000154cd, 0x000156cd, 0x000158cd, 0x00015acd,
+ 0x00015ccd, 0x00015ecd, 0x000160cd, 0x000162cd,
+ 0x000164cd, 0x000166cd, 0x000168cd, 0x00016acd,
+ 0x00016ccd, 0x00016ecd, 0x000170cd, 0x000172cd,
+ 0x000174cd, 0x000176cd, 0x000178cd, 0x00017acd,
+ 0x00017ccd, 0x00017ecd, 0x000180cd, 0x000182cd,
+ 0x000184cd, 0x000186cd, 0x000188cd, 0x00018acd,
+ 0x00018ccd, 0x00018ecd, 0x000190cd, 0x000192cd,
+ 0x000194cd, 0x000196cd, 0x000198cd, 0x00019acd,
+ 0x00019ccd, 0x00019ecd, 0x0001a0cd, 0x0001a2cd,
+ 0x0001a4cd, 0x0001a6cd, 0x0001a8cd, 0x0001aacd,
+ 0x0001accd, 0x0001aecd, 0x0001b0cd, 0x0001b2cd,
+ 0x0001b4cd, 0x0001b6cd, 0x0001b8cd, 0x0001bacd,
+ 0x0001bccd, 0x0001becd, 0x0001c0cd, 0x0001c2cd,
+ 0x0001c4cd, 0x0001c6cd, 0x0001c8cd, 0x0001cacd,
+ 0x0001cccd, 0x0001cecd, 0x0001d0cd, 0x0001d2cd,
+ 0x0001d4cd, 0x0001d6cd, 0x0001d8cd, 0x0001dacd,
+ 0x0001dccd, 0x0001decd, 0x0001e0cd, 0x0001e2cd,
+ 0x0001e4cd, 0x0001e6cd, 0x0001e8cd, 0x0001eacd,
+ 0x0001eccd, 0x0001eecd, 0x0001f0cd, 0x0001f2cd,
+ 0x0001f4cd, 0x0001f6cd, 0x0001f8cd, 0x0001facd,
+ 0x0001fccd, 0x0001fecd, 0x000200cd, 0x000202cd,
+ 0x000204cd, 0x000206cd, 0x000208cd, 0x00020acd,
+ 0x00020ccd, 0x00020ecd, 0x000210cd, 0x000212cd,
+ 0x000214cd, 0x000216cd, 0x000218cd, 0x00021acd,
+ 0x00021ccd, 0x00021ecd, 0x000220cd, 0x000222cd,
+ 0x000224cd, 0x000226cd, 0x000228cd, 0x00022acd,
+ 0x00022ccd, 0x00022ecd, 0x000230cd, 0x000232cd,
+ 0x000234cd, 0x000236cd, 0x000238cd, 0x00023acd,
+ 0x00023ccd, 0x00023ecd, 0x000240cd, 0x000242cd,
+ 0x000244cd, 0x000246cd, 0x000248cd, 0x00024acd,
+ 0x00024ccd, 0x00024ecd, 0x000250cd, 0x000252cd,
+ 0x000254cd, 0x000256cd, 0x000258cd, 0x00025acd,
+ 0x00025ccd, 0x00025ecd, 0x000260cd, 0x000262cd,
+ 0x000264cd, 0x000266cd, 0x000268cd, 0x00026acd,
+ 0x00026ccd, 0x00026ecd, 0x000270cd, 0x000272cd,
+ 0x000274cd, 0x000276cd, 0x000278cd, 0x00027acd,
+ 0x00027ccd, 0x00027ecd, 0x000280cd, 0x000282cd,
+ 0x000284cd, 0x000286cd, 0x000288cd, 0x00028acd,
+ 0x00028ccd, 0x00028ecd, 0x000290cd, 0x000292cd,
+ 0x000294cd, 0x000296cd, 0x000298cd, 0x00029acd,
+ 0x00029ccd, 0x00029ecd, 0x0002a0cd, 0x0002a2cd,
+ 0x0002a4cd, 0x0002a6cd, 0x0002a8cd, 0x0002aacd,
+ 0x0002accd, 0x0002aecd, 0x0002b0cd, 0x0002b2cd,
+ 0x0002b4cd, 0x0002b6cd, 0x0002b8cd, 0x0002bacd,
+ 0x0002bccd, 0x0002becd, 0x0002c0cd, 0x0002c2cd,
+ 0x0002c4cd, 0x0002c6cd, 0x0002c8cd, 0x0002cacd,
+ 0x0002cccd, 0x0002cecd, 0x0002d0cd, 0x0002d2cd,
+ 0x0002d4cd, 0x0002d6cd, 0x0002d8cd, 0x0002dacd,
+ 0x0002dccd, 0x0002decd, 0x0002e0cd, 0x0002e2cd,
+ 0x0002e4cd, 0x0002e6cd, 0x0002e8cd, 0x0002eacd,
+ 0x0002eccd, 0x0002eecd, 0x0002f0cd, 0x0002f2cd,
+ 0x0002f4cd, 0x0002f6cd, 0x0002f8cd, 0x0002facd,
+ 0x0002fccd, 0x0002fecd, 0x000300cd, 0x000302cd,
+ 0x000304cd, 0x000306cd, 0x000308cd, 0x00030acd,
+ 0x00030ccd, 0x00030ecd, 0x000310cd, 0x000312cd,
+ 0x000314cd, 0x000316cd, 0x000318cd, 0x00031acd,
+ 0x00031ccd, 0x00031ecd, 0x000320cd, 0x000322cd,
+ 0x000324cd, 0x000326cd, 0x000328cd, 0x00032acd,
+ 0x00032ccd, 0x00032ecd, 0x000330cd, 0x000332cd,
+ 0x000334cd, 0x000336cd, 0x000338cd, 0x00033acd,
+ 0x00033ccd, 0x00033ecd, 0x000340cd, 0x000342cd,
+ 0x000344cd, 0x000346cd, 0x000348cd, 0x00034acd,
+ 0x00034ccd, 0x00034ecd, 0x000350cd, 0x000352cd,
+ 0x000354cd, 0x000356cd, 0x000358cd, 0x00035acd,
+ 0x00035ccd, 0x00035ecd, 0x000360cd, 0x000362cd,
+ 0x000364cd, 0x000366cd, 0x000368cd, 0x00036acd,
+ 0x00036ccd, 0x00036ecd, 0x000370cd, 0x000372cd,
+ 0x000374cd, 0x000376cd, 0x000378cd, 0x00037acd,
+ 0x00037ccd, 0x00037ecd, 0x000380cd, 0x000382cd,
+ 0x000384cd, 0x000386cd, 0x000388cd, 0x00038acd,
+ 0x00038ccd, 0x00038ecd, 0x000390cd, 0x000392cd,
+ 0x000394cd, 0x000396cd, 0x000398cd, 0x00039acd,
+ 0x00039ccd, 0x00039ecd, 0x0003a0cd, 0x0003a2cd,
+ 0x0003a4cd, 0x0003a6cd, 0x0003a8cd, 0x0003aacd,
+ 0x0003accd, 0x0003aecd, 0x0003b0cd, 0x0003b2cd,
+ 0x0003b4cd, 0x0003b6cd, 0x0003b8cd, 0x0003bacd,
+ 0x0003bccd, 0x0003becd, 0x0003c0cd, 0x0003c2cd,
+ 0x0003c4cd, 0x0003c6cd, 0x0003c8cd, 0x0003cacd,
+ 0x0003cccd, 0x0003cecd, 0x0003d0cd, 0x0003d2cd,
+ 0x0003d4cd, 0x0003d6cd, 0x0003d8cd, 0x0003dacd,
+ 0x0003dccd, 0x0003decd, 0x0003e0cd, 0x0003e2cd,
+ 0x0003e4cd, 0x0003e6cd, 0x0003e8cd, 0x0003eacd,
+ 0x0003eccd, 0x0003eecd, 0x0003f0cd, 0x0003f2cd,
+ 0x0003f4cd, 0x0003f6cd, 0x0003f8cd, 0x0003facd,
+ 0x0003fccd, 0x0003fecd, 0x0000006e, 0x0000046e,
+ 0x0000086e, 0x00000c6e, 0x0000106e, 0x0000146e,
+ 0x0000186e, 0x00001c6e, 0x0000206e, 0x0000246e,
+ 0x0000286e, 0x00002c6e, 0x0000306e, 0x0000346e,
+ 0x0000386e, 0x00003c6e, 0x0000406e, 0x0000446e,
+ 0x0000486e, 0x00004c6e, 0x0000506e, 0x0000546e,
+ 0x0000586e, 0x00005c6e, 0x0000606e, 0x0000646e,
+ 0x0000686e, 0x00006c6e, 0x0000706e, 0x0000746e,
+ 0x0000786e, 0x00007c6e, 0x0000806e, 0x0000846e,
+ 0x0000886e, 0x00008c6e, 0x0000906e, 0x0000946e,
+ 0x0000986e, 0x00009c6e, 0x0000a06e, 0x0000a46e,
+ 0x0000a86e, 0x0000ac6e, 0x0000b06e, 0x0000b46e,
+ 0x0000b86e, 0x0000bc6e, 0x0000c06e, 0x0000c46e,
+ 0x0000c86e, 0x0000cc6e, 0x0000d06e, 0x0000d46e,
+ 0x0000d86e, 0x0000dc6e, 0x0000e06e, 0x0000e46e,
+ 0x0000e86e, 0x0000ec6e, 0x0000f06e, 0x0000f46e,
+ 0x0000f86e, 0x0000fc6e, 0x0001006e, 0x0001046e,
+ 0x0001086e, 0x00010c6e, 0x0001106e, 0x0001146e,
+ 0x0001186e, 0x00011c6e, 0x0001206e, 0x0001246e,
+ 0x0001286e, 0x00012c6e, 0x0001306e, 0x0001346e,
+ 0x0001386e, 0x00013c6e, 0x0001406e, 0x0001446e,
+ 0x0001486e, 0x00014c6e, 0x0001506e, 0x0001546e,
+ 0x0001586e, 0x00015c6e, 0x0001606e, 0x0001646e,
+ 0x0001686e, 0x00016c6e, 0x0001706e, 0x0001746e,
+ 0x0001786e, 0x00017c6e, 0x0001806e, 0x0001846e,
+ 0x0001886e, 0x00018c6e, 0x0001906e, 0x0001946e,
+ 0x0001986e, 0x00019c6e, 0x0001a06e, 0x0001a46e,
+ 0x0001a86e, 0x0001ac6e, 0x0001b06e, 0x0001b46e,
+ 0x0001b86e, 0x0001bc6e, 0x0001c06e, 0x0001c46e,
+ 0x0001c86e, 0x0001cc6e, 0x0001d06e, 0x0001d46e,
+ 0x0001d86e, 0x0001dc6e, 0x0001e06e, 0x0001e46e,
+ 0x0001e86e, 0x0001ec6e, 0x0001f06e, 0x0001f46e,
+ 0x0001f86e, 0x0001fc6e, 0x0002006e, 0x0002046e,
+ 0x0002086e, 0x00020c6e, 0x0002106e, 0x0002146e,
+ 0x0002186e, 0x00021c6e, 0x0002206e, 0x0002246e,
+ 0x0002286e, 0x00022c6e, 0x0002306e, 0x0002346e,
+ 0x0002386e, 0x00023c6e, 0x0002406e, 0x0002446e,
+ 0x0002486e, 0x00024c6e, 0x0002506e, 0x0002546e,
+ 0x0002586e, 0x00025c6e, 0x0002606e, 0x0002646e,
+ 0x0002686e, 0x00026c6e, 0x0002706e, 0x0002746e,
+ 0x0002786e, 0x00027c6e, 0x0002806e, 0x0002846e,
+ 0x0002886e, 0x00028c6e, 0x0002906e, 0x0002946e,
+ 0x0002986e, 0x00029c6e, 0x0002a06e, 0x0002a46e,
+ 0x0002a86e, 0x0002ac6e, 0x0002b06e, 0x0002b46e,
+ 0x0002b86e, 0x0002bc6e, 0x0002c06e, 0x0002c46e,
+ 0x0002c86e, 0x0002cc6e, 0x0002d06e, 0x0002d46e,
+ 0x0002d86e, 0x0002dc6e, 0x0002e06e, 0x0002e46e,
+ 0x0002e86e, 0x0002ec6e, 0x0002f06e, 0x0002f46e,
+ 0x0002f86e, 0x0002fc6e, 0x0003006e, 0x0003046e,
+ 0x0003086e, 0x00030c6e, 0x0003106e, 0x0003146e,
+ 0x0003186e, 0x00031c6e, 0x0003206e, 0x0003246e,
+ 0x0003286e, 0x00032c6e, 0x0003306e, 0x0003346e,
+ 0x0003386e, 0x00033c6e, 0x0003406e, 0x0003446e,
+ 0x0003486e, 0x00034c6e, 0x0003506e, 0x0003546e,
+ 0x0003586e, 0x00035c6e, 0x0003606e, 0x0003646e,
+ 0x0003686e, 0x00036c6e, 0x0003706e, 0x0003746e,
+ 0x0003786e, 0x00037c6e, 0x0003806e, 0x0003846e,
+ 0x0003886e, 0x00038c6e, 0x0003906e, 0x0003946e,
+ 0x0003986e, 0x00039c6e, 0x0003a06e, 0x0003a46e,
+ 0x0003a86e, 0x0003ac6e, 0x0003b06e, 0x0003b46e,
+ 0x0003b86e, 0x0003bc6e, 0x0003c06e, 0x0003c46e,
+ 0x0003c86e, 0x0003cc6e, 0x0003d06e, 0x0003d46e,
+ 0x0003d86e, 0x0003dc6e, 0x0003e06e, 0x0003e46e,
+ 0x0003e86e, 0x0003ec6e, 0x0003f06e, 0x0003f46e,
+ 0x0003f86e, 0x0003fc6e, 0x0004006e, 0x0004046e,
+ 0x0004086e, 0x00040c6e, 0x0004106e, 0x0004146e,
+ 0x0004186e, 0x00041c6e, 0x0004206e, 0x0004246e,
+ 0x0004286e, 0x00042c6e, 0x0004306e, 0x0004346e,
+ 0x0004386e, 0x00043c6e, 0x0004406e, 0x0004446e,
+ 0x0004486e, 0x00044c6e, 0x0004506e, 0x0004546e,
+ 0x0004586e, 0x00045c6e, 0x0004606e, 0x0004646e,
+ 0x0004686e, 0x00046c6e, 0x0004706e, 0x0004746e,
+ 0x0004786e, 0x00047c6e, 0x0004806e, 0x0004846e,
+ 0x0004886e, 0x00048c6e, 0x0004906e, 0x0004946e,
+ 0x0004986e, 0x00049c6e, 0x0004a06e, 0x0004a46e,
+ 0x0004a86e, 0x0004ac6e, 0x0004b06e, 0x0004b46e,
+ 0x0004b86e, 0x0004bc6e, 0x0004c06e, 0x0004c46e,
+ 0x0004c86e, 0x0004cc6e, 0x0004d06e, 0x0004d46e,
+ 0x0004d86e, 0x0004dc6e, 0x0004e06e, 0x0004e46e,
+ 0x0004e86e, 0x0004ec6e, 0x0004f06e, 0x0004f46e,
+ 0x0004f86e, 0x0004fc6e, 0x0005006e, 0x0005046e,
+ 0x0005086e, 0x00050c6e, 0x0005106e, 0x0005146e,
+ 0x0005186e, 0x00051c6e, 0x0005206e, 0x0005246e,
+ 0x0005286e, 0x00052c6e, 0x0005306e, 0x0005346e,
+ 0x0005386e, 0x00053c6e, 0x0005406e, 0x0005446e,
+ 0x0005486e, 0x00054c6e, 0x0005506e, 0x0005546e,
+ 0x0005586e, 0x00055c6e, 0x0005606e, 0x0005646e,
+ 0x0005686e, 0x00056c6e, 0x0005706e, 0x0005746e,
+ 0x0005786e, 0x00057c6e, 0x0005806e, 0x0005846e,
+ 0x0005886e, 0x00058c6e, 0x0005906e, 0x0005946e,
+ 0x0005986e, 0x00059c6e, 0x0005a06e, 0x0005a46e,
+ 0x0005a86e, 0x0005ac6e, 0x0005b06e, 0x0005b46e,
+ 0x0005b86e, 0x0005bc6e, 0x0005c06e, 0x0005c46e,
+ 0x0005c86e, 0x0005cc6e, 0x0005d06e, 0x0005d46e,
+ 0x0005d86e, 0x0005dc6e, 0x0005e06e, 0x0005e46e,
+ 0x0005e86e, 0x0005ec6e, 0x0005f06e, 0x0005f46e,
+ 0x0005f86e, 0x0005fc6e, 0x0006006e, 0x0006046e,
+ 0x0006086e, 0x00060c6e, 0x0006106e, 0x0006146e,
+ 0x0006186e, 0x00061c6e, 0x0006206e, 0x0006246e,
+ 0x0006286e, 0x00062c6e, 0x0006306e, 0x0006346e,
+ 0x0006386e, 0x00063c6e, 0x0006406e, 0x0006446e,
+ 0x0006486e, 0x00064c6e, 0x0006506e, 0x0006546e,
+ 0x0006586e, 0x00065c6e, 0x0006606e, 0x0006646e,
+ 0x0006686e, 0x00066c6e, 0x0006706e, 0x0006746e,
+ 0x0006786e, 0x00067c6e, 0x0006806e, 0x0006846e,
+ 0x0006886e, 0x00068c6e, 0x0006906e, 0x0006946e,
+ 0x0006986e, 0x00069c6e, 0x0006a06e, 0x0006a46e,
+ 0x0006a86e, 0x0006ac6e, 0x0006b06e, 0x0006b46e,
+ 0x0006b86e, 0x0006bc6e, 0x0006c06e, 0x0006c46e,
+ 0x0006c86e, 0x0006cc6e, 0x0006d06e, 0x0006d46e,
+ 0x0006d86e, 0x0006dc6e, 0x0006e06e, 0x0006e46e,
+ 0x0006e86e, 0x0006ec6e, 0x0006f06e, 0x0006f46e,
+ 0x0006f86e, 0x0006fc6e, 0x0007006e, 0x0007046e,
+ 0x0007086e, 0x00070c6e, 0x0007106e, 0x0007146e,
+ 0x0007186e, 0x00071c6e, 0x0007206e, 0x0007246e,
+ 0x0007286e, 0x00072c6e, 0x0007306e, 0x0007346e,
+ 0x0007386e, 0x00073c6e, 0x0007406e, 0x0007446e,
+ 0x0007486e, 0x00074c6e, 0x0007506e, 0x0007546e,
+ 0x0007586e, 0x00075c6e, 0x0007606e, 0x0007646e,
+ 0x0007686e, 0x00076c6e, 0x0007706e, 0x0007746e,
+ 0x0007786e, 0x00077c6e, 0x0007806e, 0x0007846e,
+ 0x0007886e, 0x00078c6e, 0x0007906e, 0x0007946e,
+ 0x0007986e, 0x00079c6e, 0x0007a06e, 0x0007a46e,
+ 0x0007a86e, 0x0007ac6e, 0x0007b06e, 0x0007b46e,
+ 0x0007b86e, 0x0007bc6e, 0x0007c06e, 0x0007c46e,
+ 0x0007c86e, 0x0007cc6e, 0x0007d06e, 0x0007d46e,
+ 0x0007d86e, 0x0007dc6e, 0x0007e06e, 0x0007e46e,
+ 0x0007e86e, 0x0007ec6e, 0x0007f06e, 0x0007f46e,
+ 0x0007f86e, 0x0007fc6e, 0x0000000d, 0x0000010d,
+ 0x0000020d, 0x0000030d, 0x0000040d, 0x0000050d,
+ 0x0000060d, 0x0000070d, 0x0000080d, 0x0000090d,
+ 0x00000a0d, 0x00000b0d, 0x00000c0d, 0x00000d0d,
+ 0x00000e0d, 0x00000f0d, 0x0000100d, 0x0000110d,
+ 0x0000120d, 0x0000130d, 0x0000140d, 0x0000150d,
+ 0x0000160d, 0x0000170d, 0x0000180d, 0x0000190d,
+ 0x00001a0d, 0x00001b0d, 0x00001c0d, 0x00001d0d,
+ 0x00001e0d, 0x00001f0d, 0x0000200d, 0x0000210d,
+ 0x0000220d, 0x0000230d, 0x0000240d, 0x0000250d,
+ 0x0000260d, 0x0000270d, 0x0000280d, 0x0000290d,
+ 0x00002a0d, 0x00002b0d, 0x00002c0d, 0x00002d0d,
+ 0x00002e0d, 0x00002f0d, 0x0000300d, 0x0000310d,
+ 0x0000320d, 0x0000330d, 0x0000340d, 0x0000350d,
+ 0x0000360d, 0x0000370d, 0x0000380d, 0x0000390d,
+ 0x00003a0d, 0x00003b0d, 0x00003c0d, 0x00003d0d,
+ 0x00003e0d, 0x00003f0d, 0x0000400d, 0x0000410d,
+ 0x0000420d, 0x0000430d, 0x0000440d, 0x0000450d,
+ 0x0000460d, 0x0000470d, 0x0000480d, 0x0000490d,
+ 0x00004a0d, 0x00004b0d, 0x00004c0d, 0x00004d0d,
+ 0x00004e0d, 0x00004f0d, 0x0000500d, 0x0000510d,
+ 0x0000520d, 0x0000530d, 0x0000540d, 0x0000550d,
+ 0x0000560d, 0x0000570d, 0x0000580d, 0x0000590d,
+ 0x00005a0d, 0x00005b0d, 0x00005c0d, 0x00005d0d,
+ 0x00005e0d, 0x00005f0d, 0x0000600d, 0x0000610d,
+ 0x0000620d, 0x0000630d, 0x0000640d, 0x0000650d,
+ 0x0000660d, 0x0000670d, 0x0000680d, 0x0000690d,
+ 0x00006a0d, 0x00006b0d, 0x00006c0d, 0x00006d0d,
+ 0x00006e0d, 0x00006f0d, 0x0000700d, 0x0000710d,
+ 0x0000720d, 0x0000730d, 0x0000740d, 0x0000750d,
+ 0x0000760d, 0x0000770d, 0x0000780d, 0x0000790d,
+ 0x00007a0d, 0x00007b0d, 0x00007c0d, 0x00007d0d,
+ 0x00007e0d, 0x00007f0d, 0x0000800d, 0x0000810d,
+ 0x0000820d, 0x0000830d, 0x0000840d, 0x0000850d,
+ 0x0000860d, 0x0000870d, 0x0000880d, 0x0000890d,
+ 0x00008a0d, 0x00008b0d, 0x00008c0d, 0x00008d0d,
+ 0x00008e0d, 0x00008f0d, 0x0000900d, 0x0000910d,
+ 0x0000920d, 0x0000930d, 0x0000940d, 0x0000950d,
+ 0x0000960d, 0x0000970d, 0x0000980d, 0x0000990d,
+ 0x00009a0d, 0x00009b0d, 0x00009c0d, 0x00009d0d,
+ 0x00009e0d, 0x00009f0d, 0x0000a00d, 0x0000a10d,
+ 0x0000a20d, 0x0000a30d, 0x0000a40d, 0x0000a50d,
+ 0x0000a60d, 0x0000a70d, 0x0000a80d, 0x0000a90d,
+ 0x0000aa0d, 0x0000ab0d, 0x0000ac0d, 0x0000ad0d,
+ 0x0000ae0d, 0x0000af0d, 0x0000b00d, 0x0000b10d,
+ 0x0000b20d, 0x0000b30d, 0x0000b40d, 0x0000b50d,
+ 0x0000b60d, 0x0000b70d, 0x0000b80d, 0x0000b90d,
+ 0x0000ba0d, 0x0000bb0d, 0x0000bc0d, 0x0000bd0d,
+ 0x0000be0d, 0x0000bf0d, 0x0000c00d, 0x0000c10d,
+ 0x0000c20d, 0x0000c30d, 0x0000c40d, 0x0000c50d,
+ 0x0000c60d, 0x0000c70d, 0x0000c80d, 0x0000c90d,
+ 0x0000ca0d, 0x0000cb0d, 0x0000cc0d, 0x0000cd0d,
+ 0x0000ce0d, 0x0000cf0d, 0x0000d00d, 0x0000d10d,
+ 0x0000d20d, 0x0000d30d, 0x0000d40d, 0x0000d50d,
+ 0x0000d60d, 0x0000d70d, 0x0000d80d, 0x0000d90d,
+ 0x0000da0d, 0x0000db0d, 0x0000dc0d, 0x0000dd0d,
+ 0x0000de0d, 0x0000df0d, 0x0000e00d, 0x0000e10d,
+ 0x0000e20d, 0x0000e30d, 0x0000e40d, 0x0000e50d,
+ 0x0000e60d, 0x0000e70d, 0x0000e80d, 0x0000e90d,
+ 0x0000ea0d, 0x0000eb0d, 0x0000ec0d, 0x0000ed0d,
+ 0x0000ee0d, 0x0000ef0d, 0x0000f00d, 0x0000f10d,
+ 0x0000f20d, 0x0000f30d, 0x0000f40d, 0x0000f50d,
+ 0x0000f60d, 0x0000f70d, 0x0000f80d, 0x0000f90d,
+ 0x0000fa0d, 0x0000fb0d, 0x0000fc0d, 0x0000fd0d,
+ 0x0000fe0d, 0x0000ff0d, 0x0001000d, 0x0001010d,
+ 0x0001020d, 0x0001030d, 0x0001040d, 0x0001050d,
+ 0x0001060d, 0x0001070d, 0x0001080d, 0x0001090d,
+ 0x00010a0d, 0x00010b0d, 0x00010c0d, 0x00010d0d,
+ 0x00010e0d, 0x00010f0d, 0x0001100d, 0x0001110d,
+ 0x0001120d, 0x0001130d, 0x0001140d, 0x0001150d,
+ 0x0001160d, 0x0001170d, 0x0001180d, 0x0001190d,
+ 0x00011a0d, 0x00011b0d, 0x00011c0d, 0x00011d0d,
+ 0x00011e0d, 0x00011f0d, 0x0001200d, 0x0001210d,
+ 0x0001220d, 0x0001230d, 0x0001240d, 0x0001250d,
+ 0x0001260d, 0x0001270d, 0x0001280d, 0x0001290d,
+ 0x00012a0d, 0x00012b0d, 0x00012c0d, 0x00012d0d,
+ 0x00012e0d, 0x00012f0d, 0x0001300d, 0x0001310d,
+ 0x0001320d, 0x0001330d, 0x0001340d, 0x0001350d,
+ 0x0001360d, 0x0001370d, 0x0001380d, 0x0001390d,
+ 0x00013a0d, 0x00013b0d, 0x00013c0d, 0x00013d0d,
+ 0x00013e0d, 0x00013f0d, 0x0001400d, 0x0001410d,
+ 0x0001420d, 0x0001430d, 0x0001440d, 0x0001450d,
+ 0x0001460d, 0x0001470d, 0x0001480d, 0x0001490d,
+ 0x00014a0d, 0x00014b0d, 0x00014c0d, 0x00014d0d,
+ 0x00014e0d, 0x00014f0d, 0x0001500d, 0x0001510d,
+ 0x0001520d, 0x0001530d, 0x0001540d, 0x0001550d,
+ 0x0001560d, 0x0001570d, 0x0001580d, 0x0001590d,
+ 0x00015a0d, 0x00015b0d, 0x00015c0d, 0x00015d0d,
+ 0x00015e0d, 0x00015f0d, 0x0001600d, 0x0001610d,
+ 0x0001620d, 0x0001630d, 0x0001640d, 0x0001650d,
+ 0x0001660d, 0x0001670d, 0x0001680d, 0x0001690d,
+ 0x00016a0d, 0x00016b0d, 0x00016c0d, 0x00016d0d,
+ 0x00016e0d, 0x00016f0d, 0x0001700d, 0x0001710d,
+ 0x0001720d, 0x0001730d, 0x0001740d, 0x0001750d,
+ 0x0001760d, 0x0001770d, 0x0001780d, 0x0001790d,
+ 0x00017a0d, 0x00017b0d, 0x00017c0d, 0x00017d0d,
+ 0x00017e0d, 0x00017f0d, 0x0001800d, 0x0001810d,
+ 0x0001820d, 0x0001830d, 0x0001840d, 0x0001850d,
+ 0x0001860d, 0x0001870d, 0x0001880d, 0x0001890d,
+ 0x00018a0d, 0x00018b0d, 0x00018c0d, 0x00018d0d,
+ 0x00018e0d, 0x00018f0d, 0x0001900d, 0x0001910d,
+ 0x0001920d, 0x0001930d, 0x0001940d, 0x0001950d,
+ 0x0001960d, 0x0001970d, 0x0001980d, 0x0001990d,
+ 0x00019a0d, 0x00019b0d, 0x00019c0d, 0x00019d0d,
+ 0x00019e0d, 0x00019f0d, 0x0001a00d, 0x0001a10d,
+ 0x0001a20d, 0x0001a30d, 0x0001a40d, 0x0001a50d,
+ 0x0001a60d, 0x0001a70d, 0x0001a80d, 0x0001a90d,
+ 0x0001aa0d, 0x0001ab0d, 0x0001ac0d, 0x0001ad0d,
+ 0x0001ae0d, 0x0001af0d, 0x0001b00d, 0x0001b10d,
+ 0x0001b20d, 0x0001b30d, 0x0001b40d, 0x0001b50d,
+ 0x0001b60d, 0x0001b70d, 0x0001b80d, 0x0001b90d,
+ 0x0001ba0d, 0x0001bb0d, 0x0001bc0d, 0x0001bd0d,
+ 0x0001be0d, 0x0001bf0d, 0x0001c00d, 0x0001c10d,
+ 0x0001c20d, 0x0001c30d, 0x0001c40d, 0x0001c50d,
+ 0x0001c60d, 0x0001c70d, 0x0001c80d, 0x0001c90d,
+ 0x0001ca0d, 0x0001cb0d, 0x0001cc0d, 0x0001cd0d,
+ 0x0001ce0d, 0x0001cf0d, 0x0001d00d, 0x0001d10d,
+ 0x0001d20d, 0x0001d30d, 0x0001d40d, 0x0001d50d,
+ 0x0001d60d, 0x0001d70d, 0x0001d80d, 0x0001d90d,
+ 0x0001da0d, 0x0001db0d, 0x0001dc0d, 0x0001dd0d,
+ 0x0001de0d, 0x0001df0d, 0x0001e00d, 0x0001e10d,
+ 0x0001e20d, 0x0001e30d, 0x0001e40d, 0x0001e50d,
+ 0x0001e60d, 0x0001e70d, 0x0001e80d, 0x0001e90d,
+ 0x0001ea0d, 0x0001eb0d, 0x0001ec0d, 0x0001ed0d,
+ 0x0001ee0d, 0x0001ef0d, 0x0001f00d, 0x0001f10d,
+ 0x0001f20d, 0x0001f30d, 0x0001f40d, 0x0001f50d,
+ 0x0001f60d, 0x0001f70d, 0x0001f80d, 0x0001f90d,
+ 0x0001fa0d, 0x0001fb0d, 0x0001fc0d, 0x0001fd0d,
+ 0x0001fe0d, 0x0001ff0d, 0x0002000d, 0x0002010d,
+ 0x0002020d, 0x0002030d, 0x0002040d, 0x0002050d,
+ 0x0002060d, 0x0002070d, 0x0002080d, 0x0002090d,
+ 0x00020a0d, 0x00020b0d, 0x00020c0d, 0x00020d0d,
+ 0x00020e0d, 0x00020f0d, 0x0002100d, 0x0002110d,
+ 0x0002120d, 0x0002130d, 0x0002140d, 0x0002150d,
+ 0x0002160d, 0x0002170d, 0x0002180d, 0x0002190d,
+ 0x00021a0d, 0x00021b0d, 0x00021c0d, 0x00021d0d,
+ 0x00021e0d, 0x00021f0d, 0x0002200d, 0x0002210d,
+ 0x0002220d, 0x0002230d, 0x0002240d, 0x0002250d,
+ 0x0002260d, 0x0002270d, 0x0002280d, 0x0002290d,
+ 0x00022a0d, 0x00022b0d, 0x00022c0d, 0x00022d0d,
+ 0x00022e0d, 0x00022f0d, 0x0002300d, 0x0002310d,
+ 0x0002320d, 0x0002330d, 0x0002340d, 0x0002350d,
+ 0x0002360d, 0x0002370d, 0x0002380d, 0x0002390d,
+ 0x00023a0d, 0x00023b0d, 0x00023c0d, 0x00023d0d,
+ 0x00023e0d, 0x00023f0d, 0x0002400d, 0x0002410d,
+ 0x0002420d, 0x0002430d, 0x0002440d, 0x0002450d,
+ 0x0002460d, 0x0002470d, 0x0002480d, 0x0002490d,
+ 0x00024a0d, 0x00024b0d, 0x00024c0d, 0x00024d0d,
+ 0x00024e0d, 0x00024f0d, 0x0002500d, 0x0002510d,
+ 0x0002520d, 0x0002530d, 0x0002540d, 0x0002550d,
+ 0x0002560d, 0x0002570d, 0x0002580d, 0x0002590d,
+ 0x00025a0d, 0x00025b0d, 0x00025c0d, 0x00025d0d,
+ 0x00025e0d, 0x00025f0d, 0x0002600d, 0x0002610d,
+ 0x0002620d, 0x0002630d, 0x0002640d, 0x0002650d,
+ 0x0002660d, 0x0002670d, 0x0002680d, 0x0002690d,
+ 0x00026a0d, 0x00026b0d, 0x00026c0d, 0x00026d0d,
+ 0x00026e0d, 0x00026f0d, 0x0002700d, 0x0002710d,
+ 0x0002720d, 0x0002730d, 0x0002740d, 0x0002750d,
+ 0x0002760d, 0x0002770d, 0x0002780d, 0x0002790d,
+ 0x00027a0d, 0x00027b0d, 0x00027c0d, 0x00027d0d,
+ 0x00027e0d, 0x00027f0d, 0x0002800d, 0x0002810d,
+ 0x0002820d, 0x0002830d, 0x0002840d, 0x0002850d,
+ 0x0002860d, 0x0002870d, 0x0002880d, 0x0002890d,
+ 0x00028a0d, 0x00028b0d, 0x00028c0d, 0x00028d0d,
+ 0x00028e0d, 0x00028f0d, 0x0002900d, 0x0002910d,
+ 0x0002920d, 0x0002930d, 0x0002940d, 0x0002950d,
+ 0x0002960d, 0x0002970d, 0x0002980d, 0x0002990d,
+ 0x00029a0d, 0x00029b0d, 0x00029c0d, 0x00029d0d,
+ 0x00029e0d, 0x00029f0d, 0x0002a00d, 0x0002a10d,
+ 0x0002a20d, 0x0002a30d, 0x0002a40d, 0x0002a50d,
+ 0x0002a60d, 0x0002a70d, 0x0002a80d, 0x0002a90d,
+ 0x0002aa0d, 0x0002ab0d, 0x0002ac0d, 0x0002ad0d,
+ 0x0002ae0d, 0x0002af0d, 0x0002b00d, 0x0002b10d,
+ 0x0002b20d, 0x0002b30d, 0x0002b40d, 0x0002b50d,
+ 0x0002b60d, 0x0002b70d, 0x0002b80d, 0x0002b90d,
+ 0x0002ba0d, 0x0002bb0d, 0x0002bc0d, 0x0002bd0d,
+ 0x0002be0d, 0x0002bf0d, 0x0002c00d, 0x0002c10d,
+ 0x0002c20d, 0x0002c30d, 0x0002c40d, 0x0002c50d,
+ 0x0002c60d, 0x0002c70d, 0x0002c80d, 0x0002c90d,
+ 0x0002ca0d, 0x0002cb0d, 0x0002cc0d, 0x0002cd0d,
+ 0x0002ce0d, 0x0002cf0d, 0x0002d00d, 0x0002d10d,
+ 0x0002d20d, 0x0002d30d, 0x0002d40d, 0x0002d50d,
+ 0x0002d60d, 0x0002d70d, 0x0002d80d, 0x0002d90d,
+ 0x0002da0d, 0x0002db0d, 0x0002dc0d, 0x0002dd0d,
+ 0x0002de0d, 0x0002df0d, 0x0002e00d, 0x0002e10d,
+ 0x0002e20d, 0x0002e30d, 0x0002e40d, 0x0002e50d,
+ 0x0002e60d, 0x0002e70d, 0x0002e80d, 0x0002e90d,
+ 0x0002ea0d, 0x0002eb0d, 0x0002ec0d, 0x0002ed0d,
+ 0x0002ee0d, 0x0002ef0d, 0x0002f00d, 0x0002f10d,
+ 0x0002f20d, 0x0002f30d, 0x0002f40d, 0x0002f50d,
+ 0x0002f60d, 0x0002f70d, 0x0002f80d, 0x0002f90d,
+ 0x0002fa0d, 0x0002fb0d, 0x0002fc0d, 0x0002fd0d,
+ 0x0002fe0d, 0x0002ff0d, 0x0003000d, 0x0003010d,
+ 0x0003020d, 0x0003030d, 0x0003040d, 0x0003050d,
+ 0x0003060d, 0x0003070d, 0x0003080d, 0x0003090d,
+ 0x00030a0d, 0x00030b0d, 0x00030c0d, 0x00030d0d,
+ 0x00030e0d, 0x00030f0d, 0x0003100d, 0x0003110d,
+ 0x0003120d, 0x0003130d, 0x0003140d, 0x0003150d,
+ 0x0003160d, 0x0003170d, 0x0003180d, 0x0003190d,
+ 0x00031a0d, 0x00031b0d, 0x00031c0d, 0x00031d0d,
+ 0x00031e0d, 0x00031f0d, 0x0003200d, 0x0003210d,
+ 0x0003220d, 0x0003230d, 0x0003240d, 0x0003250d,
+ 0x0003260d, 0x0003270d, 0x0003280d, 0x0003290d,
+ 0x00032a0d, 0x00032b0d, 0x00032c0d, 0x00032d0d,
+ 0x00032e0d, 0x00032f0d, 0x0003300d, 0x0003310d,
+ 0x0003320d, 0x0003330d, 0x0003340d, 0x0003350d,
+ 0x0003360d, 0x0003370d, 0x0003380d, 0x0003390d,
+ 0x00033a0d, 0x00033b0d, 0x00033c0d, 0x00033d0d,
+ 0x00033e0d, 0x00033f0d, 0x0003400d, 0x0003410d,
+ 0x0003420d, 0x0003430d, 0x0003440d, 0x0003450d,
+ 0x0003460d, 0x0003470d, 0x0003480d, 0x0003490d,
+ 0x00034a0d, 0x00034b0d, 0x00034c0d, 0x00034d0d,
+ 0x00034e0d, 0x00034f0d, 0x0003500d, 0x0003510d,
+ 0x0003520d, 0x0003530d, 0x0003540d, 0x0003550d,
+ 0x0003560d, 0x0003570d, 0x0003580d, 0x0003590d,
+ 0x00035a0d, 0x00035b0d, 0x00035c0d, 0x00035d0d,
+ 0x00035e0d, 0x00035f0d, 0x0003600d, 0x0003610d,
+ 0x0003620d, 0x0003630d, 0x0003640d, 0x0003650d,
+ 0x0003660d, 0x0003670d, 0x0003680d, 0x0003690d,
+ 0x00036a0d, 0x00036b0d, 0x00036c0d, 0x00036d0d,
+ 0x00036e0d, 0x00036f0d, 0x0003700d, 0x0003710d,
+ 0x0003720d, 0x0003730d, 0x0003740d, 0x0003750d,
+ 0x0003760d, 0x0003770d, 0x0003780d, 0x0003790d,
+ 0x00037a0d, 0x00037b0d, 0x00037c0d, 0x00037d0d,
+ 0x00037e0d, 0x00037f0d, 0x0003800d, 0x0003810d,
+ 0x0003820d, 0x0003830d, 0x0003840d, 0x0003850d,
+ 0x0003860d, 0x0003870d, 0x0003880d, 0x0003890d,
+ 0x00038a0d, 0x00038b0d, 0x00038c0d, 0x00038d0d,
+ 0x00038e0d, 0x00038f0d, 0x0003900d, 0x0003910d,
+ 0x0003920d, 0x0003930d, 0x0003940d, 0x0003950d,
+ 0x0003960d, 0x0003970d, 0x0003980d, 0x0003990d,
+ 0x00039a0d, 0x00039b0d, 0x00039c0d, 0x00039d0d,
+ 0x00039e0d, 0x00039f0d, 0x0003a00d, 0x0003a10d,
+ 0x0003a20d, 0x0003a30d, 0x0003a40d, 0x0003a50d,
+ 0x0003a60d, 0x0003a70d, 0x0003a80d, 0x0003a90d,
+ 0x0003aa0d, 0x0003ab0d, 0x0003ac0d, 0x0003ad0d,
+ 0x0003ae0d, 0x0003af0d, 0x0003b00d, 0x0003b10d,
+ 0x0003b20d, 0x0003b30d, 0x0003b40d, 0x0003b50d,
+ 0x0003b60d, 0x0003b70d, 0x0003b80d, 0x0003b90d,
+ 0x0003ba0d, 0x0003bb0d, 0x0003bc0d, 0x0003bd0d,
+ 0x0003be0d, 0x0003bf0d, 0x0003c00d, 0x0003c10d,
+ 0x0003c20d, 0x0003c30d, 0x0003c40d, 0x0003c50d,
+ 0x0003c60d, 0x0003c70d, 0x0003c80d, 0x0003c90d,
+ 0x0003ca0d, 0x0003cb0d, 0x0003cc0d, 0x0003cd0d,
+ 0x0003ce0d, 0x0003cf0d, 0x0003d00d, 0x0003d10d,
+ 0x0003d20d, 0x0003d30d, 0x0003d40d, 0x0003d50d,
+ 0x0003d60d, 0x0003d70d, 0x0003d80d, 0x0003d90d,
+ 0x0003da0d, 0x0003db0d, 0x0003dc0d, 0x0003dd0d,
+ 0x0003de0d, 0x0003df0d, 0x0003e00d, 0x0003e10d,
+ 0x0003e20d, 0x0003e30d, 0x0003e40d, 0x0003e50d,
+ 0x0003e60d, 0x0003e70d, 0x0003e80d, 0x0003e90d,
+ 0x0003ea0d, 0x0003eb0d, 0x0003ec0d, 0x0003ed0d,
+ 0x0003ee0d, 0x0003ef0d, 0x0003f00d, 0x0003f10d,
+ 0x0003f20d, 0x0003f30d, 0x0003f40d, 0x0003f50d,
+ 0x0003f60d, 0x0003f70d, 0x0003f80d, 0x0003f90d,
+ 0x0003fa0d, 0x0003fb0d, 0x0003fc0d, 0x0003fd0d,
+ 0x0003fe0d, 0x0003ff0d, 0x0000026f, 0x0000066f,
+ 0x00000a6f, 0x00000e6f, 0x0000126f, 0x0000166f,
+ 0x00001a6f, 0x00001e6f, 0x0000226f, 0x0000266f,
+ 0x00002a6f, 0x00002e6f, 0x0000326f, 0x0000366f,
+ 0x00003a6f, 0x00003e6f, 0x0000426f, 0x0000466f,
+ 0x00004a6f, 0x00004e6f, 0x0000526f, 0x0000566f,
+ 0x00005a6f, 0x00005e6f, 0x0000626f, 0x0000666f,
+ 0x00006a6f, 0x00006e6f, 0x0000726f, 0x0000766f,
+ 0x00007a6f, 0x00007e6f, 0x0000826f, 0x0000866f,
+ 0x00008a6f, 0x00008e6f, 0x0000926f, 0x0000966f,
+ 0x00009a6f, 0x00009e6f, 0x0000a26f, 0x0000a66f,
+ 0x0000aa6f, 0x0000ae6f, 0x0000b26f, 0x0000b66f,
+ 0x0000ba6f, 0x0000be6f, 0x0000c26f, 0x0000c66f,
+ 0x0000ca6f, 0x0000ce6f, 0x0000d26f, 0x0000d66f,
+ 0x0000da6f, 0x0000de6f, 0x0000e26f, 0x0000e66f,
+ 0x0000ea6f, 0x0000ee6f, 0x0000f26f, 0x0000f66f,
+ 0x0000fa6f, 0x0000fe6f, 0x0001026f, 0x0001066f,
+ 0x00010a6f, 0x00010e6f, 0x0001126f, 0x0001166f,
+ 0x00011a6f, 0x00011e6f, 0x0001226f, 0x0001266f,
+ 0x00012a6f, 0x00012e6f, 0x0001326f, 0x0001366f,
+ 0x00013a6f, 0x00013e6f, 0x0001426f, 0x0001466f,
+ 0x00014a6f, 0x00014e6f, 0x0001526f, 0x0001566f,
+ 0x00015a6f, 0x00015e6f, 0x0001626f, 0x0001666f,
+ 0x00016a6f, 0x00016e6f, 0x0001726f, 0x0001766f,
+ 0x00017a6f, 0x00017e6f, 0x0001826f, 0x0001866f,
+ 0x00018a6f, 0x00018e6f, 0x0001926f, 0x0001966f,
+ 0x00019a6f, 0x00019e6f, 0x0001a26f, 0x0001a66f,
+ 0x0001aa6f, 0x0001ae6f, 0x0001b26f, 0x0001b66f,
+ 0x0001ba6f, 0x0001be6f, 0x0001c26f, 0x0001c66f,
+ 0x0001ca6f, 0x0001ce6f, 0x0001d26f, 0x0001d66f,
+ 0x0001da6f, 0x0001de6f, 0x0001e26f, 0x0001e66f,
+ 0x0001ea6f, 0x0001ee6f, 0x0001f26f, 0x0001f66f,
+ 0x0001fa6f, 0x0001fe6f, 0x0002026f, 0x0002066f,
+ 0x00020a6f, 0x00020e6f, 0x0002126f, 0x0002166f,
+ 0x00021a6f, 0x00021e6f, 0x0002226f, 0x0002266f,
+ 0x00022a6f, 0x00022e6f, 0x0002326f, 0x0002366f,
+ 0x00023a6f, 0x00023e6f, 0x0002426f, 0x0002466f,
+ 0x00024a6f, 0x00024e6f, 0x0002526f, 0x0002566f,
+ 0x00025a6f, 0x00025e6f, 0x0002626f, 0x0002666f,
+ 0x00026a6f, 0x00026e6f, 0x0002726f, 0x0002766f,
+ 0x00027a6f, 0x00027e6f, 0x0002826f, 0x0002866f,
+ 0x00028a6f, 0x00028e6f, 0x0002926f, 0x0002966f,
+ 0x00029a6f, 0x00029e6f, 0x0002a26f, 0x0002a66f,
+ 0x0002aa6f, 0x0002ae6f, 0x0002b26f, 0x0002b66f,
+ 0x0002ba6f, 0x0002be6f, 0x0002c26f, 0x0002c66f,
+ 0x0002ca6f, 0x0002ce6f, 0x0002d26f, 0x0002d66f,
+ 0x0002da6f, 0x0002de6f, 0x0002e26f, 0x0002e66f,
+ 0x0002ea6f, 0x0002ee6f, 0x0002f26f, 0x0002f66f,
+ 0x0002fa6f, 0x0002fe6f, 0x0003026f, 0x0003066f,
+ 0x00030a6f, 0x00030e6f, 0x0003126f, 0x0003166f,
+ 0x00031a6f, 0x00031e6f, 0x0003226f, 0x0003266f,
+ 0x00032a6f, 0x00032e6f, 0x0003326f, 0x0003366f,
+ 0x00033a6f, 0x00033e6f, 0x0003426f, 0x0003466f,
+ 0x00034a6f, 0x00034e6f, 0x0003526f, 0x0003566f,
+ 0x00035a6f, 0x00035e6f, 0x0003626f, 0x0003666f,
+ 0x00036a6f, 0x00036e6f, 0x0003726f, 0x0003766f,
+ 0x00037a6f, 0x00037e6f, 0x0003826f, 0x0003866f,
+ 0x00038a6f, 0x00038e6f, 0x0003926f, 0x0003966f,
+ 0x00039a6f, 0x00039e6f, 0x0003a26f, 0x0003a66f,
+ 0x0003aa6f, 0x0003ae6f, 0x0003b26f, 0x0003b66f,
+ 0x0003ba6f, 0x0003be6f, 0x0003c26f, 0x0003c66f,
+ 0x0003ca6f, 0x0003ce6f, 0x0003d26f, 0x0003d66f,
+ 0x0003da6f, 0x0003de6f, 0x0003e26f, 0x0003e66f,
+ 0x0003ea6f, 0x0003ee6f, 0x0003f26f, 0x0003f66f,
+ 0x0003fa6f, 0x0003fe6f, 0x0004026f, 0x0004066f,
+ 0x00040a6f, 0x00040e6f, 0x0004126f, 0x0004166f,
+ 0x00041a6f, 0x00041e6f, 0x0004226f, 0x0004266f,
+ 0x00042a6f, 0x00042e6f, 0x0004326f, 0x0004366f,
+ 0x00043a6f, 0x00043e6f, 0x0004426f, 0x0004466f,
+ 0x00044a6f, 0x00044e6f, 0x0004526f, 0x0004566f,
+ 0x00045a6f, 0x00045e6f, 0x0004626f, 0x0004666f,
+ 0x00046a6f, 0x00046e6f, 0x0004726f, 0x0004766f,
+ 0x00047a6f, 0x00047e6f, 0x0004826f, 0x0004866f,
+ 0x00048a6f, 0x00048e6f, 0x0004926f, 0x0004966f,
+ 0x00049a6f, 0x00049e6f, 0x0004a26f, 0x0004a66f,
+ 0x0004aa6f, 0x0004ae6f, 0x0004b26f, 0x0004b66f,
+ 0x0004ba6f, 0x0004be6f, 0x0004c26f, 0x0004c66f,
+ 0x0004ca6f, 0x0004ce6f, 0x0004d26f, 0x0004d66f,
+ 0x0004da6f, 0x0004de6f, 0x0004e26f, 0x0004e66f,
+ 0x0004ea6f, 0x0004ee6f, 0x0004f26f, 0x0004f66f,
+ 0x0004fa6f, 0x0004fe6f, 0x0005026f, 0x0005066f,
+ 0x00050a6f, 0x00050e6f, 0x0005126f, 0x0005166f,
+ 0x00051a6f, 0x00051e6f, 0x0005226f, 0x0005266f,
+ 0x00052a6f, 0x00052e6f, 0x0005326f, 0x0005366f,
+ 0x00053a6f, 0x00053e6f, 0x0005426f, 0x0005466f,
+ 0x00054a6f, 0x00054e6f, 0x0005526f, 0x0005566f,
+ 0x00055a6f, 0x00055e6f, 0x0005626f, 0x0005666f,
+ 0x00056a6f, 0x00056e6f, 0x0005726f, 0x0005766f,
+ 0x00057a6f, 0x00057e6f, 0x0005826f, 0x0005866f,
+ 0x00058a6f, 0x00058e6f, 0x0005926f, 0x0005966f,
+ 0x00059a6f, 0x00059e6f, 0x0005a26f, 0x0005a66f,
+ 0x0005aa6f, 0x0005ae6f, 0x0005b26f, 0x0005b66f,
+ 0x0005ba6f, 0x0005be6f, 0x0005c26f, 0x0005c66f,
+ 0x0005ca6f, 0x0005ce6f, 0x0005d26f, 0x0005d66f,
+ 0x0005da6f, 0x0005de6f, 0x0005e26f, 0x0005e66f,
+ 0x0005ea6f, 0x0005ee6f, 0x0005f26f, 0x0005f66f,
+ 0x0005fa6f, 0x0005fe6f, 0x0006026f, 0x0006066f,
+ 0x00060a6f, 0x00060e6f, 0x0006126f, 0x0006166f,
+ 0x00061a6f, 0x00061e6f, 0x0006226f, 0x0006266f,
+ 0x00062a6f, 0x00062e6f, 0x0006326f, 0x0006366f,
+ 0x00063a6f, 0x00063e6f, 0x0006426f, 0x0006466f,
+ 0x00064a6f, 0x00064e6f, 0x0006526f, 0x0006566f,
+ 0x00065a6f, 0x00065e6f, 0x0006626f, 0x0006666f,
+ 0x00066a6f, 0x00066e6f, 0x0006726f, 0x0006766f,
+ 0x00067a6f, 0x00067e6f, 0x0006826f, 0x0006866f,
+ 0x00068a6f, 0x00068e6f, 0x0006926f, 0x0006966f,
+ 0x00069a6f, 0x00069e6f, 0x0006a26f, 0x0006a66f,
+ 0x0006aa6f, 0x0006ae6f, 0x0006b26f, 0x0006b66f,
+ 0x0006ba6f, 0x0006be6f, 0x0006c26f, 0x0006c66f,
+ 0x0006ca6f, 0x0006ce6f, 0x0006d26f, 0x0006d66f,
+ 0x0006da6f, 0x0006de6f, 0x0006e26f, 0x0006e66f,
+ 0x0006ea6f, 0x0006ee6f, 0x0006f26f, 0x0006f66f,
+ 0x0006fa6f, 0x0006fe6f, 0x0007026f, 0x0007066f,
+ 0x00070a6f, 0x00070e6f, 0x0007126f, 0x0007166f,
+ 0x00071a6f, 0x00071e6f, 0x0007226f, 0x0007266f,
+ 0x00072a6f, 0x00072e6f, 0x0007326f, 0x0007366f,
+ 0x00073a6f, 0x00073e6f, 0x0007426f, 0x0007466f,
+ 0x00074a6f, 0x00074e6f, 0x0007526f, 0x0007566f,
+ 0x00075a6f, 0x00075e6f, 0x0007626f, 0x0007666f,
+ 0x00076a6f, 0x00076e6f, 0x0007726f, 0x0007766f,
+ 0x00077a6f, 0x00077e6f, 0x0007826f, 0x0007866f,
+ 0x00078a6f, 0x00078e6f, 0x0007926f, 0x0007966f,
+ 0x00079a6f, 0x00079e6f, 0x0007a26f, 0x0007a66f,
+ 0x0007aa6f, 0x0007ae6f, 0x0007b26f, 0x0007b66f,
+ 0x0007ba6f, 0x0007be6f, 0x0007c26f, 0x0007c66f,
+ 0x0007ca6f, 0x0007ce6f, 0x0007d26f, 0x0007d66f,
+ 0x0007da6f, 0x0007de6f, 0x0007e26f, 0x0007e66f,
+ 0x0007ea6f, 0x0007ee6f, 0x0007f26f, 0x0007f66f,
+ 0x0007fa6f, 0x0007fe6f, 0x0008026f, 0x0008066f,
+ 0x00080a6f, 0x00080e6f, 0x0008126f, 0x0008166f,
+ 0x00081a6f, 0x00081e6f, 0x0008226f, 0x0008266f,
+ 0x00082a6f, 0x00082e6f, 0x0008326f, 0x0008366f,
+ 0x00083a6f, 0x00083e6f, 0x0008426f, 0x0008466f,
+ 0x00084a6f, 0x00084e6f, 0x0008526f, 0x0008566f,
+ 0x00085a6f, 0x00085e6f, 0x0008626f, 0x0008666f,
+ 0x00086a6f, 0x00086e6f, 0x0008726f, 0x0008766f,
+ 0x00087a6f, 0x00087e6f, 0x0008826f, 0x0008866f,
+ 0x00088a6f, 0x00088e6f, 0x0008926f, 0x0008966f,
+ 0x00089a6f, 0x00089e6f, 0x0008a26f, 0x0008a66f,
+ 0x0008aa6f, 0x0008ae6f, 0x0008b26f, 0x0008b66f,
+ 0x0008ba6f, 0x0008be6f, 0x0008c26f, 0x0008c66f,
+ 0x0008ca6f, 0x0008ce6f, 0x0008d26f, 0x0008d66f,
+ 0x0008da6f, 0x0008de6f, 0x0008e26f, 0x0008e66f,
+ 0x0008ea6f, 0x0008ee6f, 0x0008f26f, 0x0008f66f,
+ 0x0008fa6f, 0x0008fe6f, 0x0009026f, 0x0009066f,
+ 0x00090a6f, 0x00090e6f, 0x0009126f, 0x0009166f,
+ 0x00091a6f, 0x00091e6f, 0x0009226f, 0x0009266f,
+ 0x00092a6f, 0x00092e6f, 0x0009326f, 0x0009366f,
+ 0x00093a6f, 0x00093e6f, 0x0009426f, 0x0009466f,
+ 0x00094a6f, 0x00094e6f, 0x0009526f, 0x0009566f,
+ 0x00095a6f, 0x00095e6f, 0x0009626f, 0x0009666f,
+ 0x00096a6f, 0x00096e6f, 0x0009726f, 0x0009766f,
+ 0x00097a6f, 0x00097e6f, 0x0009826f, 0x0009866f,
+ 0x00098a6f, 0x00098e6f, 0x0009926f, 0x0009966f,
+ 0x00099a6f, 0x00099e6f, 0x0009a26f, 0x0009a66f,
+ 0x0009aa6f, 0x0009ae6f, 0x0009b26f, 0x0009b66f,
+ 0x0009ba6f, 0x0009be6f, 0x0009c26f, 0x0009c66f,
+ 0x0009ca6f, 0x0009ce6f, 0x0009d26f, 0x0009d66f,
+ 0x0009da6f, 0x0009de6f, 0x0009e26f, 0x0009e66f,
+ 0x0009ea6f, 0x0009ee6f, 0x0009f26f, 0x0009f66f,
+ 0x0009fa6f, 0x0009fe6f, 0x000a026f, 0x000a066f,
+ 0x000a0a6f, 0x000a0e6f, 0x000a126f, 0x000a166f,
+ 0x000a1a6f, 0x000a1e6f, 0x000a226f, 0x000a266f,
+ 0x000a2a6f, 0x000a2e6f, 0x000a326f, 0x000a366f,
+ 0x000a3a6f, 0x000a3e6f, 0x000a426f, 0x000a466f,
+ 0x000a4a6f, 0x000a4e6f, 0x000a526f, 0x000a566f,
+ 0x000a5a6f, 0x000a5e6f, 0x000a626f, 0x000a666f,
+ 0x000a6a6f, 0x000a6e6f, 0x000a726f, 0x000a766f,
+ 0x000a7a6f, 0x000a7e6f, 0x000a826f, 0x000a866f,
+ 0x000a8a6f, 0x000a8e6f, 0x000a926f, 0x000a966f,
+ 0x000a9a6f, 0x000a9e6f, 0x000aa26f, 0x000aa66f,
+ 0x000aaa6f, 0x000aae6f, 0x000ab26f, 0x000ab66f,
+ 0x000aba6f, 0x000abe6f, 0x000ac26f, 0x000ac66f,
+ 0x000aca6f, 0x000ace6f, 0x000ad26f, 0x000ad66f,
+ 0x000ada6f, 0x000ade6f, 0x000ae26f, 0x000ae66f,
+ 0x000aea6f, 0x000aee6f, 0x000af26f, 0x000af66f,
+ 0x000afa6f, 0x000afe6f, 0x000b026f, 0x000b066f,
+ 0x000b0a6f, 0x000b0e6f, 0x000b126f, 0x000b166f,
+ 0x000b1a6f, 0x000b1e6f, 0x000b226f, 0x000b266f,
+ 0x000b2a6f, 0x000b2e6f, 0x000b326f, 0x000b366f,
+ 0x000b3a6f, 0x000b3e6f, 0x000b426f, 0x000b466f,
+ 0x000b4a6f, 0x000b4e6f, 0x000b526f, 0x000b566f,
+ 0x000b5a6f, 0x000b5e6f, 0x000b626f, 0x000b666f,
+ 0x000b6a6f, 0x000b6e6f, 0x000b726f, 0x000b766f,
+ 0x000b7a6f, 0x000b7e6f, 0x000b826f, 0x000b866f,
+ 0x000b8a6f, 0x000b8e6f, 0x000b926f, 0x000b966f,
+ 0x000b9a6f, 0x000b9e6f, 0x000ba26f, 0x000ba66f,
+ 0x000baa6f, 0x000bae6f, 0x000bb26f, 0x000bb66f,
+ 0x000bba6f, 0x000bbe6f, 0x000bc26f, 0x000bc66f,
+ 0x000bca6f, 0x000bce6f, 0x000bd26f, 0x000bd66f,
+ 0x000bda6f, 0x000bde6f, 0x000be26f, 0x000be66f,
+ 0x000bea6f, 0x000bee6f, 0x000bf26f, 0x000bf66f,
+ 0x000bfa6f, 0x000bfe6f, 0x000c026f, 0x000c066f,
+ 0x000c0a6f, 0x000c0e6f, 0x000c126f, 0x000c166f,
+ 0x000c1a6f, 0x000c1e6f, 0x000c226f, 0x000c266f,
+ 0x000c2a6f, 0x000c2e6f, 0x000c326f, 0x000c366f,
+ 0x000c3a6f, 0x000c3e6f, 0x000c426f, 0x000c466f,
+ 0x000c4a6f, 0x000c4e6f, 0x000c526f, 0x000c566f,
+ 0x000c5a6f, 0x000c5e6f, 0x000c626f, 0x000c666f,
+ 0x000c6a6f, 0x000c6e6f, 0x000c726f, 0x000c766f,
+ 0x000c7a6f, 0x000c7e6f, 0x000c826f, 0x000c866f,
+ 0x000c8a6f, 0x000c8e6f, 0x000c926f, 0x000c966f,
+ 0x000c9a6f, 0x000c9e6f, 0x000ca26f, 0x000ca66f,
+ 0x000caa6f, 0x000cae6f, 0x000cb26f, 0x000cb66f,
+ 0x000cba6f, 0x000cbe6f, 0x000cc26f, 0x000cc66f,
+ 0x000cca6f, 0x000cce6f, 0x000cd26f, 0x000cd66f,
+ 0x000cda6f, 0x000cde6f, 0x000ce26f, 0x000ce66f,
+ 0x000cea6f, 0x000cee6f, 0x000cf26f, 0x000cf66f,
+ 0x000cfa6f, 0x000cfe6f, 0x000d026f, 0x000d066f,
+ 0x000d0a6f, 0x000d0e6f, 0x000d126f, 0x000d166f,
+ 0x000d1a6f, 0x000d1e6f, 0x000d226f, 0x000d266f,
+ 0x000d2a6f, 0x000d2e6f, 0x000d326f, 0x000d366f,
+ 0x000d3a6f, 0x000d3e6f, 0x000d426f, 0x000d466f,
+ 0x000d4a6f, 0x000d4e6f, 0x000d526f, 0x000d566f,
+ 0x000d5a6f, 0x000d5e6f, 0x000d626f, 0x000d666f,
+ 0x000d6a6f, 0x000d6e6f, 0x000d726f, 0x000d766f,
+ 0x000d7a6f, 0x000d7e6f, 0x000d826f, 0x000d866f,
+ 0x000d8a6f, 0x000d8e6f, 0x000d926f, 0x000d966f,
+ 0x000d9a6f, 0x000d9e6f, 0x000da26f, 0x000da66f,
+ 0x000daa6f, 0x000dae6f, 0x000db26f, 0x000db66f,
+ 0x000dba6f, 0x000dbe6f, 0x000dc26f, 0x000dc66f,
+ 0x000dca6f, 0x000dce6f, 0x000dd26f, 0x000dd66f,
+ 0x000dda6f, 0x000dde6f, 0x000de26f, 0x000de66f,
+ 0x000dea6f, 0x000dee6f, 0x000df26f, 0x000df66f,
+ 0x000dfa6f, 0x000dfe6f, 0x000e026f, 0x000e066f,
+ 0x000e0a6f, 0x000e0e6f, 0x000e126f, 0x000e166f,
+ 0x000e1a6f, 0x000e1e6f, 0x000e226f, 0x000e266f,
+ 0x000e2a6f, 0x000e2e6f, 0x000e326f, 0x000e366f,
+ 0x000e3a6f, 0x000e3e6f, 0x000e426f, 0x000e466f,
+ 0x000e4a6f, 0x000e4e6f, 0x000e526f, 0x000e566f,
+ 0x000e5a6f, 0x000e5e6f, 0x000e626f, 0x000e666f,
+ 0x000e6a6f, 0x000e6e6f, 0x000e726f, 0x000e766f,
+ 0x000e7a6f, 0x000e7e6f, 0x000e826f, 0x000e866f,
+ 0x000e8a6f, 0x000e8e6f, 0x000e926f, 0x000e966f,
+ 0x000e9a6f, 0x000e9e6f, 0x000ea26f, 0x000ea66f,
+ 0x000eaa6f, 0x000eae6f, 0x000eb26f, 0x000eb66f,
+ 0x000eba6f, 0x000ebe6f, 0x000ec26f, 0x000ec66f,
+ 0x000eca6f, 0x000ece6f, 0x000ed26f, 0x000ed66f,
+ 0x000eda6f, 0x000ede6f, 0x000ee26f, 0x000ee66f,
+ 0x000eea6f, 0x000eee6f, 0x000ef26f, 0x000ef66f,
+ 0x000efa6f, 0x000efe6f, 0x000f026f, 0x000f066f,
+ 0x000f0a6f, 0x000f0e6f, 0x000f126f, 0x000f166f,
+ 0x000f1a6f, 0x000f1e6f, 0x000f226f, 0x000f266f,
+ 0x000f2a6f, 0x000f2e6f, 0x000f326f, 0x000f366f,
+ 0x000f3a6f, 0x000f3e6f, 0x000f426f, 0x000f466f,
+ 0x000f4a6f, 0x000f4e6f, 0x000f526f, 0x000f566f,
+ 0x000f5a6f, 0x000f5e6f, 0x000f626f, 0x000f666f,
+ 0x000f6a6f, 0x000f6e6f, 0x000f726f, 0x000f766f,
+ 0x000f7a6f, 0x000f7e6f, 0x000f826f, 0x000f866f,
+ 0x000f8a6f, 0x000f8e6f, 0x000f926f, 0x000f966f,
+ 0x000f9a6f, 0x000f9e6f, 0x000fa26f, 0x000fa66f,
+ 0x000faa6f, 0x000fae6f, 0x000fb26f, 0x000fb66f,
+ 0x000fba6f, 0x000fbe6f, 0x000fc26f, 0x000fc66f,
+ 0x000fca6f, 0x000fce6f, 0x000fd26f, 0x000fd66f,
+ 0x000fda6f, 0x000fde6f, 0x000fe26f, 0x000fe66f,
+ 0x000fea6f, 0x000fee6f, 0x000ff26f, 0x000ff66f,
+ 0x000ffa6f, 0x000ffe6f, 0x000001cf, 0x000003cf,
+ 0x000005cf, 0x000007cf, 0x000009cf, 0x00000bcf,
+ 0x00000dcf, 0x00000fcf, 0x000011cf, 0x000013cf,
+ 0x000015cf, 0x000017cf, 0x000019cf, 0x00001bcf,
+ 0x00001dcf, 0x00001fcf, 0x000021cf, 0x000023cf,
+ 0x000025cf, 0x000027cf, 0x000029cf, 0x00002bcf,
+ 0x00002dcf, 0x00002fcf, 0x000031cf, 0x000033cf,
+ 0x000035cf, 0x000037cf, 0x000039cf, 0x00003bcf,
+ 0x00003dcf, 0x00003fcf, 0x000041cf, 0x000043cf,
+ 0x000045cf, 0x000047cf, 0x000049cf, 0x00004bcf,
+ 0x00004dcf, 0x00004fcf, 0x000051cf, 0x000053cf,
+ 0x000055cf, 0x000057cf, 0x000059cf, 0x00005bcf,
+ 0x00005dcf, 0x00005fcf, 0x000061cf, 0x000063cf,
+ 0x000065cf, 0x000067cf, 0x000069cf, 0x00006bcf,
+ 0x00006dcf, 0x00006fcf, 0x000071cf, 0x000073cf,
+ 0x000075cf, 0x000077cf, 0x000079cf, 0x00007bcf,
+ 0x00007dcf, 0x00007fcf, 0x000081cf, 0x000083cf,
+ 0x000085cf, 0x000087cf, 0x000089cf, 0x00008bcf,
+ 0x00008dcf, 0x00008fcf, 0x000091cf, 0x000093cf,
+ 0x000095cf, 0x000097cf, 0x000099cf, 0x00009bcf,
+ 0x00009dcf, 0x00009fcf, 0x0000a1cf, 0x0000a3cf,
+ 0x0000a5cf, 0x0000a7cf, 0x0000a9cf, 0x0000abcf,
+ 0x0000adcf, 0x0000afcf, 0x0000b1cf, 0x0000b3cf,
+ 0x0000b5cf, 0x0000b7cf, 0x0000b9cf, 0x0000bbcf,
+ 0x0000bdcf, 0x0000bfcf, 0x0000c1cf, 0x0000c3cf,
+ 0x0000c5cf, 0x0000c7cf, 0x0000c9cf, 0x0000cbcf,
+ 0x0000cdcf, 0x0000cfcf, 0x0000d1cf, 0x0000d3cf,
+ 0x0000d5cf, 0x0000d7cf, 0x0000d9cf, 0x0000dbcf,
+ 0x0000ddcf, 0x0000dfcf, 0x0000e1cf, 0x0000e3cf,
+ 0x0000e5cf, 0x0000e7cf, 0x0000e9cf, 0x0000ebcf,
+ 0x0000edcf, 0x0000efcf, 0x0000f1cf, 0x0000f3cf,
+ 0x0000f5cf, 0x0000f7cf, 0x0000f9cf, 0x0000fbcf,
+ 0x0000fdcf, 0x0000ffcf, 0x000101cf, 0x000103cf,
+ 0x000105cf, 0x000107cf, 0x000109cf, 0x00010bcf,
+ 0x00010dcf, 0x00010fcf, 0x000111cf, 0x000113cf,
+ 0x000115cf, 0x000117cf, 0x000119cf, 0x00011bcf,
+ 0x00011dcf, 0x00011fcf, 0x000121cf, 0x000123cf,
+ 0x000125cf, 0x000127cf, 0x000129cf, 0x00012bcf,
+ 0x00012dcf, 0x00012fcf, 0x000131cf, 0x000133cf,
+ 0x000135cf, 0x000137cf, 0x000139cf, 0x00013bcf,
+ 0x00013dcf, 0x00013fcf, 0x000141cf, 0x000143cf,
+ 0x000145cf, 0x000147cf, 0x000149cf, 0x00014bcf,
+ 0x00014dcf, 0x00014fcf, 0x000151cf, 0x000153cf,
+ 0x000155cf, 0x000157cf, 0x000159cf, 0x00015bcf,
+ 0x00015dcf, 0x00015fcf, 0x000161cf, 0x000163cf,
+ 0x000165cf, 0x000167cf, 0x000169cf, 0x00016bcf,
+ 0x00016dcf, 0x00016fcf, 0x000171cf, 0x000173cf,
+ 0x000175cf, 0x000177cf, 0x000179cf, 0x00017bcf,
+ 0x00017dcf, 0x00017fcf, 0x000181cf, 0x000183cf,
+ 0x000185cf, 0x000187cf, 0x000189cf, 0x00018bcf,
+ 0x00018dcf, 0x00018fcf, 0x000191cf, 0x000193cf,
+ 0x000195cf, 0x000197cf, 0x000199cf, 0x00019bcf,
+ 0x00019dcf, 0x00019fcf, 0x0001a1cf, 0x0001a3cf,
+ 0x0001a5cf, 0x0001a7cf, 0x0001a9cf, 0x0001abcf,
+ 0x0001adcf, 0x0001afcf, 0x0001b1cf, 0x0001b3cf,
+ 0x0001b5cf, 0x0001b7cf, 0x0001b9cf, 0x0001bbcf,
+ 0x0001bdcf, 0x0001bfcf, 0x0001c1cf, 0x0001c3cf,
+ 0x0001c5cf, 0x0001c7cf, 0x0001c9cf, 0x0001cbcf,
+ 0x0001cdcf, 0x0001cfcf, 0x0001d1cf, 0x0001d3cf,
+ 0x0001d5cf, 0x0001d7cf, 0x0001d9cf, 0x0001dbcf,
+ 0x0001ddcf, 0x0001dfcf, 0x0001e1cf, 0x0001e3cf,
+ 0x0001e5cf, 0x0001e7cf, 0x0001e9cf, 0x0001ebcf,
+ 0x0001edcf, 0x0001efcf, 0x0001f1cf, 0x0001f3cf,
+ 0x0001f5cf, 0x0001f7cf, 0x0001f9cf, 0x0001fbcf,
+ 0x0001fdcf, 0x0001ffcf, 0x000201cf, 0x000203cf,
+ 0x000205cf, 0x000207cf, 0x000209cf, 0x00020bcf,
+ 0x00020dcf, 0x00020fcf, 0x000211cf, 0x000213cf,
+ 0x000215cf, 0x000217cf, 0x000219cf, 0x00021bcf,
+ 0x00021dcf, 0x00021fcf, 0x000221cf, 0x000223cf,
+ 0x000225cf, 0x000227cf, 0x000229cf, 0x00022bcf,
+ 0x00022dcf, 0x00022fcf, 0x000231cf, 0x000233cf,
+ 0x000235cf, 0x000237cf, 0x000239cf, 0x00023bcf,
+ 0x00023dcf, 0x00023fcf, 0x000241cf, 0x000243cf,
+ 0x000245cf, 0x000247cf, 0x000249cf, 0x00024bcf,
+ 0x00024dcf, 0x00024fcf, 0x000251cf, 0x000253cf,
+ 0x000255cf, 0x000257cf, 0x000259cf, 0x00025bcf,
+ 0x00025dcf, 0x00025fcf, 0x000261cf, 0x000263cf,
+ 0x000265cf, 0x000267cf, 0x000269cf, 0x00026bcf,
+ 0x00026dcf, 0x00026fcf, 0x000271cf, 0x000273cf,
+ 0x000275cf, 0x000277cf, 0x000279cf, 0x00027bcf,
+ 0x00027dcf, 0x00027fcf, 0x000281cf, 0x000283cf,
+ 0x000285cf, 0x000287cf, 0x000289cf, 0x00028bcf,
+ 0x00028dcf, 0x00028fcf, 0x000291cf, 0x000293cf,
+ 0x000295cf, 0x000297cf, 0x000299cf, 0x00029bcf,
+ 0x00029dcf, 0x00029fcf, 0x0002a1cf, 0x0002a3cf,
+ 0x0002a5cf, 0x0002a7cf, 0x0002a9cf, 0x0002abcf,
+ 0x0002adcf, 0x0002afcf, 0x0002b1cf, 0x0002b3cf,
+ 0x0002b5cf, 0x0002b7cf, 0x0002b9cf, 0x0002bbcf,
+ 0x0002bdcf, 0x0002bfcf, 0x0002c1cf, 0x0002c3cf,
+ 0x0002c5cf, 0x0002c7cf, 0x0002c9cf, 0x0002cbcf,
+ 0x0002cdcf, 0x0002cfcf, 0x0002d1cf, 0x0002d3cf,
+ 0x0002d5cf, 0x0002d7cf, 0x0002d9cf, 0x0002dbcf,
+ 0x0002ddcf, 0x0002dfcf, 0x0002e1cf, 0x0002e3cf,
+ 0x0002e5cf, 0x0002e7cf, 0x0002e9cf, 0x0002ebcf,
+ 0x0002edcf, 0x0002efcf, 0x0002f1cf, 0x0002f3cf,
+ 0x0002f5cf, 0x0002f7cf, 0x0002f9cf, 0x0002fbcf,
+ 0x0002fdcf, 0x0002ffcf, 0x000301cf, 0x000303cf,
+ 0x000305cf, 0x000307cf, 0x000309cf, 0x00030bcf,
+ 0x00030dcf, 0x00030fcf, 0x000311cf, 0x000313cf,
+ 0x000315cf, 0x000317cf, 0x000319cf, 0x00031bcf,
+ 0x00031dcf, 0x00031fcf, 0x000321cf, 0x000323cf,
+ 0x000325cf, 0x000327cf, 0x000329cf, 0x00032bcf,
+ 0x00032dcf, 0x00032fcf, 0x000331cf, 0x000333cf,
+ 0x000335cf, 0x000337cf, 0x000339cf, 0x00033bcf,
+ 0x00033dcf, 0x00033fcf, 0x000341cf, 0x000343cf,
+ 0x000345cf, 0x000347cf, 0x000349cf, 0x00034bcf,
+ 0x00034dcf, 0x00034fcf, 0x000351cf, 0x000353cf,
+ 0x000355cf, 0x000357cf, 0x000359cf, 0x00035bcf,
+ 0x00035dcf, 0x00035fcf, 0x000361cf, 0x000363cf,
+ 0x000365cf, 0x000367cf, 0x000369cf, 0x00036bcf,
+ 0x00036dcf, 0x00036fcf, 0x000371cf, 0x000373cf,
+ 0x000375cf, 0x000377cf, 0x000379cf, 0x00037bcf,
+ 0x00037dcf, 0x00037fcf, 0x000381cf, 0x000383cf,
+ 0x000385cf, 0x000387cf, 0x000389cf, 0x00038bcf,
+ 0x00038dcf, 0x00038fcf, 0x000391cf, 0x000393cf,
+ 0x000395cf, 0x000397cf, 0x000399cf, 0x00039bcf,
+ 0x00039dcf, 0x00039fcf, 0x0003a1cf, 0x0003a3cf,
+ 0x0003a5cf, 0x0003a7cf, 0x0003a9cf, 0x0003abcf,
+ 0x0003adcf, 0x0003afcf, 0x0003b1cf, 0x0003b3cf,
+ 0x0003b5cf, 0x0003b7cf, 0x0003b9cf, 0x0003bbcf,
+ 0x0003bdcf, 0x0003bfcf, 0x0003c1cf, 0x0003c3cf,
+ 0x0003c5cf, 0x0003c7cf, 0x0003c9cf, 0x0003cbcf,
+ 0x0003cdcf, 0x0003cfcf, 0x0003d1cf, 0x0003d3cf,
+ 0x0003d5cf, 0x0003d7cf, 0x0003d9cf, 0x0003dbcf,
+ 0x0003ddcf, 0x0003dfcf, 0x0003e1cf, 0x0003e3cf,
+ 0x0003e5cf, 0x0003e7cf, 0x0003e9cf, 0x0003ebcf,
+ 0x0003edcf, 0x0003efcf, 0x0003f1cf, 0x0003f3cf,
+ 0x0003f5cf, 0x0003f7cf, 0x0003f9cf, 0x0003fbcf,
+ 0x0003fdcf, 0x0003ffcf, 0x000401cf, 0x000403cf,
+ 0x000405cf, 0x000407cf, 0x000409cf, 0x00040bcf,
+ 0x00040dcf, 0x00040fcf, 0x000411cf, 0x000413cf,
+ 0x000415cf, 0x000417cf, 0x000419cf, 0x00041bcf,
+ 0x00041dcf, 0x00041fcf, 0x000421cf, 0x000423cf,
+ 0x000425cf, 0x000427cf, 0x000429cf, 0x00042bcf,
+ 0x00042dcf, 0x00042fcf, 0x000431cf, 0x000433cf,
+ 0x000435cf, 0x000437cf, 0x000439cf, 0x00043bcf,
+ 0x00043dcf, 0x00043fcf, 0x000441cf, 0x000443cf,
+ 0x000445cf, 0x000447cf, 0x000449cf, 0x00044bcf,
+ 0x00044dcf, 0x00044fcf, 0x000451cf, 0x000453cf,
+ 0x000455cf, 0x000457cf, 0x000459cf, 0x00045bcf,
+ 0x00045dcf, 0x00045fcf, 0x000461cf, 0x000463cf,
+ 0x000465cf, 0x000467cf, 0x000469cf, 0x00046bcf,
+ 0x00046dcf, 0x00046fcf, 0x000471cf, 0x000473cf,
+ 0x000475cf, 0x000477cf, 0x000479cf, 0x00047bcf,
+ 0x00047dcf, 0x00047fcf, 0x000481cf, 0x000483cf,
+ 0x000485cf, 0x000487cf, 0x000489cf, 0x00048bcf,
+ 0x00048dcf, 0x00048fcf, 0x000491cf, 0x000493cf,
+ 0x000495cf, 0x000497cf, 0x000499cf, 0x00049bcf,
+ 0x00049dcf, 0x00049fcf, 0x0004a1cf, 0x0004a3cf,
+ 0x0004a5cf, 0x0004a7cf, 0x0004a9cf, 0x0004abcf,
+ 0x0004adcf, 0x0004afcf, 0x0004b1cf, 0x0004b3cf,
+ 0x0004b5cf, 0x0004b7cf, 0x0004b9cf, 0x0004bbcf,
+ 0x0004bdcf, 0x0004bfcf, 0x0004c1cf, 0x0004c3cf,
+ 0x0004c5cf, 0x0004c7cf, 0x0004c9cf, 0x0004cbcf,
+ 0x0004cdcf, 0x0004cfcf, 0x0004d1cf, 0x0004d3cf,
+ 0x0004d5cf, 0x0004d7cf, 0x0004d9cf, 0x0004dbcf,
+ 0x0004ddcf, 0x0004dfcf, 0x0004e1cf, 0x0004e3cf,
+ 0x0004e5cf, 0x0004e7cf, 0x0004e9cf, 0x0004ebcf,
+ 0x0004edcf, 0x0004efcf, 0x0004f1cf, 0x0004f3cf,
+ 0x0004f5cf, 0x0004f7cf, 0x0004f9cf, 0x0004fbcf,
+ 0x0004fdcf, 0x0004ffcf, 0x000501cf, 0x000503cf,
+ 0x000505cf, 0x000507cf, 0x000509cf, 0x00050bcf,
+ 0x00050dcf, 0x00050fcf, 0x000511cf, 0x000513cf,
+ 0x000515cf, 0x000517cf, 0x000519cf, 0x00051bcf,
+ 0x00051dcf, 0x00051fcf, 0x000521cf, 0x000523cf,
+ 0x000525cf, 0x000527cf, 0x000529cf, 0x00052bcf,
+ 0x00052dcf, 0x00052fcf, 0x000531cf, 0x000533cf,
+ 0x000535cf, 0x000537cf, 0x000539cf, 0x00053bcf,
+ 0x00053dcf, 0x00053fcf, 0x000541cf, 0x000543cf,
+ 0x000545cf, 0x000547cf, 0x000549cf, 0x00054bcf,
+ 0x00054dcf, 0x00054fcf, 0x000551cf, 0x000553cf,
+ 0x000555cf, 0x000557cf, 0x000559cf, 0x00055bcf,
+ 0x00055dcf, 0x00055fcf, 0x000561cf, 0x000563cf,
+ 0x000565cf, 0x000567cf, 0x000569cf, 0x00056bcf,
+ 0x00056dcf, 0x00056fcf, 0x000571cf, 0x000573cf,
+ 0x000575cf, 0x000577cf, 0x000579cf, 0x00057bcf,
+ 0x00057dcf, 0x00057fcf, 0x000581cf, 0x000583cf,
+ 0x000585cf, 0x000587cf, 0x000589cf, 0x00058bcf,
+ 0x00058dcf, 0x00058fcf, 0x000591cf, 0x000593cf,
+ 0x000595cf, 0x000597cf, 0x000599cf, 0x00059bcf,
+ 0x00059dcf, 0x00059fcf, 0x0005a1cf, 0x0005a3cf,
+ 0x0005a5cf, 0x0005a7cf, 0x0005a9cf, 0x0005abcf,
+ 0x0005adcf, 0x0005afcf, 0x0005b1cf, 0x0005b3cf,
+ 0x0005b5cf, 0x0005b7cf, 0x0005b9cf, 0x0005bbcf,
+ 0x0005bdcf, 0x0005bfcf, 0x0005c1cf, 0x0005c3cf,
+ 0x0005c5cf, 0x0005c7cf, 0x0005c9cf, 0x0005cbcf,
+ 0x0005cdcf, 0x0005cfcf, 0x0005d1cf, 0x0005d3cf,
+ 0x0005d5cf, 0x0005d7cf, 0x0005d9cf, 0x0005dbcf,
+ 0x0005ddcf, 0x0005dfcf, 0x0005e1cf, 0x0005e3cf,
+ 0x0005e5cf, 0x0005e7cf, 0x0005e9cf, 0x0005ebcf,
+ 0x0005edcf, 0x0005efcf, 0x0005f1cf, 0x0005f3cf,
+ 0x0005f5cf, 0x0005f7cf, 0x0005f9cf, 0x0005fbcf,
+ 0x0005fdcf, 0x0005ffcf, 0x000601cf, 0x000603cf,
+ 0x000605cf, 0x000607cf, 0x000609cf, 0x00060bcf,
+ 0x00060dcf, 0x00060fcf, 0x000611cf, 0x000613cf,
+ 0x000615cf, 0x000617cf, 0x000619cf, 0x00061bcf,
+ 0x00061dcf, 0x00061fcf, 0x000621cf, 0x000623cf,
+ 0x000625cf, 0x000627cf, 0x000629cf, 0x00062bcf,
+ 0x00062dcf, 0x00062fcf, 0x000631cf, 0x000633cf,
+ 0x000635cf, 0x000637cf, 0x000639cf, 0x00063bcf,
+ 0x00063dcf, 0x00063fcf, 0x000641cf, 0x000643cf,
+ 0x000645cf, 0x000647cf, 0x000649cf, 0x00064bcf,
+ 0x00064dcf, 0x00064fcf, 0x000651cf, 0x000653cf,
+ 0x000655cf, 0x000657cf, 0x000659cf, 0x00065bcf,
+ 0x00065dcf, 0x00065fcf, 0x000661cf, 0x000663cf,
+ 0x000665cf, 0x000667cf, 0x000669cf, 0x00066bcf,
+ 0x00066dcf, 0x00066fcf, 0x000671cf, 0x000673cf,
+ 0x000675cf, 0x000677cf, 0x000679cf, 0x00067bcf,
+ 0x00067dcf, 0x00067fcf, 0x000681cf, 0x000683cf,
+ 0x000685cf, 0x000687cf, 0x000689cf, 0x00068bcf,
+ 0x00068dcf, 0x00068fcf, 0x000691cf, 0x000693cf,
+ 0x000695cf, 0x000697cf, 0x000699cf, 0x00069bcf,
+ 0x00069dcf, 0x00069fcf, 0x0006a1cf, 0x0006a3cf,
+ 0x0006a5cf, 0x0006a7cf, 0x0006a9cf, 0x0006abcf,
+ 0x0006adcf, 0x0006afcf, 0x0006b1cf, 0x0006b3cf,
+ 0x0006b5cf, 0x0006b7cf, 0x0006b9cf, 0x0006bbcf,
+ 0x0006bdcf, 0x0006bfcf, 0x0006c1cf, 0x0006c3cf,
+ 0x0006c5cf, 0x0006c7cf, 0x0006c9cf, 0x0006cbcf,
+ 0x0006cdcf, 0x0006cfcf, 0x0006d1cf, 0x0006d3cf,
+ 0x0006d5cf, 0x0006d7cf, 0x0006d9cf, 0x0006dbcf,
+ 0x0006ddcf, 0x0006dfcf, 0x0006e1cf, 0x0006e3cf,
+ 0x0006e5cf, 0x0006e7cf, 0x0006e9cf, 0x0006ebcf,
+ 0x0006edcf, 0x0006efcf, 0x0006f1cf, 0x0006f3cf,
+ 0x0006f5cf, 0x0006f7cf, 0x0006f9cf, 0x0006fbcf,
+ 0x0006fdcf, 0x0006ffcf, 0x000701cf, 0x000703cf,
+ 0x000705cf, 0x000707cf, 0x000709cf, 0x00070bcf,
+ 0x00070dcf, 0x00070fcf, 0x000711cf, 0x000713cf,
+ 0x000715cf, 0x000717cf, 0x000719cf, 0x00071bcf,
+ 0x00071dcf, 0x00071fcf, 0x000721cf, 0x000723cf,
+ 0x000725cf, 0x000727cf, 0x000729cf, 0x00072bcf,
+ 0x00072dcf, 0x00072fcf, 0x000731cf, 0x000733cf,
+ 0x000735cf, 0x000737cf, 0x000739cf, 0x00073bcf,
+ 0x00073dcf, 0x00073fcf, 0x000741cf, 0x000743cf,
+ 0x000745cf, 0x000747cf, 0x000749cf, 0x00074bcf,
+ 0x00074dcf, 0x00074fcf, 0x000751cf, 0x000753cf,
+ 0x000755cf, 0x000757cf, 0x000759cf, 0x00075bcf,
+ 0x00075dcf, 0x00075fcf, 0x000761cf, 0x000763cf,
+ 0x000765cf, 0x000767cf, 0x000769cf, 0x00076bcf,
+ 0x00076dcf, 0x00076fcf, 0x000771cf, 0x000773cf,
+ 0x000775cf, 0x000777cf, 0x000779cf, 0x00077bcf,
+ 0x00077dcf, 0x00077fcf, 0x000781cf, 0x000783cf,
+ 0x000785cf, 0x000787cf, 0x000789cf, 0x00078bcf,
+ 0x00078dcf, 0x00078fcf, 0x000791cf, 0x000793cf,
+ 0x000795cf, 0x000797cf, 0x000799cf, 0x00079bcf,
+ 0x00079dcf, 0x00079fcf, 0x0007a1cf, 0x0007a3cf,
+ 0x0007a5cf, 0x0007a7cf, 0x0007a9cf, 0x0007abcf,
+ 0x0007adcf, 0x0007afcf, 0x0007b1cf, 0x0007b3cf,
+ 0x0007b5cf, 0x0007b7cf, 0x0007b9cf, 0x0007bbcf,
+ 0x0007bdcf, 0x0007bfcf, 0x0007c1cf, 0x0007c3cf,
+ 0x0007c5cf, 0x0007c7cf, 0x0007c9cf, 0x0007cbcf,
+ 0x0007cdcf, 0x0007cfcf, 0x0007d1cf, 0x0007d3cf,
+ 0x0007d5cf, 0x0007d7cf, 0x0007d9cf, 0x0007dbcf,
+ 0x0007ddcf, 0x0007dfcf, 0x0007e1cf, 0x0007e3cf,
+ 0x0007e5cf, 0x0007e7cf, 0x0007e9cf, 0x0007ebcf,
+ 0x0007edcf, 0x0007efcf, 0x0007f1cf, 0x0007f3cf,
+ 0x0007f5cf, 0x0007f7cf, 0x0007f9cf, 0x0007fbcf,
+ 0x0007fdcf, 0x0007ffcf, 0x000801cf, 0x000803cf,
+ 0x000805cf, 0x000807cf, 0x000809cf, 0x00080bcf,
+ 0x00080dcf, 0x00080fcf, 0x000811cf, 0x000813cf,
+ 0x000815cf, 0x000817cf, 0x000819cf, 0x00081bcf,
+ 0x00081dcf, 0x00081fcf, 0x000821cf, 0x000823cf,
+ 0x000825cf, 0x000827cf, 0x000829cf, 0x00082bcf,
+ 0x00082dcf, 0x00082fcf, 0x000831cf, 0x000833cf,
+ 0x000835cf, 0x000837cf, 0x000839cf, 0x00083bcf,
+ 0x00083dcf, 0x00083fcf, 0x000841cf, 0x000843cf,
+ 0x000845cf, 0x000847cf, 0x000849cf, 0x00084bcf,
+ 0x00084dcf, 0x00084fcf, 0x000851cf, 0x000853cf,
+ 0x000855cf, 0x000857cf, 0x000859cf, 0x00085bcf,
+ 0x00085dcf, 0x00085fcf, 0x000861cf, 0x000863cf,
+ 0x000865cf, 0x000867cf, 0x000869cf, 0x00086bcf,
+ 0x00086dcf, 0x00086fcf, 0x000871cf, 0x000873cf,
+ 0x000875cf, 0x000877cf, 0x000879cf, 0x00087bcf,
+ 0x00087dcf, 0x00087fcf, 0x000881cf, 0x000883cf,
+ 0x000885cf, 0x000887cf, 0x000889cf, 0x00088bcf,
+ 0x00088dcf, 0x00088fcf, 0x000891cf, 0x000893cf,
+ 0x000895cf, 0x000897cf, 0x000899cf, 0x00089bcf,
+ 0x00089dcf, 0x00089fcf, 0x0008a1cf, 0x0008a3cf,
+ 0x0008a5cf, 0x0008a7cf, 0x0008a9cf, 0x0008abcf,
+ 0x0008adcf, 0x0008afcf, 0x0008b1cf, 0x0008b3cf,
+ 0x0008b5cf, 0x0008b7cf, 0x0008b9cf, 0x0008bbcf,
+ 0x0008bdcf, 0x0008bfcf, 0x0008c1cf, 0x0008c3cf,
+ 0x0008c5cf, 0x0008c7cf, 0x0008c9cf, 0x0008cbcf,
+ 0x0008cdcf, 0x0008cfcf, 0x0008d1cf, 0x0008d3cf,
+ 0x0008d5cf, 0x0008d7cf, 0x0008d9cf, 0x0008dbcf,
+ 0x0008ddcf, 0x0008dfcf, 0x0008e1cf, 0x0008e3cf,
+ 0x0008e5cf, 0x0008e7cf, 0x0008e9cf, 0x0008ebcf,
+ 0x0008edcf, 0x0008efcf, 0x0008f1cf, 0x0008f3cf,
+ 0x0008f5cf, 0x0008f7cf, 0x0008f9cf, 0x0008fbcf,
+ 0x0008fdcf, 0x0008ffcf, 0x000901cf, 0x000903cf,
+ 0x000905cf, 0x000907cf, 0x000909cf, 0x00090bcf,
+ 0x00090dcf, 0x00090fcf, 0x000911cf, 0x000913cf,
+ 0x000915cf, 0x000917cf, 0x000919cf, 0x00091bcf,
+ 0x00091dcf, 0x00091fcf, 0x000921cf, 0x000923cf,
+ 0x000925cf, 0x000927cf, 0x000929cf, 0x00092bcf,
+ 0x00092dcf, 0x00092fcf, 0x000931cf, 0x000933cf,
+ 0x000935cf, 0x000937cf, 0x000939cf, 0x00093bcf,
+ 0x00093dcf, 0x00093fcf, 0x000941cf, 0x000943cf,
+ 0x000945cf, 0x000947cf, 0x000949cf, 0x00094bcf,
+ 0x00094dcf, 0x00094fcf, 0x000951cf, 0x000953cf,
+ 0x000955cf, 0x000957cf, 0x000959cf, 0x00095bcf,
+ 0x00095dcf, 0x00095fcf, 0x000961cf, 0x000963cf,
+ 0x000965cf, 0x000967cf, 0x000969cf, 0x00096bcf,
+ 0x00096dcf, 0x00096fcf, 0x000971cf, 0x000973cf,
+ 0x000975cf, 0x000977cf, 0x000979cf, 0x00097bcf,
+ 0x00097dcf, 0x00097fcf, 0x000981cf, 0x000983cf,
+ 0x000985cf, 0x000987cf, 0x000989cf, 0x00098bcf,
+ 0x00098dcf, 0x00098fcf, 0x000991cf, 0x000993cf,
+ 0x000995cf, 0x000997cf, 0x000999cf, 0x00099bcf,
+ 0x00099dcf, 0x00099fcf, 0x0009a1cf, 0x0009a3cf,
+ 0x0009a5cf, 0x0009a7cf, 0x0009a9cf, 0x0009abcf,
+ 0x0009adcf, 0x0009afcf, 0x0009b1cf, 0x0009b3cf,
+ 0x0009b5cf, 0x0009b7cf, 0x0009b9cf, 0x0009bbcf,
+ 0x0009bdcf, 0x0009bfcf, 0x0009c1cf, 0x0009c3cf,
+ 0x0009c5cf, 0x0009c7cf, 0x0009c9cf, 0x0009cbcf,
+ 0x0009cdcf, 0x0009cfcf, 0x0009d1cf, 0x0009d3cf,
+ 0x0009d5cf, 0x0009d7cf, 0x0009d9cf, 0x0009dbcf,
+ 0x0009ddcf, 0x0009dfcf, 0x0009e1cf, 0x0009e3cf,
+ 0x0009e5cf, 0x0009e7cf, 0x0009e9cf, 0x0009ebcf,
+ 0x0009edcf, 0x0009efcf, 0x0009f1cf, 0x0009f3cf,
+ 0x0009f5cf, 0x0009f7cf, 0x0009f9cf, 0x0009fbcf,
+ 0x0009fdcf, 0x0009ffcf, 0x000a01cf, 0x000a03cf,
+ 0x000a05cf, 0x000a07cf, 0x000a09cf, 0x000a0bcf,
+ 0x000a0dcf, 0x000a0fcf, 0x000a11cf, 0x000a13cf,
+ 0x000a15cf, 0x000a17cf, 0x000a19cf, 0x000a1bcf,
+ 0x000a1dcf, 0x000a1fcf, 0x000a21cf, 0x000a23cf,
+ 0x000a25cf, 0x000a27cf, 0x000a29cf, 0x000a2bcf,
+ 0x000a2dcf, 0x000a2fcf, 0x000a31cf, 0x000a33cf,
+ 0x000a35cf, 0x000a37cf, 0x000a39cf, 0x000a3bcf,
+ 0x000a3dcf, 0x000a3fcf, 0x000a41cf, 0x000a43cf,
+ 0x000a45cf, 0x000a47cf, 0x000a49cf, 0x000a4bcf,
+ 0x000a4dcf, 0x000a4fcf, 0x000a51cf, 0x000a53cf,
+ 0x000a55cf, 0x000a57cf, 0x000a59cf, 0x000a5bcf,
+ 0x000a5dcf, 0x000a5fcf, 0x000a61cf, 0x000a63cf,
+ 0x000a65cf, 0x000a67cf, 0x000a69cf, 0x000a6bcf,
+ 0x000a6dcf, 0x000a6fcf, 0x000a71cf, 0x000a73cf,
+ 0x000a75cf, 0x000a77cf, 0x000a79cf, 0x000a7bcf,
+ 0x000a7dcf, 0x000a7fcf, 0x000a81cf, 0x000a83cf,
+ 0x000a85cf, 0x000a87cf, 0x000a89cf, 0x000a8bcf,
+ 0x000a8dcf, 0x000a8fcf, 0x000a91cf, 0x000a93cf,
+ 0x000a95cf, 0x000a97cf, 0x000a99cf, 0x000a9bcf,
+ 0x000a9dcf, 0x000a9fcf, 0x000aa1cf, 0x000aa3cf,
+ 0x000aa5cf, 0x000aa7cf, 0x000aa9cf, 0x000aabcf,
+ 0x000aadcf, 0x000aafcf, 0x000ab1cf, 0x000ab3cf,
+ 0x000ab5cf, 0x000ab7cf, 0x000ab9cf, 0x000abbcf,
+ 0x000abdcf, 0x000abfcf, 0x000ac1cf, 0x000ac3cf,
+ 0x000ac5cf, 0x000ac7cf, 0x000ac9cf, 0x000acbcf,
+ 0x000acdcf, 0x000acfcf, 0x000ad1cf, 0x000ad3cf,
+ 0x000ad5cf, 0x000ad7cf, 0x000ad9cf, 0x000adbcf,
+ 0x000addcf, 0x000adfcf, 0x000ae1cf, 0x000ae3cf,
+ 0x000ae5cf, 0x000ae7cf, 0x000ae9cf, 0x000aebcf,
+ 0x000aedcf, 0x000aefcf, 0x000af1cf, 0x000af3cf,
+ 0x000af5cf, 0x000af7cf, 0x000af9cf, 0x000afbcf,
+ 0x000afdcf, 0x000affcf, 0x000b01cf, 0x000b03cf,
+ 0x000b05cf, 0x000b07cf, 0x000b09cf, 0x000b0bcf,
+ 0x000b0dcf, 0x000b0fcf, 0x000b11cf, 0x000b13cf,
+ 0x000b15cf, 0x000b17cf, 0x000b19cf, 0x000b1bcf,
+ 0x000b1dcf, 0x000b1fcf, 0x000b21cf, 0x000b23cf,
+ 0x000b25cf, 0x000b27cf, 0x000b29cf, 0x000b2bcf,
+ 0x000b2dcf, 0x000b2fcf, 0x000b31cf, 0x000b33cf,
+ 0x000b35cf, 0x000b37cf, 0x000b39cf, 0x000b3bcf,
+ 0x000b3dcf, 0x000b3fcf, 0x000b41cf, 0x000b43cf,
+ 0x000b45cf, 0x000b47cf, 0x000b49cf, 0x000b4bcf,
+ 0x000b4dcf, 0x000b4fcf, 0x000b51cf, 0x000b53cf,
+ 0x000b55cf, 0x000b57cf, 0x000b59cf, 0x000b5bcf,
+ 0x000b5dcf, 0x000b5fcf, 0x000b61cf, 0x000b63cf,
+ 0x000b65cf, 0x000b67cf, 0x000b69cf, 0x000b6bcf,
+ 0x000b6dcf, 0x000b6fcf, 0x000b71cf, 0x000b73cf,
+ 0x000b75cf, 0x000b77cf, 0x000b79cf, 0x000b7bcf,
+ 0x000b7dcf, 0x000b7fcf, 0x000b81cf, 0x000b83cf,
+ 0x000b85cf, 0x000b87cf, 0x000b89cf, 0x000b8bcf,
+ 0x000b8dcf, 0x000b8fcf, 0x000b91cf, 0x000b93cf,
+ 0x000b95cf, 0x000b97cf, 0x000b99cf, 0x000b9bcf,
+ 0x000b9dcf, 0x000b9fcf, 0x000ba1cf, 0x000ba3cf,
+ 0x000ba5cf, 0x000ba7cf, 0x000ba9cf, 0x000babcf,
+ 0x000badcf, 0x000bafcf, 0x000bb1cf, 0x000bb3cf,
+ 0x000bb5cf, 0x000bb7cf, 0x000bb9cf, 0x000bbbcf,
+ 0x000bbdcf, 0x000bbfcf, 0x000bc1cf, 0x000bc3cf,
+ 0x000bc5cf, 0x000bc7cf, 0x000bc9cf, 0x000bcbcf,
+ 0x000bcdcf, 0x000bcfcf, 0x000bd1cf, 0x000bd3cf,
+ 0x000bd5cf, 0x000bd7cf, 0x000bd9cf, 0x000bdbcf,
+ 0x000bddcf, 0x000bdfcf, 0x000be1cf, 0x000be3cf,
+ 0x000be5cf, 0x000be7cf, 0x000be9cf, 0x000bebcf,
+ 0x000bedcf, 0x000befcf, 0x000bf1cf, 0x000bf3cf,
+ 0x000bf5cf, 0x000bf7cf, 0x000bf9cf, 0x000bfbcf,
+ 0x000bfdcf, 0x000bffcf, 0x000c01cf, 0x000c03cf,
+ 0x000c05cf, 0x000c07cf, 0x000c09cf, 0x000c0bcf,
+ 0x000c0dcf, 0x000c0fcf, 0x000c11cf, 0x000c13cf,
+ 0x000c15cf, 0x000c17cf, 0x000c19cf, 0x000c1bcf,
+ 0x000c1dcf, 0x000c1fcf, 0x000c21cf, 0x000c23cf,
+ 0x000c25cf, 0x000c27cf, 0x000c29cf, 0x000c2bcf,
+ 0x000c2dcf, 0x000c2fcf, 0x000c31cf, 0x000c33cf,
+ 0x000c35cf, 0x000c37cf, 0x000c39cf, 0x000c3bcf,
+ 0x000c3dcf, 0x000c3fcf, 0x000c41cf, 0x000c43cf,
+ 0x000c45cf, 0x000c47cf, 0x000c49cf, 0x000c4bcf,
+ 0x000c4dcf, 0x000c4fcf, 0x000c51cf, 0x000c53cf,
+ 0x000c55cf, 0x000c57cf, 0x000c59cf, 0x000c5bcf,
+ 0x000c5dcf, 0x000c5fcf, 0x000c61cf, 0x000c63cf,
+ 0x000c65cf, 0x000c67cf, 0x000c69cf, 0x000c6bcf,
+ 0x000c6dcf, 0x000c6fcf, 0x000c71cf, 0x000c73cf,
+ 0x000c75cf, 0x000c77cf, 0x000c79cf, 0x000c7bcf,
+ 0x000c7dcf, 0x000c7fcf, 0x000c81cf, 0x000c83cf,
+ 0x000c85cf, 0x000c87cf, 0x000c89cf, 0x000c8bcf,
+ 0x000c8dcf, 0x000c8fcf, 0x000c91cf, 0x000c93cf,
+ 0x000c95cf, 0x000c97cf, 0x000c99cf, 0x000c9bcf,
+ 0x000c9dcf, 0x000c9fcf, 0x000ca1cf, 0x000ca3cf,
+ 0x000ca5cf, 0x000ca7cf, 0x000ca9cf, 0x000cabcf,
+ 0x000cadcf, 0x000cafcf, 0x000cb1cf, 0x000cb3cf,
+ 0x000cb5cf, 0x000cb7cf, 0x000cb9cf, 0x000cbbcf,
+ 0x000cbdcf, 0x000cbfcf, 0x000cc1cf, 0x000cc3cf,
+ 0x000cc5cf, 0x000cc7cf, 0x000cc9cf, 0x000ccbcf,
+ 0x000ccdcf, 0x000ccfcf, 0x000cd1cf, 0x000cd3cf,
+ 0x000cd5cf, 0x000cd7cf, 0x000cd9cf, 0x000cdbcf,
+ 0x000cddcf, 0x000cdfcf, 0x000ce1cf, 0x000ce3cf,
+ 0x000ce5cf, 0x000ce7cf, 0x000ce9cf, 0x000cebcf,
+ 0x000cedcf, 0x000cefcf, 0x000cf1cf, 0x000cf3cf,
+ 0x000cf5cf, 0x000cf7cf, 0x000cf9cf, 0x000cfbcf,
+ 0x000cfdcf, 0x000cffcf, 0x000d01cf, 0x000d03cf,
+ 0x000d05cf, 0x000d07cf, 0x000d09cf, 0x000d0bcf,
+ 0x000d0dcf, 0x000d0fcf, 0x000d11cf, 0x000d13cf,
+ 0x000d15cf, 0x000d17cf, 0x000d19cf, 0x000d1bcf,
+ 0x000d1dcf, 0x000d1fcf, 0x000d21cf, 0x000d23cf,
+ 0x000d25cf, 0x000d27cf, 0x000d29cf, 0x000d2bcf,
+ 0x000d2dcf, 0x000d2fcf, 0x000d31cf, 0x000d33cf,
+ 0x000d35cf, 0x000d37cf, 0x000d39cf, 0x000d3bcf,
+ 0x000d3dcf, 0x000d3fcf, 0x000d41cf, 0x000d43cf,
+ 0x000d45cf, 0x000d47cf, 0x000d49cf, 0x000d4bcf,
+ 0x000d4dcf, 0x000d4fcf, 0x000d51cf, 0x000d53cf,
+ 0x000d55cf, 0x000d57cf, 0x000d59cf, 0x000d5bcf,
+ 0x000d5dcf, 0x000d5fcf, 0x000d61cf, 0x000d63cf,
+ 0x000d65cf, 0x000d67cf, 0x000d69cf, 0x000d6bcf,
+ 0x000d6dcf, 0x000d6fcf, 0x000d71cf, 0x000d73cf,
+ 0x000d75cf, 0x000d77cf, 0x000d79cf, 0x000d7bcf,
+ 0x000d7dcf, 0x000d7fcf, 0x000d81cf, 0x000d83cf,
+ 0x000d85cf, 0x000d87cf, 0x000d89cf, 0x000d8bcf,
+ 0x000d8dcf, 0x000d8fcf, 0x000d91cf, 0x000d93cf,
+ 0x000d95cf, 0x000d97cf, 0x000d99cf, 0x000d9bcf,
+ 0x000d9dcf, 0x000d9fcf, 0x000da1cf, 0x000da3cf,
+ 0x000da5cf, 0x000da7cf, 0x000da9cf, 0x000dabcf,
+ 0x000dadcf, 0x000dafcf, 0x000db1cf, 0x000db3cf,
+ 0x000db5cf, 0x000db7cf, 0x000db9cf, 0x000dbbcf,
+ 0x000dbdcf, 0x000dbfcf, 0x000dc1cf, 0x000dc3cf,
+ 0x000dc5cf, 0x000dc7cf, 0x000dc9cf, 0x000dcbcf,
+ 0x000dcdcf, 0x000dcfcf, 0x000dd1cf, 0x000dd3cf,
+ 0x000dd5cf, 0x000dd7cf, 0x000dd9cf, 0x000ddbcf,
+ 0x000dddcf, 0x000ddfcf, 0x000de1cf, 0x000de3cf,
+ 0x000de5cf, 0x000de7cf, 0x000de9cf, 0x000debcf,
+ 0x000dedcf, 0x000defcf, 0x000df1cf, 0x000df3cf,
+ 0x000df5cf, 0x000df7cf, 0x000df9cf, 0x000dfbcf,
+ 0x000dfdcf, 0x000dffcf, 0x000e01cf, 0x000e03cf,
+ 0x000e05cf, 0x000e07cf, 0x000e09cf, 0x000e0bcf,
+ 0x000e0dcf, 0x000e0fcf, 0x000e11cf, 0x000e13cf,
+ 0x000e15cf, 0x000e17cf, 0x000e19cf, 0x000e1bcf,
+ 0x000e1dcf, 0x000e1fcf, 0x000e21cf, 0x000e23cf,
+ 0x000e25cf, 0x000e27cf, 0x000e29cf, 0x000e2bcf,
+ 0x000e2dcf, 0x000e2fcf, 0x000e31cf, 0x000e33cf,
+ 0x000e35cf, 0x000e37cf, 0x000e39cf, 0x000e3bcf,
+ 0x000e3dcf, 0x000e3fcf, 0x000e41cf, 0x000e43cf,
+ 0x000e45cf, 0x000e47cf, 0x000e49cf, 0x000e4bcf,
+ 0x000e4dcf, 0x000e4fcf, 0x000e51cf, 0x000e53cf,
+ 0x000e55cf, 0x000e57cf, 0x000e59cf, 0x000e5bcf,
+ 0x000e5dcf, 0x000e5fcf, 0x000e61cf, 0x000e63cf,
+ 0x000e65cf, 0x000e67cf, 0x000e69cf, 0x000e6bcf,
+ 0x000e6dcf, 0x000e6fcf, 0x000e71cf, 0x000e73cf,
+ 0x000e75cf, 0x000e77cf, 0x000e79cf, 0x000e7bcf,
+ 0x000e7dcf, 0x000e7fcf, 0x000e81cf, 0x000e83cf,
+ 0x000e85cf, 0x000e87cf, 0x000e89cf, 0x000e8bcf,
+ 0x000e8dcf, 0x000e8fcf, 0x000e91cf, 0x000e93cf,
+ 0x000e95cf, 0x000e97cf, 0x000e99cf, 0x000e9bcf,
+ 0x000e9dcf, 0x000e9fcf, 0x000ea1cf, 0x000ea3cf,
+ 0x000ea5cf, 0x000ea7cf, 0x000ea9cf, 0x000eabcf,
+ 0x000eadcf, 0x000eafcf, 0x000eb1cf, 0x000eb3cf,
+ 0x000eb5cf, 0x000eb7cf, 0x000eb9cf, 0x000ebbcf,
+ 0x000ebdcf, 0x000ebfcf, 0x000ec1cf, 0x000ec3cf,
+ 0x000ec5cf, 0x000ec7cf, 0x000ec9cf, 0x000ecbcf,
+ 0x000ecdcf, 0x000ecfcf, 0x000ed1cf, 0x000ed3cf,
+ 0x000ed5cf, 0x000ed7cf, 0x000ed9cf, 0x000edbcf,
+ 0x000eddcf, 0x000edfcf, 0x000ee1cf, 0x000ee3cf,
+ 0x000ee5cf, 0x000ee7cf, 0x000ee9cf, 0x000eebcf,
+ 0x000eedcf, 0x000eefcf, 0x000ef1cf, 0x000ef3cf,
+ 0x000ef5cf, 0x000ef7cf, 0x000ef9cf, 0x000efbcf,
+ 0x000efdcf, 0x000effcf, 0x000f01cf, 0x000f03cf,
+ 0x000f05cf, 0x000f07cf, 0x000f09cf, 0x000f0bcf,
+ 0x000f0dcf, 0x000f0fcf, 0x000f11cf, 0x000f13cf,
+ 0x000f15cf, 0x000f17cf, 0x000f19cf, 0x000f1bcf,
+ 0x000f1dcf, 0x000f1fcf, 0x000f21cf, 0x000f23cf,
+ 0x000f25cf, 0x000f27cf, 0x000f29cf, 0x000f2bcf,
+ 0x000f2dcf, 0x000f2fcf, 0x000f31cf, 0x000f33cf,
+ 0x000f35cf, 0x000f37cf, 0x000f39cf, 0x000f3bcf,
+ 0x000f3dcf, 0x000f3fcf, 0x000f41cf, 0x000f43cf,
+ 0x000f45cf, 0x000f47cf, 0x000f49cf, 0x000f4bcf,
+ 0x000f4dcf, 0x000f4fcf, 0x000f51cf, 0x000f53cf,
+ 0x000f55cf, 0x000f57cf, 0x000f59cf, 0x000f5bcf,
+ 0x000f5dcf, 0x000f5fcf, 0x000f61cf, 0x000f63cf,
+ 0x000f65cf, 0x000f67cf, 0x000f69cf, 0x000f6bcf,
+ 0x000f6dcf, 0x000f6fcf, 0x000f71cf, 0x000f73cf,
+ 0x000f75cf, 0x000f77cf, 0x000f79cf, 0x000f7bcf,
+ 0x000f7dcf, 0x000f7fcf, 0x000f81cf, 0x000f83cf,
+ 0x000f85cf, 0x000f87cf, 0x000f89cf, 0x000f8bcf,
+ 0x000f8dcf, 0x000f8fcf, 0x000f91cf, 0x000f93cf,
+ 0x000f95cf, 0x000f97cf, 0x000f99cf, 0x000f9bcf,
+ 0x000f9dcf, 0x000f9fcf, 0x000fa1cf, 0x000fa3cf,
+ 0x000fa5cf, 0x000fa7cf, 0x000fa9cf, 0x000fabcf,
+ 0x000fadcf, 0x000fafcf, 0x000fb1cf, 0x000fb3cf,
+ 0x000fb5cf, 0x000fb7cf, 0x000fb9cf, 0x000fbbcf,
+ 0x000fbdcf, 0x000fbfcf, 0x000fc1cf, 0x000fc3cf,
+ 0x000fc5cf, 0x000fc7cf, 0x000fc9cf, 0x000fcbcf,
+ 0x000fcdcf, 0x000fcfcf, 0x000fd1cf, 0x000fd3cf,
+ 0x000fd5cf, 0x000fd7cf, 0x000fd9cf, 0x000fdbcf,
+ 0x000fddcf, 0x000fdfcf, 0x000fe1cf, 0x000fe3cf,
+ 0x000fe5cf, 0x000fe7cf, 0x000fe9cf, 0x000febcf,
+ 0x000fedcf, 0x000fefcf, 0x000ff1cf, 0x000ff3cf,
+ 0x000ff5cf, 0x000ff7cf, 0x000ff9cf, 0x000ffbcf,
+ 0x000ffdcf, 0x000fffcf, 0x00000170, 0x00000570,
+ 0x00000970, 0x00000d70, 0x00001170, 0x00001570,
+ 0x00001970, 0x00001d70, 0x00002170, 0x00002570,
+ 0x00002970, 0x00002d70, 0x00003170, 0x00003570,
+ 0x00003970, 0x00003d70, 0x00004170, 0x00004570,
+ 0x00004970, 0x00004d70, 0x00005170, 0x00005570,
+ 0x00005970, 0x00005d70, 0x00006170, 0x00006570,
+ 0x00006970, 0x00006d70, 0x00007170, 0x00007570,
+ 0x00007970, 0x00007d70, 0x00008170, 0x00008570,
+ 0x00008970, 0x00008d70, 0x00009170, 0x00009570,
+ 0x00009970, 0x00009d70, 0x0000a170, 0x0000a570,
+ 0x0000a970, 0x0000ad70, 0x0000b170, 0x0000b570,
+ 0x0000b970, 0x0000bd70, 0x0000c170, 0x0000c570,
+ 0x0000c970, 0x0000cd70, 0x0000d170, 0x0000d570,
+ 0x0000d970, 0x0000dd70, 0x0000e170, 0x0000e570,
+ 0x0000e970, 0x0000ed70, 0x0000f170, 0x0000f570,
+ 0x0000f970, 0x0000fd70, 0x00010170, 0x00010570,
+ 0x00010970, 0x00010d70, 0x00011170, 0x00011570,
+ 0x00011970, 0x00011d70, 0x00012170, 0x00012570,
+ 0x00012970, 0x00012d70, 0x00013170, 0x00013570,
+ 0x00013970, 0x00013d70, 0x00014170, 0x00014570,
+ 0x00014970, 0x00014d70, 0x00015170, 0x00015570,
+ 0x00015970, 0x00015d70, 0x00016170, 0x00016570,
+ 0x00016970, 0x00016d70, 0x00017170, 0x00017570,
+ 0x00017970, 0x00017d70, 0x00018170, 0x00018570,
+ 0x00018970, 0x00018d70, 0x00019170, 0x00019570,
+ 0x00019970, 0x00019d70, 0x0001a170, 0x0001a570,
+ 0x0001a970, 0x0001ad70, 0x0001b170, 0x0001b570,
+ 0x0001b970, 0x0001bd70, 0x0001c170, 0x0001c570,
+ 0x0001c970, 0x0001cd70, 0x0001d170, 0x0001d570,
+ 0x0001d970, 0x0001dd70, 0x0001e170, 0x0001e570,
+ 0x0001e970, 0x0001ed70, 0x0001f170, 0x0001f570,
+ 0x0001f970, 0x0001fd70, 0x00020170, 0x00020570,
+ 0x00020970, 0x00020d70, 0x00021170, 0x00021570,
+ 0x00021970, 0x00021d70, 0x00022170, 0x00022570,
+ 0x00022970, 0x00022d70, 0x00023170, 0x00023570,
+ 0x00023970, 0x00023d70, 0x00024170, 0x00024570,
+ 0x00024970, 0x00024d70, 0x00025170, 0x00025570,
+ 0x00025970, 0x00025d70, 0x00026170, 0x00026570,
+ 0x00026970, 0x00026d70, 0x00027170, 0x00027570,
+ 0x00027970, 0x00027d70, 0x00028170, 0x00028570,
+ 0x00028970, 0x00028d70, 0x00029170, 0x00029570,
+ 0x00029970, 0x00029d70, 0x0002a170, 0x0002a570,
+ 0x0002a970, 0x0002ad70, 0x0002b170, 0x0002b570,
+ 0x0002b970, 0x0002bd70, 0x0002c170, 0x0002c570,
+ 0x0002c970, 0x0002cd70, 0x0002d170, 0x0002d570,
+ 0x0002d970, 0x0002dd70, 0x0002e170, 0x0002e570,
+ 0x0002e970, 0x0002ed70, 0x0002f170, 0x0002f570,
+ 0x0002f970, 0x0002fd70, 0x00030170, 0x00030570,
+ 0x00030970, 0x00030d70, 0x00031170, 0x00031570,
+ 0x00031970, 0x00031d70, 0x00032170, 0x00032570,
+ 0x00032970, 0x00032d70, 0x00033170, 0x00033570,
+ 0x00033970, 0x00033d70, 0x00034170, 0x00034570,
+ 0x00034970, 0x00034d70, 0x00035170, 0x00035570,
+ 0x00035970, 0x00035d70, 0x00036170, 0x00036570,
+ 0x00036970, 0x00036d70, 0x00037170, 0x00037570,
+ 0x00037970, 0x00037d70, 0x00038170, 0x00038570,
+ 0x00038970, 0x00038d70, 0x00039170, 0x00039570,
+ 0x00039970, 0x00039d70, 0x0003a170, 0x0003a570,
+ 0x0003a970, 0x0003ad70, 0x0003b170, 0x0003b570,
+ 0x0003b970, 0x0003bd70, 0x0003c170, 0x0003c570,
+ 0x0003c970, 0x0003cd70, 0x0003d170, 0x0003d570,
+ 0x0003d970, 0x0003dd70, 0x0003e170, 0x0003e570,
+ 0x0003e970, 0x0003ed70, 0x0003f170, 0x0003f570,
+ 0x0003f970, 0x0003fd70, 0x00040170, 0x00040570,
+ 0x00040970, 0x00040d70, 0x00041170, 0x00041570,
+ 0x00041970, 0x00041d70, 0x00042170, 0x00042570,
+ 0x00042970, 0x00042d70, 0x00043170, 0x00043570,
+ 0x00043970, 0x00043d70, 0x00044170, 0x00044570,
+ 0x00044970, 0x00044d70, 0x00045170, 0x00045570,
+ 0x00045970, 0x00045d70, 0x00046170, 0x00046570,
+ 0x00046970, 0x00046d70, 0x00047170, 0x00047570,
+ 0x00047970, 0x00047d70, 0x00048170, 0x00048570,
+ 0x00048970, 0x00048d70, 0x00049170, 0x00049570,
+ 0x00049970, 0x00049d70, 0x0004a170, 0x0004a570,
+ 0x0004a970, 0x0004ad70, 0x0004b170, 0x0004b570,
+ 0x0004b970, 0x0004bd70, 0x0004c170, 0x0004c570,
+ 0x0004c970, 0x0004cd70, 0x0004d170, 0x0004d570,
+ 0x0004d970, 0x0004dd70, 0x0004e170, 0x0004e570,
+ 0x0004e970, 0x0004ed70, 0x0004f170, 0x0004f570,
+ 0x0004f970, 0x0004fd70, 0x00050170, 0x00050570,
+ 0x00050970, 0x00050d70, 0x00051170, 0x00051570,
+ 0x00051970, 0x00051d70, 0x00052170, 0x00052570,
+ 0x00052970, 0x00052d70, 0x00053170, 0x00053570,
+ 0x00053970, 0x00053d70, 0x00054170, 0x00054570,
+ 0x00054970, 0x00054d70, 0x00055170, 0x00055570,
+ 0x00055970, 0x00055d70, 0x00056170, 0x00056570,
+ 0x00056970, 0x00056d70, 0x00057170, 0x00057570,
+ 0x00057970, 0x00057d70, 0x00058170, 0x00058570,
+ 0x00058970, 0x00058d70, 0x00059170, 0x00059570,
+ 0x00059970, 0x00059d70, 0x0005a170, 0x0005a570,
+ 0x0005a970, 0x0005ad70, 0x0005b170, 0x0005b570,
+ 0x0005b970, 0x0005bd70, 0x0005c170, 0x0005c570,
+ 0x0005c970, 0x0005cd70, 0x0005d170, 0x0005d570,
+ 0x0005d970, 0x0005dd70, 0x0005e170, 0x0005e570,
+ 0x0005e970, 0x0005ed70, 0x0005f170, 0x0005f570,
+ 0x0005f970, 0x0005fd70, 0x00060170, 0x00060570,
+ 0x00060970, 0x00060d70, 0x00061170, 0x00061570,
+ 0x00061970, 0x00061d70, 0x00062170, 0x00062570,
+ 0x00062970, 0x00062d70, 0x00063170, 0x00063570,
+ 0x00063970, 0x00063d70, 0x00064170, 0x00064570,
+ 0x00064970, 0x00064d70, 0x00065170, 0x00065570,
+ 0x00065970, 0x00065d70, 0x00066170, 0x00066570,
+ 0x00066970, 0x00066d70, 0x00067170, 0x00067570,
+ 0x00067970, 0x00067d70, 0x00068170, 0x00068570,
+ 0x00068970, 0x00068d70, 0x00069170, 0x00069570,
+ 0x00069970, 0x00069d70, 0x0006a170, 0x0006a570,
+ 0x0006a970, 0x0006ad70, 0x0006b170, 0x0006b570,
+ 0x0006b970, 0x0006bd70, 0x0006c170, 0x0006c570,
+ 0x0006c970, 0x0006cd70, 0x0006d170, 0x0006d570,
+ 0x0006d970, 0x0006dd70, 0x0006e170, 0x0006e570,
+ 0x0006e970, 0x0006ed70, 0x0006f170, 0x0006f570,
+ 0x0006f970, 0x0006fd70, 0x00070170, 0x00070570,
+ 0x00070970, 0x00070d70, 0x00071170, 0x00071570,
+ 0x00071970, 0x00071d70, 0x00072170, 0x00072570,
+ 0x00072970, 0x00072d70, 0x00073170, 0x00073570,
+ 0x00073970, 0x00073d70, 0x00074170, 0x00074570,
+ 0x00074970, 0x00074d70, 0x00075170, 0x00075570,
+ 0x00075970, 0x00075d70, 0x00076170, 0x00076570,
+ 0x00076970, 0x00076d70, 0x00077170, 0x00077570,
+ 0x00077970, 0x00077d70, 0x00078170, 0x00078570,
+ 0x00078970, 0x00078d70, 0x00079170, 0x00079570,
+ 0x00079970, 0x00079d70, 0x0007a170, 0x0007a570,
+ 0x0007a970, 0x0007ad70, 0x0007b170, 0x0007b570,
+ 0x0007b970, 0x0007bd70, 0x0007c170, 0x0007c570,
+ 0x0007c970, 0x0007cd70, 0x0007d170, 0x0007d570,
+ 0x0007d970, 0x0007dd70, 0x0007e170, 0x0007e570,
+ 0x0007e970, 0x0007ed70, 0x0007f170, 0x0007f570,
+ 0x0007f970, 0x0007fd70, 0x00080170, 0x00080570,
+ 0x00080970, 0x00080d70, 0x00081170, 0x00081570,
+ 0x00081970, 0x00081d70, 0x00082170, 0x00082570,
+ 0x00082970, 0x00082d70, 0x00083170, 0x00083570,
+ 0x00083970, 0x00083d70, 0x00084170, 0x00084570,
+ 0x00084970, 0x00084d70, 0x00085170, 0x00085570,
+ 0x00085970, 0x00085d70, 0x00086170, 0x00086570,
+ 0x00086970, 0x00086d70, 0x00087170, 0x00087570,
+ 0x00087970, 0x00087d70, 0x00088170, 0x00088570,
+ 0x00088970, 0x00088d70, 0x00089170, 0x00089570,
+ 0x00089970, 0x00089d70, 0x0008a170, 0x0008a570,
+ 0x0008a970, 0x0008ad70, 0x0008b170, 0x0008b570,
+ 0x0008b970, 0x0008bd70, 0x0008c170, 0x0008c570,
+ 0x0008c970, 0x0008cd70, 0x0008d170, 0x0008d570,
+ 0x0008d970, 0x0008dd70, 0x0008e170, 0x0008e570,
+ 0x0008e970, 0x0008ed70, 0x0008f170, 0x0008f570,
+ 0x0008f970, 0x0008fd70, 0x00090170, 0x00090570,
+ 0x00090970, 0x00090d70, 0x00091170, 0x00091570,
+ 0x00091970, 0x00091d70, 0x00092170, 0x00092570,
+ 0x00092970, 0x00092d70, 0x00093170, 0x00093570,
+ 0x00093970, 0x00093d70, 0x00094170, 0x00094570,
+ 0x00094970, 0x00094d70, 0x00095170, 0x00095570,
+ 0x00095970, 0x00095d70, 0x00096170, 0x00096570,
+ 0x00096970, 0x00096d70, 0x00097170, 0x00097570,
+ 0x00097970, 0x00097d70, 0x00098170, 0x00098570,
+ 0x00098970, 0x00098d70, 0x00099170, 0x00099570,
+ 0x00099970, 0x00099d70, 0x0009a170, 0x0009a570,
+ 0x0009a970, 0x0009ad70, 0x0009b170, 0x0009b570,
+ 0x0009b970, 0x0009bd70, 0x0009c170, 0x0009c570,
+ 0x0009c970, 0x0009cd70, 0x0009d170, 0x0009d570,
+ 0x0009d970, 0x0009dd70, 0x0009e170, 0x0009e570,
+ 0x0009e970, 0x0009ed70, 0x0009f170, 0x0009f570,
+ 0x0009f970, 0x0009fd70, 0x000a0170, 0x000a0570,
+ 0x000a0970, 0x000a0d70, 0x000a1170, 0x000a1570,
+ 0x000a1970, 0x000a1d70, 0x000a2170, 0x000a2570,
+ 0x000a2970, 0x000a2d70, 0x000a3170, 0x000a3570,
+ 0x000a3970, 0x000a3d70, 0x000a4170, 0x000a4570,
+ 0x000a4970, 0x000a4d70, 0x000a5170, 0x000a5570,
+ 0x000a5970, 0x000a5d70, 0x000a6170, 0x000a6570,
+ 0x000a6970, 0x000a6d70, 0x000a7170, 0x000a7570,
+ 0x000a7970, 0x000a7d70, 0x000a8170, 0x000a8570,
+ 0x000a8970, 0x000a8d70, 0x000a9170, 0x000a9570,
+ 0x000a9970, 0x000a9d70, 0x000aa170, 0x000aa570,
+ 0x000aa970, 0x000aad70, 0x000ab170, 0x000ab570,
+ 0x000ab970, 0x000abd70, 0x000ac170, 0x000ac570,
+ 0x000ac970, 0x000acd70, 0x000ad170, 0x000ad570,
+ 0x000ad970, 0x000add70, 0x000ae170, 0x000ae570,
+ 0x000ae970, 0x000aed70, 0x000af170, 0x000af570,
+ 0x000af970, 0x000afd70, 0x000b0170, 0x000b0570,
+ 0x000b0970, 0x000b0d70, 0x000b1170, 0x000b1570,
+ 0x000b1970, 0x000b1d70, 0x000b2170, 0x000b2570,
+ 0x000b2970, 0x000b2d70, 0x000b3170, 0x000b3570,
+ 0x000b3970, 0x000b3d70, 0x000b4170, 0x000b4570,
+ 0x000b4970, 0x000b4d70, 0x000b5170, 0x000b5570,
+ 0x000b5970, 0x000b5d70, 0x000b6170, 0x000b6570,
+ 0x000b6970, 0x000b6d70, 0x000b7170, 0x000b7570,
+ 0x000b7970, 0x000b7d70, 0x000b8170, 0x000b8570,
+ 0x000b8970, 0x000b8d70, 0x000b9170, 0x000b9570,
+ 0x000b9970, 0x000b9d70, 0x000ba170, 0x000ba570,
+ 0x000ba970, 0x000bad70, 0x000bb170, 0x000bb570,
+ 0x000bb970, 0x000bbd70, 0x000bc170, 0x000bc570,
+ 0x000bc970, 0x000bcd70, 0x000bd170, 0x000bd570,
+ 0x000bd970, 0x000bdd70, 0x000be170, 0x000be570,
+ 0x000be970, 0x000bed70, 0x000bf170, 0x000bf570,
+ 0x000bf970, 0x000bfd70, 0x000c0170, 0x000c0570,
+ 0x000c0970, 0x000c0d70, 0x000c1170, 0x000c1570,
+ 0x000c1970, 0x000c1d70, 0x000c2170, 0x000c2570,
+ 0x000c2970, 0x000c2d70, 0x000c3170, 0x000c3570,
+ 0x000c3970, 0x000c3d70, 0x000c4170, 0x000c4570,
+ 0x000c4970, 0x000c4d70, 0x000c5170, 0x000c5570,
+ 0x000c5970, 0x000c5d70, 0x000c6170, 0x000c6570,
+ 0x000c6970, 0x000c6d70, 0x000c7170, 0x000c7570,
+ 0x000c7970, 0x000c7d70, 0x000c8170, 0x000c8570,
+ 0x000c8970, 0x000c8d70, 0x000c9170, 0x000c9570,
+ 0x000c9970, 0x000c9d70, 0x000ca170, 0x000ca570,
+ 0x000ca970, 0x000cad70, 0x000cb170, 0x000cb570,
+ 0x000cb970, 0x000cbd70, 0x000cc170, 0x000cc570,
+ 0x000cc970, 0x000ccd70, 0x000cd170, 0x000cd570,
+ 0x000cd970, 0x000cdd70, 0x000ce170, 0x000ce570,
+ 0x000ce970, 0x000ced70, 0x000cf170, 0x000cf570,
+ 0x000cf970, 0x000cfd70, 0x000d0170, 0x000d0570,
+ 0x000d0970, 0x000d0d70, 0x000d1170, 0x000d1570,
+ 0x000d1970, 0x000d1d70, 0x000d2170, 0x000d2570,
+ 0x000d2970, 0x000d2d70, 0x000d3170, 0x000d3570,
+ 0x000d3970, 0x000d3d70, 0x000d4170, 0x000d4570,
+ 0x000d4970, 0x000d4d70, 0x000d5170, 0x000d5570,
+ 0x000d5970, 0x000d5d70, 0x000d6170, 0x000d6570,
+ 0x000d6970, 0x000d6d70, 0x000d7170, 0x000d7570,
+ 0x000d7970, 0x000d7d70, 0x000d8170, 0x000d8570,
+ 0x000d8970, 0x000d8d70, 0x000d9170, 0x000d9570,
+ 0x000d9970, 0x000d9d70, 0x000da170, 0x000da570,
+ 0x000da970, 0x000dad70, 0x000db170, 0x000db570,
+ 0x000db970, 0x000dbd70, 0x000dc170, 0x000dc570,
+ 0x000dc970, 0x000dcd70, 0x000dd170, 0x000dd570,
+ 0x000dd970, 0x000ddd70, 0x000de170, 0x000de570,
+ 0x000de970, 0x000ded70, 0x000df170, 0x000df570,
+ 0x000df970, 0x000dfd70, 0x000e0170, 0x000e0570,
+ 0x000e0970, 0x000e0d70, 0x000e1170, 0x000e1570,
+ 0x000e1970, 0x000e1d70, 0x000e2170, 0x000e2570,
+ 0x000e2970, 0x000e2d70, 0x000e3170, 0x000e3570,
+ 0x000e3970, 0x000e3d70, 0x000e4170, 0x000e4570,
+ 0x000e4970, 0x000e4d70, 0x000e5170, 0x000e5570,
+ 0x000e5970, 0x000e5d70, 0x000e6170, 0x000e6570,
+ 0x000e6970, 0x000e6d70, 0x000e7170, 0x000e7570,
+ 0x000e7970, 0x000e7d70, 0x000e8170, 0x000e8570,
+ 0x000e8970, 0x000e8d70, 0x000e9170, 0x000e9570,
+ 0x000e9970, 0x000e9d70, 0x000ea170, 0x000ea570,
+ 0x000ea970, 0x000ead70, 0x000eb170, 0x000eb570,
+ 0x000eb970, 0x000ebd70, 0x000ec170, 0x000ec570,
+ 0x000ec970, 0x000ecd70, 0x000ed170, 0x000ed570,
+ 0x000ed970, 0x000edd70, 0x000ee170, 0x000ee570,
+ 0x000ee970, 0x000eed70, 0x000ef170, 0x000ef570,
+ 0x000ef970, 0x000efd70, 0x000f0170, 0x000f0570,
+ 0x000f0970, 0x000f0d70, 0x000f1170, 0x000f1570,
+ 0x000f1970, 0x000f1d70, 0x000f2170, 0x000f2570,
+ 0x000f2970, 0x000f2d70, 0x000f3170, 0x000f3570,
+ 0x000f3970, 0x000f3d70, 0x000f4170, 0x000f4570,
+ 0x000f4970, 0x000f4d70, 0x000f5170, 0x000f5570,
+ 0x000f5970, 0x000f5d70, 0x000f6170, 0x000f6570,
+ 0x000f6970, 0x000f6d70, 0x000f7170, 0x000f7570,
+ 0x000f7970, 0x000f7d70, 0x000f8170, 0x000f8570,
+ 0x000f8970, 0x000f8d70, 0x000f9170, 0x000f9570,
+ 0x000f9970, 0x000f9d70, 0x000fa170, 0x000fa570,
+ 0x000fa970, 0x000fad70, 0x000fb170, 0x000fb570,
+ 0x000fb970, 0x000fbd70, 0x000fc170, 0x000fc570,
+ 0x000fc970, 0x000fcd70, 0x000fd170, 0x000fd570,
+ 0x000fd970, 0x000fdd70, 0x000fe170, 0x000fe570,
+ 0x000fe970, 0x000fed70, 0x000ff170, 0x000ff570,
+ 0x000ff970, 0x000ffd70, 0x00100170, 0x00100570,
+ 0x00100970, 0x00100d70, 0x00101170, 0x00101570,
+ 0x00101970, 0x00101d70, 0x00102170, 0x00102570,
+ 0x00102970, 0x00102d70, 0x00103170, 0x00103570,
+ 0x00103970, 0x00103d70, 0x00104170, 0x00104570,
+ 0x00104970, 0x00104d70, 0x00105170, 0x00105570,
+ 0x00105970, 0x00105d70, 0x00106170, 0x00106570,
+ 0x00106970, 0x00106d70, 0x00107170, 0x00107570,
+ 0x00107970, 0x00107d70, 0x00108170, 0x00108570,
+ 0x00108970, 0x00108d70, 0x00109170, 0x00109570,
+ 0x00109970, 0x00109d70, 0x0010a170, 0x0010a570,
+ 0x0010a970, 0x0010ad70, 0x0010b170, 0x0010b570,
+ 0x0010b970, 0x0010bd70, 0x0010c170, 0x0010c570,
+ 0x0010c970, 0x0010cd70, 0x0010d170, 0x0010d570,
+ 0x0010d970, 0x0010dd70, 0x0010e170, 0x0010e570,
+ 0x0010e970, 0x0010ed70, 0x0010f170, 0x0010f570,
+ 0x0010f970, 0x0010fd70, 0x00110170, 0x00110570,
+ 0x00110970, 0x00110d70, 0x00111170, 0x00111570,
+ 0x00111970, 0x00111d70, 0x00112170, 0x00112570,
+ 0x00112970, 0x00112d70, 0x00113170, 0x00113570,
+ 0x00113970, 0x00113d70, 0x00114170, 0x00114570,
+ 0x00114970, 0x00114d70, 0x00115170, 0x00115570,
+ 0x00115970, 0x00115d70, 0x00116170, 0x00116570,
+ 0x00116970, 0x00116d70, 0x00117170, 0x00117570,
+ 0x00117970, 0x00117d70, 0x00118170, 0x00118570,
+ 0x00118970, 0x00118d70, 0x00119170, 0x00119570,
+ 0x00119970, 0x00119d70, 0x0011a170, 0x0011a570,
+ 0x0011a970, 0x0011ad70, 0x0011b170, 0x0011b570,
+ 0x0011b970, 0x0011bd70, 0x0011c170, 0x0011c570,
+ 0x0011c970, 0x0011cd70, 0x0011d170, 0x0011d570,
+ 0x0011d970, 0x0011dd70, 0x0011e170, 0x0011e570,
+ 0x0011e970, 0x0011ed70, 0x0011f170, 0x0011f570,
+ 0x0011f970, 0x0011fd70, 0x00120170, 0x00120570,
+ 0x00120970, 0x00120d70, 0x00121170, 0x00121570,
+ 0x00121970, 0x00121d70, 0x00122170, 0x00122570,
+ 0x00122970, 0x00122d70, 0x00123170, 0x00123570,
+ 0x00123970, 0x00123d70, 0x00124170, 0x00124570,
+ 0x00124970, 0x00124d70, 0x00125170, 0x00125570,
+ 0x00125970, 0x00125d70, 0x00126170, 0x00126570,
+ 0x00126970, 0x00126d70, 0x00127170, 0x00127570,
+ 0x00127970, 0x00127d70, 0x00128170, 0x00128570,
+ 0x00128970, 0x00128d70, 0x00129170, 0x00129570,
+ 0x00129970, 0x00129d70, 0x0012a170, 0x0012a570,
+ 0x0012a970, 0x0012ad70, 0x0012b170, 0x0012b570,
+ 0x0012b970, 0x0012bd70, 0x0012c170, 0x0012c570,
+ 0x0012c970, 0x0012cd70, 0x0012d170, 0x0012d570,
+ 0x0012d970, 0x0012dd70, 0x0012e170, 0x0012e570,
+ 0x0012e970, 0x0012ed70, 0x0012f170, 0x0012f570,
+ 0x0012f970, 0x0012fd70, 0x00130170, 0x00130570,
+ 0x00130970, 0x00130d70, 0x00131170, 0x00131570,
+ 0x00131970, 0x00131d70, 0x00132170, 0x00132570,
+ 0x00132970, 0x00132d70, 0x00133170, 0x00133570,
+ 0x00133970, 0x00133d70, 0x00134170, 0x00134570,
+ 0x00134970, 0x00134d70, 0x00135170, 0x00135570,
+ 0x00135970, 0x00135d70, 0x00136170, 0x00136570,
+ 0x00136970, 0x00136d70, 0x00137170, 0x00137570,
+ 0x00137970, 0x00137d70, 0x00138170, 0x00138570,
+ 0x00138970, 0x00138d70, 0x00139170, 0x00139570,
+ 0x00139970, 0x00139d70, 0x0013a170, 0x0013a570,
+ 0x0013a970, 0x0013ad70, 0x0013b170, 0x0013b570,
+ 0x0013b970, 0x0013bd70, 0x0013c170, 0x0013c570,
+ 0x0013c970, 0x0013cd70, 0x0013d170, 0x0013d570,
+ 0x0013d970, 0x0013dd70, 0x0013e170, 0x0013e570,
+ 0x0013e970, 0x0013ed70, 0x0013f170, 0x0013f570,
+ 0x0013f970, 0x0013fd70, 0x00140170, 0x00140570,
+ 0x00140970, 0x00140d70, 0x00141170, 0x00141570,
+ 0x00141970, 0x00141d70, 0x00142170, 0x00142570,
+ 0x00142970, 0x00142d70, 0x00143170, 0x00143570,
+ 0x00143970, 0x00143d70, 0x00144170, 0x00144570,
+ 0x00144970, 0x00144d70, 0x00145170, 0x00145570,
+ 0x00145970, 0x00145d70, 0x00146170, 0x00146570,
+ 0x00146970, 0x00146d70, 0x00147170, 0x00147570,
+ 0x00147970, 0x00147d70, 0x00148170, 0x00148570,
+ 0x00148970, 0x00148d70, 0x00149170, 0x00149570,
+ 0x00149970, 0x00149d70, 0x0014a170, 0x0014a570,
+ 0x0014a970, 0x0014ad70, 0x0014b170, 0x0014b570,
+ 0x0014b970, 0x0014bd70, 0x0014c170, 0x0014c570,
+ 0x0014c970, 0x0014cd70, 0x0014d170, 0x0014d570,
+ 0x0014d970, 0x0014dd70, 0x0014e170, 0x0014e570,
+ 0x0014e970, 0x0014ed70, 0x0014f170, 0x0014f570,
+ 0x0014f970, 0x0014fd70, 0x00150170, 0x00150570,
+ 0x00150970, 0x00150d70, 0x00151170, 0x00151570,
+ 0x00151970, 0x00151d70, 0x00152170, 0x00152570,
+ 0x00152970, 0x00152d70, 0x00153170, 0x00153570,
+ 0x00153970, 0x00153d70, 0x00154170, 0x00154570,
+ 0x00154970, 0x00154d70, 0x00155170, 0x00155570,
+ 0x00155970, 0x00155d70, 0x00156170, 0x00156570,
+ 0x00156970, 0x00156d70, 0x00157170, 0x00157570,
+ 0x00157970, 0x00157d70, 0x00158170, 0x00158570,
+ 0x00158970, 0x00158d70, 0x00159170, 0x00159570,
+ 0x00159970, 0x00159d70, 0x0015a170, 0x0015a570,
+ 0x0015a970, 0x0015ad70, 0x0015b170, 0x0015b570,
+ 0x0015b970, 0x0015bd70, 0x0015c170, 0x0015c570,
+ 0x0015c970, 0x0015cd70, 0x0015d170, 0x0015d570,
+ 0x0015d970, 0x0015dd70, 0x0015e170, 0x0015e570,
+ 0x0015e970, 0x0015ed70, 0x0015f170, 0x0015f570,
+ 0x0015f970, 0x0015fd70, 0x00160170, 0x00160570,
+ 0x00160970, 0x00160d70, 0x00161170, 0x00161570,
+ 0x00161970, 0x00161d70, 0x00162170, 0x00162570,
+ 0x00162970, 0x00162d70, 0x00163170, 0x00163570,
+ 0x00163970, 0x00163d70, 0x00164170, 0x00164570,
+ 0x00164970, 0x00164d70, 0x00165170, 0x00165570,
+ 0x00165970, 0x00165d70, 0x00166170, 0x00166570,
+ 0x00166970, 0x00166d70, 0x00167170, 0x00167570,
+ 0x00167970, 0x00167d70, 0x00168170, 0x00168570,
+ 0x00168970, 0x00168d70, 0x00169170, 0x00169570,
+ 0x00169970, 0x00169d70, 0x0016a170, 0x0016a570,
+ 0x0016a970, 0x0016ad70, 0x0016b170, 0x0016b570,
+ 0x0016b970, 0x0016bd70, 0x0016c170, 0x0016c570,
+ 0x0016c970, 0x0016cd70, 0x0016d170, 0x0016d570,
+ 0x0016d970, 0x0016dd70, 0x0016e170, 0x0016e570,
+ 0x0016e970, 0x0016ed70, 0x0016f170, 0x0016f570,
+ 0x0016f970, 0x0016fd70, 0x00170170, 0x00170570,
+ 0x00170970, 0x00170d70, 0x00171170, 0x00171570,
+ 0x00171970, 0x00171d70, 0x00172170, 0x00172570,
+ 0x00172970, 0x00172d70, 0x00173170, 0x00173570,
+ 0x00173970, 0x00173d70, 0x00174170, 0x00174570,
+ 0x00174970, 0x00174d70, 0x00175170, 0x00175570,
+ 0x00175970, 0x00175d70, 0x00176170, 0x00176570,
+ 0x00176970, 0x00176d70, 0x00177170, 0x00177570,
+ 0x00177970, 0x00177d70, 0x00178170, 0x00178570,
+ 0x00178970, 0x00178d70, 0x00179170, 0x00179570,
+ 0x00179970, 0x00179d70, 0x0017a170, 0x0017a570,
+ 0x0017a970, 0x0017ad70, 0x0017b170, 0x0017b570,
+ 0x0017b970, 0x0017bd70, 0x0017c170, 0x0017c570,
+ 0x0017c970, 0x0017cd70, 0x0017d170, 0x0017d570,
+ 0x0017d970, 0x0017dd70, 0x0017e170, 0x0017e570,
+ 0x0017e970, 0x0017ed70, 0x0017f170, 0x0017f570,
+ 0x0017f970, 0x0017fd70, 0x00180170, 0x00180570,
+ 0x00180970, 0x00180d70, 0x00181170, 0x00181570,
+ 0x00181970, 0x00181d70, 0x00182170, 0x00182570,
+ 0x00182970, 0x00182d70, 0x00183170, 0x00183570,
+ 0x00183970, 0x00183d70, 0x00184170, 0x00184570,
+ 0x00184970, 0x00184d70, 0x00185170, 0x00185570,
+ 0x00185970, 0x00185d70, 0x00186170, 0x00186570,
+ 0x00186970, 0x00186d70, 0x00187170, 0x00187570,
+ 0x00187970, 0x00187d70, 0x00188170, 0x00188570,
+ 0x00188970, 0x00188d70, 0x00189170, 0x00189570,
+ 0x00189970, 0x00189d70, 0x0018a170, 0x0018a570,
+ 0x0018a970, 0x0018ad70, 0x0018b170, 0x0018b570,
+ 0x0018b970, 0x0018bd70, 0x0018c170, 0x0018c570,
+ 0x0018c970, 0x0018cd70, 0x0018d170, 0x0018d570,
+ 0x0018d970, 0x0018dd70, 0x0018e170, 0x0018e570,
+ 0x0018e970, 0x0018ed70, 0x0018f170, 0x0018f570,
+ 0x0018f970, 0x0018fd70, 0x00190170, 0x00190570,
+ 0x00190970, 0x00190d70, 0x00191170, 0x00191570,
+ 0x00191970, 0x00191d70, 0x00192170, 0x00192570,
+ 0x00192970, 0x00192d70, 0x00193170, 0x00193570,
+ 0x00193970, 0x00193d70, 0x00194170, 0x00194570,
+ 0x00194970, 0x00194d70, 0x00195170, 0x00195570,
+ 0x00195970, 0x00195d70, 0x00196170, 0x00196570,
+ 0x00196970, 0x00196d70, 0x00197170, 0x00197570,
+ 0x00197970, 0x00197d70, 0x00198170, 0x00198570,
+ 0x00198970, 0x00198d70, 0x00199170, 0x00199570,
+ 0x00199970, 0x00199d70, 0x0019a170, 0x0019a570,
+ 0x0019a970, 0x0019ad70, 0x0019b170, 0x0019b570,
+ 0x0019b970, 0x0019bd70, 0x0019c170, 0x0019c570,
+ 0x0019c970, 0x0019cd70, 0x0019d170, 0x0019d570,
+ 0x0019d970, 0x0019dd70, 0x0019e170, 0x0019e570,
+ 0x0019e970, 0x0019ed70, 0x0019f170, 0x0019f570,
+ 0x0019f970, 0x0019fd70, 0x001a0170, 0x001a0570,
+ 0x001a0970, 0x001a0d70, 0x001a1170, 0x001a1570,
+ 0x001a1970, 0x001a1d70, 0x001a2170, 0x001a2570,
+ 0x001a2970, 0x001a2d70, 0x001a3170, 0x001a3570,
+ 0x001a3970, 0x001a3d70, 0x001a4170, 0x001a4570,
+ 0x001a4970, 0x001a4d70, 0x001a5170, 0x001a5570,
+ 0x001a5970, 0x001a5d70, 0x001a6170, 0x001a6570,
+ 0x001a6970, 0x001a6d70, 0x001a7170, 0x001a7570,
+ 0x001a7970, 0x001a7d70, 0x001a8170, 0x001a8570,
+ 0x001a8970, 0x001a8d70, 0x001a9170, 0x001a9570,
+ 0x001a9970, 0x001a9d70, 0x001aa170, 0x001aa570,
+ 0x001aa970, 0x001aad70, 0x001ab170, 0x001ab570,
+ 0x001ab970, 0x001abd70, 0x001ac170, 0x001ac570,
+ 0x001ac970, 0x001acd70, 0x001ad170, 0x001ad570,
+ 0x001ad970, 0x001add70, 0x001ae170, 0x001ae570,
+ 0x001ae970, 0x001aed70, 0x001af170, 0x001af570,
+ 0x001af970, 0x001afd70, 0x001b0170, 0x001b0570,
+ 0x001b0970, 0x001b0d70, 0x001b1170, 0x001b1570,
+ 0x001b1970, 0x001b1d70, 0x001b2170, 0x001b2570,
+ 0x001b2970, 0x001b2d70, 0x001b3170, 0x001b3570,
+ 0x001b3970, 0x001b3d70, 0x001b4170, 0x001b4570,
+ 0x001b4970, 0x001b4d70, 0x001b5170, 0x001b5570,
+ 0x001b5970, 0x001b5d70, 0x001b6170, 0x001b6570,
+ 0x001b6970, 0x001b6d70, 0x001b7170, 0x001b7570,
+ 0x001b7970, 0x001b7d70, 0x001b8170, 0x001b8570,
+ 0x001b8970, 0x001b8d70, 0x001b9170, 0x001b9570,
+ 0x001b9970, 0x001b9d70, 0x001ba170, 0x001ba570,
+ 0x001ba970, 0x001bad70, 0x001bb170, 0x001bb570,
+ 0x001bb970, 0x001bbd70, 0x001bc170, 0x001bc570,
+ 0x001bc970, 0x001bcd70, 0x001bd170, 0x001bd570,
+ 0x001bd970, 0x001bdd70, 0x001be170, 0x001be570,
+ 0x001be970, 0x001bed70, 0x001bf170, 0x001bf570,
+ 0x001bf970, 0x001bfd70, 0x001c0170, 0x001c0570,
+ 0x001c0970, 0x001c0d70, 0x001c1170, 0x001c1570,
+ 0x001c1970, 0x001c1d70, 0x001c2170, 0x001c2570,
+ 0x001c2970, 0x001c2d70, 0x001c3170, 0x001c3570,
+ 0x001c3970, 0x001c3d70, 0x001c4170, 0x001c4570,
+ 0x001c4970, 0x001c4d70, 0x001c5170, 0x001c5570,
+ 0x001c5970, 0x001c5d70, 0x001c6170, 0x001c6570,
+ 0x001c6970, 0x001c6d70, 0x001c7170, 0x001c7570,
+ 0x001c7970, 0x001c7d70, 0x001c8170, 0x001c8570,
+ 0x001c8970, 0x001c8d70, 0x001c9170, 0x001c9570,
+ 0x001c9970, 0x001c9d70, 0x001ca170, 0x001ca570,
+ 0x001ca970, 0x001cad70, 0x001cb170, 0x001cb570,
+ 0x001cb970, 0x001cbd70, 0x001cc170, 0x001cc570,
+ 0x001cc970, 0x001ccd70, 0x001cd170, 0x001cd570,
+ 0x001cd970, 0x001cdd70, 0x001ce170, 0x001ce570,
+ 0x001ce970, 0x001ced70, 0x001cf170, 0x001cf570,
+ 0x001cf970, 0x001cfd70, 0x001d0170, 0x001d0570,
+ 0x001d0970, 0x001d0d70, 0x001d1170, 0x001d1570,
+ 0x001d1970, 0x001d1d70, 0x001d2170, 0x001d2570,
+ 0x001d2970, 0x001d2d70, 0x001d3170, 0x001d3570,
+ 0x001d3970, 0x001d3d70, 0x001d4170, 0x001d4570,
+ 0x001d4970, 0x001d4d70, 0x001d5170, 0x001d5570,
+ 0x001d5970, 0x001d5d70, 0x001d6170, 0x001d6570,
+ 0x001d6970, 0x001d6d70, 0x001d7170, 0x001d7570,
+ 0x001d7970, 0x001d7d70, 0x001d8170, 0x001d8570,
+ 0x001d8970, 0x001d8d70, 0x001d9170, 0x001d9570,
+ 0x001d9970, 0x001d9d70, 0x001da170, 0x001da570,
+ 0x001da970, 0x001dad70, 0x001db170, 0x001db570,
+ 0x001db970, 0x001dbd70, 0x001dc170, 0x001dc570,
+ 0x001dc970, 0x001dcd70, 0x001dd170, 0x001dd570,
+ 0x001dd970, 0x001ddd70, 0x001de170, 0x001de570,
+ 0x001de970, 0x001ded70, 0x001df170, 0x001df570,
+ 0x001df970, 0x001dfd70, 0x001e0170, 0x001e0570,
+ 0x001e0970, 0x001e0d70, 0x001e1170, 0x001e1570,
+ 0x001e1970, 0x001e1d70, 0x001e2170, 0x001e2570,
+ 0x001e2970, 0x001e2d70, 0x001e3170, 0x001e3570,
+ 0x001e3970, 0x001e3d70, 0x001e4170, 0x001e4570,
+ 0x001e4970, 0x001e4d70, 0x001e5170, 0x001e5570,
+ 0x001e5970, 0x001e5d70, 0x001e6170, 0x001e6570,
+ 0x001e6970, 0x001e6d70, 0x001e7170, 0x001e7570,
+ 0x001e7970, 0x001e7d70, 0x001e8170, 0x001e8570,
+ 0x001e8970, 0x001e8d70, 0x001e9170, 0x001e9570,
+ 0x001e9970, 0x001e9d70, 0x001ea170, 0x001ea570,
+ 0x001ea970, 0x001ead70, 0x001eb170, 0x001eb570,
+ 0x001eb970, 0x001ebd70, 0x001ec170, 0x001ec570,
+ 0x001ec970, 0x001ecd70, 0x001ed170, 0x001ed570,
+ 0x001ed970, 0x001edd70, 0x001ee170, 0x001ee570,
+ 0x001ee970, 0x001eed70, 0x001ef170, 0x001ef570,
+ 0x001ef970, 0x001efd70, 0x001f0170, 0x001f0570,
+ 0x001f0970, 0x001f0d70, 0x001f1170, 0x001f1570,
+ 0x001f1970, 0x001f1d70, 0x001f2170, 0x001f2570,
+ 0x001f2970, 0x001f2d70, 0x001f3170, 0x001f3570,
+ 0x001f3970, 0x001f3d70, 0x001f4170, 0x001f4570,
+ 0x001f4970, 0x001f4d70, 0x001f5170, 0x001f5570,
+ 0x001f5970, 0x001f5d70, 0x001f6170, 0x001f6570,
+ 0x001f6970, 0x001f6d70, 0x001f7170, 0x001f7570,
+ 0x001f7970, 0x001f7d70, 0x001f8170, 0x001f8570,
+ 0x001f8970, 0x001f8d70, 0x001f9170, 0x001f9570,
+ 0x001f9970, 0x001f9d70, 0x001fa170, 0x001fa570,
+ 0x001fa970, 0x001fad70, 0x001fb170, 0x001fb570,
+ 0x001fb970, 0x001fbd70, 0x001fc170, 0x001fc570,
+ 0x001fc970, 0x001fcd70, 0x001fd170, 0x001fd570,
+ 0x001fd970, 0x001fdd70, 0x001fe170, 0x001fe570,
+ 0x001fe970, 0x001fed70, 0x001ff170, 0x001ff570,
+ 0x001ff970, 0x001ffd70
+#endif /* LONGER_HUFFTABLE */
+ },
+
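+	/*
+	 * len_table: one packed entry per match length (deflate lengths
+	 * run 3..258, hence the 256 entries below).  Assuming the
+	 * convention of igzip's get_len_code() helper, the low 5 bits
+	 * of an entry hold the total code length and the upper bits the
+	 * bit-reversed code plus any extra bits, ready to be ORed into
+	 * the output bitstream LSB first.
+	 */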
+ .len_table = {
+ 0x000bffef, 0x00000002, 0x00000044, 0x00000144,
+ 0x000002c5, 0x00000526, 0x00000ea7, 0x000001a7,
+ 0x000001c6, 0x000005c6, 0x00001869, 0x00003869,
+ 0x00000469, 0x00002469, 0x00001469, 0x00003469,
+ 0x00000c6a, 0x00002c6a, 0x00004c6a, 0x00006c6a,
+ 0x000030eb, 0x000070eb, 0x0000b0eb, 0x0000f0eb,
+ 0x000041ec, 0x0000c1ec, 0x000141ec, 0x0001c1ec,
+ 0x000021ec, 0x0000a1ec, 0x000121ec, 0x0001a1ec,
+ 0x000061ed, 0x0000e1ed, 0x000161ed, 0x0001e1ed,
+ 0x000261ed, 0x0002e1ed, 0x000361ed, 0x0003e1ed,
+ 0x000011ed, 0x000091ed, 0x000111ed, 0x000191ed,
+ 0x000211ed, 0x000291ed, 0x000311ed, 0x000391ed,
+ 0x000051ed, 0x0000d1ed, 0x000151ed, 0x0001d1ed,
+ 0x000251ed, 0x0002d1ed, 0x000351ed, 0x0003d1ed,
+ 0x000031ed, 0x0000b1ed, 0x000131ed, 0x0001b1ed,
+ 0x000231ed, 0x0002b1ed, 0x000331ed, 0x0003b1ed,
+ 0x00003fef, 0x00013fef, 0x00023fef, 0x00033fef,
+ 0x00043fef, 0x00053fef, 0x00063fef, 0x00073fef,
+ 0x00083fef, 0x00093fef, 0x000a3fef, 0x000b3fef,
+ 0x000c3fef, 0x000d3fef, 0x000e3fef, 0x000f3fef,
+ 0x00007ff0, 0x00027ff0, 0x00047ff0, 0x00067ff0,
+ 0x00087ff0, 0x000a7ff0, 0x000c7ff0, 0x000e7ff0,
+ 0x00107ff0, 0x00127ff0, 0x00147ff0, 0x00167ff0,
+ 0x00187ff0, 0x001a7ff0, 0x001c7ff0, 0x001e7ff0,
+ 0x0000fff1, 0x0004fff1, 0x0008fff1, 0x000cfff1,
+ 0x0010fff1, 0x0014fff1, 0x0018fff1, 0x001cfff1,
+ 0x0020fff1, 0x0024fff1, 0x0028fff1, 0x002cfff1,
+ 0x0030fff1, 0x0034fff1, 0x0038fff1, 0x003cfff1,
+ 0x0002fff1, 0x0006fff1, 0x000afff1, 0x000efff1,
+ 0x0012fff1, 0x0016fff1, 0x001afff1, 0x001efff1,
+ 0x0022fff1, 0x0026fff1, 0x002afff1, 0x002efff1,
+ 0x0032fff1, 0x0036fff1, 0x003afff1, 0x003efff1,
+ 0x00017ff1, 0x00037ff1, 0x00057ff1, 0x00077ff1,
+ 0x00097ff1, 0x000b7ff1, 0x000d7ff1, 0x000f7ff1,
+ 0x00117ff1, 0x00137ff1, 0x00157ff1, 0x00177ff1,
+ 0x00197ff1, 0x001b7ff1, 0x001d7ff1, 0x001f7ff1,
+ 0x00217ff1, 0x00237ff1, 0x00257ff1, 0x00277ff1,
+ 0x00297ff1, 0x002b7ff1, 0x002d7ff1, 0x002f7ff1,
+ 0x00317ff1, 0x00337ff1, 0x00357ff1, 0x00377ff1,
+ 0x00397ff1, 0x003b7ff1, 0x003d7ff1, 0x003f7ff1,
+ 0x0001fff2, 0x0005fff2, 0x0009fff2, 0x000dfff2,
+ 0x0011fff2, 0x0015fff2, 0x0019fff2, 0x001dfff2,
+ 0x0021fff2, 0x0025fff2, 0x0029fff2, 0x002dfff2,
+ 0x0031fff2, 0x0035fff2, 0x0039fff2, 0x003dfff2,
+ 0x0041fff2, 0x0045fff2, 0x0049fff2, 0x004dfff2,
+ 0x0051fff2, 0x0055fff2, 0x0059fff2, 0x005dfff2,
+ 0x0061fff2, 0x0065fff2, 0x0069fff2, 0x006dfff2,
+ 0x0071fff2, 0x0075fff2, 0x0079fff2, 0x007dfff2,
+ 0x0007fff4, 0x0017fff4, 0x0027fff4, 0x0037fff4,
+ 0x0047fff4, 0x0057fff4, 0x0067fff4, 0x0077fff4,
+ 0x0087fff4, 0x0097fff4, 0x00a7fff4, 0x00b7fff4,
+ 0x00c7fff4, 0x00d7fff4, 0x00e7fff4, 0x00f7fff4,
+ 0x0107fff4, 0x0117fff4, 0x0127fff4, 0x0137fff4,
+ 0x0147fff4, 0x0157fff4, 0x0167fff4, 0x0177fff4,
+ 0x0187fff4, 0x0197fff4, 0x01a7fff4, 0x01b7fff4,
+ 0x01c7fff4, 0x01d7fff4, 0x01e7fff4, 0x01f7fff4,
+ 0x000ffff4, 0x001ffff4, 0x002ffff4, 0x003ffff4,
+ 0x004ffff4, 0x005ffff4, 0x006ffff4, 0x007ffff4,
+ 0x008ffff4, 0x009ffff4, 0x00affff4, 0x00bffff4,
+ 0x00cffff4, 0x00dffff4, 0x00effff4, 0x00fffff4,
+ 0x010ffff4, 0x011ffff4, 0x012ffff4, 0x013ffff4,
+ 0x014ffff4, 0x015ffff4, 0x016ffff4, 0x017ffff4,
+ 0x018ffff4, 0x019ffff4, 0x01affff4, 0x01bffff4,
+ 0x01cffff4, 0x01dffff4, 0x01effff4, 0x0000bfeb},
+
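+	/*
+	 * lit_table / lit_table_sizes: codes and code lengths for the
+	 * 257 literal-tree symbols 0..256 (the bytes 0x00..0xff plus
+	 * the end-of-block symbol; the final 15-bit code 0x1fff below
+	 * is the end-of-block entry).
+	 */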
+ .lit_table = {
+ 0x001e, 0x004d, 0x00e3, 0x00cd, 0x002d, 0x01e3, 0x0013, 0x0113,
+ 0x0093, 0x0193, 0x0019, 0x0053, 0x0153, 0x00ad, 0x00d3, 0x01d3,
+ 0x0033, 0x0047, 0x0247, 0x0147, 0x0347, 0x038f, 0x078f, 0x004f,
+ 0x00c7, 0x044f, 0x024f, 0x064f, 0x02c7, 0x014f, 0x01c7, 0x0133,
+ 0x0006, 0x03c7, 0x00b3, 0x0027, 0x0227, 0x0127, 0x0327, 0x01b3,
+ 0x0073, 0x0173, 0x00a7, 0x02a7, 0x0059, 0x006d, 0x00ed, 0x01a7,
+ 0x001d, 0x009d, 0x005d, 0x00f3, 0x01f3, 0x000b, 0x010b, 0x008b,
+ 0x018b, 0x004b, 0x014b, 0x00cb, 0x03a7, 0x0067, 0x01cb, 0x002b,
+ 0x012b, 0x00dd, 0x003d, 0x00ab, 0x01ab, 0x006b, 0x016b, 0x00eb,
+ 0x01eb, 0x001b, 0x0267, 0x0167, 0x011b, 0x009b, 0x019b, 0x005b,
+ 0x015b, 0x0367, 0x00db, 0x01db, 0x003b, 0x00e7, 0x02e7, 0x01e7,
+ 0x03e7, 0x0017, 0x054f, 0x0217, 0x0117, 0x034f, 0x074f, 0x0317,
+ 0x0097, 0x003e, 0x00bd, 0x0039, 0x0079, 0x0001, 0x007d, 0x00fd,
+ 0x0005, 0x0021, 0x0297, 0x013b, 0x0045, 0x0025, 0x0065, 0x0011,
+ 0x0015, 0x0197, 0x0031, 0x0009, 0x0055, 0x0035, 0x00bb, 0x0003,
+ 0x01bb, 0x0083, 0x0397, 0x00cf, 0x0057, 0x04cf, 0x0257, 0x0157,
+ 0x007b, 0x02cf, 0x06cf, 0x01cf, 0x05cf, 0x03cf, 0x07cf, 0x002f,
+ 0x042f, 0x022f, 0x062f, 0x0357, 0x012f, 0x052f, 0x032f, 0x00d7,
+ 0x02d7, 0x072f, 0x00af, 0x04af, 0x02af, 0x06af, 0x01af, 0x05af,
+ 0x03af, 0x07af, 0x006f, 0x046f, 0x026f, 0x066f, 0x016f, 0x056f,
+ 0x01d7, 0x036f, 0x076f, 0x00ef, 0x03d7, 0x04ef, 0x0037, 0x02ef,
+ 0x06ef, 0x01ef, 0x05ef, 0x03ef, 0x07ef, 0x001f, 0x041f, 0x021f,
+ 0x0237, 0x061f, 0x011f, 0x051f, 0x0137, 0x031f, 0x071f, 0x009f,
+ 0x049f, 0x029f, 0x069f, 0x019f, 0x059f, 0x0337, 0x039f, 0x079f,
+ 0x017b, 0x00b7, 0x00fb, 0x01fb, 0x005f, 0x045f, 0x025f, 0x02b7,
+ 0x065f, 0x015f, 0x055f, 0x035f, 0x075f, 0x00df, 0x04df, 0x01b7,
+ 0x03b7, 0x02df, 0x06df, 0x01df, 0x05df, 0x03df, 0x07df, 0x003f,
+ 0x043f, 0x023f, 0x063f, 0x013f, 0x053f, 0x033f, 0x073f, 0x00bf,
+ 0x0007, 0x04bf, 0x02bf, 0x0077, 0x06bf, 0x01bf, 0x05bf, 0x0277,
+ 0x0177, 0x03bf, 0x07bf, 0x007f, 0x047f, 0x027f, 0x067f, 0x017f,
+ 0x0107, 0x0377, 0x057f, 0x00f7, 0x037f, 0x077f, 0x00ff, 0x04ff,
+ 0x02f7, 0x01f7, 0x02ff, 0x06ff, 0x03f7, 0x000f, 0x0087, 0x0043,
+ 0x1fff},
+
+ .lit_table_sizes = {
+ 0x06, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x07, 0x09, 0x09, 0x08, 0x09, 0x09,
+ 0x09, 0x0a, 0x0a, 0x0a, 0x0a, 0x0b, 0x0b, 0x0b,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a, 0x09,
+ 0x05, 0x0a, 0x09, 0x0a, 0x0a, 0x0a, 0x0a, 0x09,
+ 0x09, 0x09, 0x0a, 0x0a, 0x07, 0x08, 0x08, 0x0a,
+ 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x0a, 0x0a, 0x09, 0x09,
+ 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x0a, 0x0a, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x0a, 0x09, 0x09, 0x09, 0x0a, 0x0a, 0x0a,
+ 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x06, 0x08, 0x07, 0x07, 0x06, 0x08, 0x08,
+ 0x07, 0x06, 0x0a, 0x09, 0x07, 0x07, 0x07, 0x06,
+ 0x07, 0x0a, 0x06, 0x06, 0x07, 0x07, 0x09, 0x08,
+ 0x09, 0x08, 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0a,
+ 0x09, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b,
+ 0x09, 0x0a, 0x09, 0x09, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x09, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0a,
+ 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x09, 0x0a, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x09, 0x08,
+ 0x0f},
+
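+	/*
+	 * dcodes / dcodes_sizes: standalone distance codes.  Without
+	 * LONGER_HUFFTABLE all 30 deflate distance symbols are listed;
+	 * with it the large dist_table above appears to cover the short
+	 * distances already, leaving only the last four symbols here.
+	 */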
+#ifndef LONGER_HUFFTABLE
+ .dcodes = {
+ 0x003f, 0x00ff, 0x00bf, 0x01ff, 0x007f, 0x001f, 0x005f, 0x0017,
+ 0x0037, 0x000f, 0x0009, 0x0019, 0x0005, 0x0015, 0x0004, 0x000c,
+ 0x0002, 0x000d, 0x000a, 0x001d, 0x0006, 0x0003, 0x0000, 0x0013,
+ 0x000e, 0x000b, 0x0001, 0x001b, 0x0007, 0x002f},
+
+ .dcodes_sizes = {
+ 0x08, 0x09, 0x08, 0x09, 0x08, 0x07, 0x07, 0x06,
+ 0x06, 0x06, 0x05, 0x05, 0x05, 0x05, 0x04, 0x04,
+ 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x05,
+ 0x04, 0x05, 0x04, 0x05, 0x05, 0x06}
+#else
+ .dcodes = {
+ 0x0001, 0x001b, 0x0007, 0x002f},
+
+ .dcodes_sizes = {
+ 0x04, 0x05, 0x05, 0x06}
+#endif
+};
+#endif /* LARGE_WINDOW */
+
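+/*
+ * hufftables_static encodes the fixed (BTYPE = 01) Huffman code of
+ * RFC 1951 section 3.2.6, so no code-length table is shipped in the
+ * stream: deflate_hdr holds the 3-bit block header 0b011, i.e.
+ * BFINAL = 1 and BTYPE = 01 read LSB first, which is 0 whole header
+ * bytes plus 3 extra bits, matching the counts below.
+ *
+ * A minimal sketch of how an entry is consumed, assuming the packing
+ * used by igzip's get_dist_code()/get_len_code() helpers:
+ *
+ *     uint32_t e    = hufftables_static.dist_table[dist - 1];
+ *     uint64_t code = e >> 5;     // bit-reversed code (+ extra bits)
+ *     uint64_t len  = e & 0x1F;   // number of bits to emit LSB first
+ *
+ * e.g. dist_table[1] = 0x00000205 gives len 5 and code 0b10000: the
+ * static 5-bit distance code 00001 for distance 2, pre-reversed so it
+ * can be written directly into the LSB-first bitstream.
+ */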
+struct isal_hufftables hufftables_static = {
+
+ .deflate_hdr = {0x03},
+ .deflate_hdr_count = 0,
+ .deflate_hdr_extra_bits = 3,
+
+ .dist_table = {
+ 0x00000005, 0x00000205,
+#ifdef LONGER_HUFFTABLE
+ 0x00000105, 0x00000305, 0x00000086, 0x00000486,
+ 0x00000286, 0x00000686, 0x00000187, 0x00000587,
+ 0x00000987, 0x00000d87, 0x00000387, 0x00000787,
+ 0x00000b87, 0x00000f87, 0x00000048, 0x00000448,
+ 0x00000848, 0x00000c48, 0x00001048, 0x00001448,
+ 0x00001848, 0x00001c48, 0x00000248, 0x00000648,
+ 0x00000a48, 0x00000e48, 0x00001248, 0x00001648,
+ 0x00001a48, 0x00001e48, 0x00000149, 0x00000549,
+ 0x00000949, 0x00000d49, 0x00001149, 0x00001549,
+ 0x00001949, 0x00001d49, 0x00002149, 0x00002549,
+ 0x00002949, 0x00002d49, 0x00003149, 0x00003549,
+ 0x00003949, 0x00003d49, 0x00000349, 0x00000749,
+ 0x00000b49, 0x00000f49, 0x00001349, 0x00001749,
+ 0x00001b49, 0x00001f49, 0x00002349, 0x00002749,
+ 0x00002b49, 0x00002f49, 0x00003349, 0x00003749,
+ 0x00003b49, 0x00003f49, 0x000000ca, 0x000004ca,
+ 0x000008ca, 0x00000cca, 0x000010ca, 0x000014ca,
+ 0x000018ca, 0x00001cca, 0x000020ca, 0x000024ca,
+ 0x000028ca, 0x00002cca, 0x000030ca, 0x000034ca,
+ 0x000038ca, 0x00003cca, 0x000040ca, 0x000044ca,
+ 0x000048ca, 0x00004cca, 0x000050ca, 0x000054ca,
+ 0x000058ca, 0x00005cca, 0x000060ca, 0x000064ca,
+ 0x000068ca, 0x00006cca, 0x000070ca, 0x000074ca,
+ 0x000078ca, 0x00007cca, 0x000002ca, 0x000006ca,
+ 0x00000aca, 0x00000eca, 0x000012ca, 0x000016ca,
+ 0x00001aca, 0x00001eca, 0x000022ca, 0x000026ca,
+ 0x00002aca, 0x00002eca, 0x000032ca, 0x000036ca,
+ 0x00003aca, 0x00003eca, 0x000042ca, 0x000046ca,
+ 0x00004aca, 0x00004eca, 0x000052ca, 0x000056ca,
+ 0x00005aca, 0x00005eca, 0x000062ca, 0x000066ca,
+ 0x00006aca, 0x00006eca, 0x000072ca, 0x000076ca,
+ 0x00007aca, 0x00007eca, 0x000001cb, 0x000005cb,
+ 0x000009cb, 0x00000dcb, 0x000011cb, 0x000015cb,
+ 0x000019cb, 0x00001dcb, 0x000021cb, 0x000025cb,
+ 0x000029cb, 0x00002dcb, 0x000031cb, 0x000035cb,
+ 0x000039cb, 0x00003dcb, 0x000041cb, 0x000045cb,
+ 0x000049cb, 0x00004dcb, 0x000051cb, 0x000055cb,
+ 0x000059cb, 0x00005dcb, 0x000061cb, 0x000065cb,
+ 0x000069cb, 0x00006dcb, 0x000071cb, 0x000075cb,
+ 0x000079cb, 0x00007dcb, 0x000081cb, 0x000085cb,
+ 0x000089cb, 0x00008dcb, 0x000091cb, 0x000095cb,
+ 0x000099cb, 0x00009dcb, 0x0000a1cb, 0x0000a5cb,
+ 0x0000a9cb, 0x0000adcb, 0x0000b1cb, 0x0000b5cb,
+ 0x0000b9cb, 0x0000bdcb, 0x0000c1cb, 0x0000c5cb,
+ 0x0000c9cb, 0x0000cdcb, 0x0000d1cb, 0x0000d5cb,
+ 0x0000d9cb, 0x0000ddcb, 0x0000e1cb, 0x0000e5cb,
+ 0x0000e9cb, 0x0000edcb, 0x0000f1cb, 0x0000f5cb,
+ 0x0000f9cb, 0x0000fdcb, 0x000003cb, 0x000007cb,
+ 0x00000bcb, 0x00000fcb, 0x000013cb, 0x000017cb,
+ 0x00001bcb, 0x00001fcb, 0x000023cb, 0x000027cb,
+ 0x00002bcb, 0x00002fcb, 0x000033cb, 0x000037cb,
+ 0x00003bcb, 0x00003fcb, 0x000043cb, 0x000047cb,
+ 0x00004bcb, 0x00004fcb, 0x000053cb, 0x000057cb,
+ 0x00005bcb, 0x00005fcb, 0x000063cb, 0x000067cb,
+ 0x00006bcb, 0x00006fcb, 0x000073cb, 0x000077cb,
+ 0x00007bcb, 0x00007fcb, 0x000083cb, 0x000087cb,
+ 0x00008bcb, 0x00008fcb, 0x000093cb, 0x000097cb,
+ 0x00009bcb, 0x00009fcb, 0x0000a3cb, 0x0000a7cb,
+ 0x0000abcb, 0x0000afcb, 0x0000b3cb, 0x0000b7cb,
+ 0x0000bbcb, 0x0000bfcb, 0x0000c3cb, 0x0000c7cb,
+ 0x0000cbcb, 0x0000cfcb, 0x0000d3cb, 0x0000d7cb,
+ 0x0000dbcb, 0x0000dfcb, 0x0000e3cb, 0x0000e7cb,
+ 0x0000ebcb, 0x0000efcb, 0x0000f3cb, 0x0000f7cb,
+ 0x0000fbcb, 0x0000ffcb, 0x0000002c, 0x0000042c,
+ 0x0000082c, 0x00000c2c, 0x0000102c, 0x0000142c,
+ 0x0000182c, 0x00001c2c, 0x0000202c, 0x0000242c,
+ 0x0000282c, 0x00002c2c, 0x0000302c, 0x0000342c,
+ 0x0000382c, 0x00003c2c, 0x0000402c, 0x0000442c,
+ 0x0000482c, 0x00004c2c, 0x0000502c, 0x0000542c,
+ 0x0000582c, 0x00005c2c, 0x0000602c, 0x0000642c,
+ 0x0000682c, 0x00006c2c, 0x0000702c, 0x0000742c,
+ 0x0000782c, 0x00007c2c, 0x0000802c, 0x0000842c,
+ 0x0000882c, 0x00008c2c, 0x0000902c, 0x0000942c,
+ 0x0000982c, 0x00009c2c, 0x0000a02c, 0x0000a42c,
+ 0x0000a82c, 0x0000ac2c, 0x0000b02c, 0x0000b42c,
+ 0x0000b82c, 0x0000bc2c, 0x0000c02c, 0x0000c42c,
+ 0x0000c82c, 0x0000cc2c, 0x0000d02c, 0x0000d42c,
+ 0x0000d82c, 0x0000dc2c, 0x0000e02c, 0x0000e42c,
+ 0x0000e82c, 0x0000ec2c, 0x0000f02c, 0x0000f42c,
+ 0x0000f82c, 0x0000fc2c, 0x0001002c, 0x0001042c,
+ 0x0001082c, 0x00010c2c, 0x0001102c, 0x0001142c,
+ 0x0001182c, 0x00011c2c, 0x0001202c, 0x0001242c,
+ 0x0001282c, 0x00012c2c, 0x0001302c, 0x0001342c,
+ 0x0001382c, 0x00013c2c, 0x0001402c, 0x0001442c,
+ 0x0001482c, 0x00014c2c, 0x0001502c, 0x0001542c,
+ 0x0001582c, 0x00015c2c, 0x0001602c, 0x0001642c,
+ 0x0001682c, 0x00016c2c, 0x0001702c, 0x0001742c,
+ 0x0001782c, 0x00017c2c, 0x0001802c, 0x0001842c,
+ 0x0001882c, 0x00018c2c, 0x0001902c, 0x0001942c,
+ 0x0001982c, 0x00019c2c, 0x0001a02c, 0x0001a42c,
+ 0x0001a82c, 0x0001ac2c, 0x0001b02c, 0x0001b42c,
+ 0x0001b82c, 0x0001bc2c, 0x0001c02c, 0x0001c42c,
+ 0x0001c82c, 0x0001cc2c, 0x0001d02c, 0x0001d42c,
+ 0x0001d82c, 0x0001dc2c, 0x0001e02c, 0x0001e42c,
+ 0x0001e82c, 0x0001ec2c, 0x0001f02c, 0x0001f42c,
+ 0x0001f82c, 0x0001fc2c, 0x0000022c, 0x0000062c,
+ 0x00000a2c, 0x00000e2c, 0x0000122c, 0x0000162c,
+ 0x00001a2c, 0x00001e2c, 0x0000222c, 0x0000262c,
+ 0x00002a2c, 0x00002e2c, 0x0000322c, 0x0000362c,
+ 0x00003a2c, 0x00003e2c, 0x0000422c, 0x0000462c,
+ 0x00004a2c, 0x00004e2c, 0x0000522c, 0x0000562c,
+ 0x00005a2c, 0x00005e2c, 0x0000622c, 0x0000662c,
+ 0x00006a2c, 0x00006e2c, 0x0000722c, 0x0000762c,
+ 0x00007a2c, 0x00007e2c, 0x0000822c, 0x0000862c,
+ 0x00008a2c, 0x00008e2c, 0x0000922c, 0x0000962c,
+ 0x00009a2c, 0x00009e2c, 0x0000a22c, 0x0000a62c,
+ 0x0000aa2c, 0x0000ae2c, 0x0000b22c, 0x0000b62c,
+ 0x0000ba2c, 0x0000be2c, 0x0000c22c, 0x0000c62c,
+ 0x0000ca2c, 0x0000ce2c, 0x0000d22c, 0x0000d62c,
+ 0x0000da2c, 0x0000de2c, 0x0000e22c, 0x0000e62c,
+ 0x0000ea2c, 0x0000ee2c, 0x0000f22c, 0x0000f62c,
+ 0x0000fa2c, 0x0000fe2c, 0x0001022c, 0x0001062c,
+ 0x00010a2c, 0x00010e2c, 0x0001122c, 0x0001162c,
+ 0x00011a2c, 0x00011e2c, 0x0001222c, 0x0001262c,
+ 0x00012a2c, 0x00012e2c, 0x0001322c, 0x0001362c,
+ 0x00013a2c, 0x00013e2c, 0x0001422c, 0x0001462c,
+ 0x00014a2c, 0x00014e2c, 0x0001522c, 0x0001562c,
+ 0x00015a2c, 0x00015e2c, 0x0001622c, 0x0001662c,
+ 0x00016a2c, 0x00016e2c, 0x0001722c, 0x0001762c,
+ 0x00017a2c, 0x00017e2c, 0x0001822c, 0x0001862c,
+ 0x00018a2c, 0x00018e2c, 0x0001922c, 0x0001962c,
+ 0x00019a2c, 0x00019e2c, 0x0001a22c, 0x0001a62c,
+ 0x0001aa2c, 0x0001ae2c, 0x0001b22c, 0x0001b62c,
+ 0x0001ba2c, 0x0001be2c, 0x0001c22c, 0x0001c62c,
+ 0x0001ca2c, 0x0001ce2c, 0x0001d22c, 0x0001d62c,
+ 0x0001da2c, 0x0001de2c, 0x0001e22c, 0x0001e62c,
+ 0x0001ea2c, 0x0001ee2c, 0x0001f22c, 0x0001f62c,
+ 0x0001fa2c, 0x0001fe2c, 0x0000012d, 0x0000052d,
+ 0x0000092d, 0x00000d2d, 0x0000112d, 0x0000152d,
+ 0x0000192d, 0x00001d2d, 0x0000212d, 0x0000252d,
+ 0x0000292d, 0x00002d2d, 0x0000312d, 0x0000352d,
+ 0x0000392d, 0x00003d2d, 0x0000412d, 0x0000452d,
+ 0x0000492d, 0x00004d2d, 0x0000512d, 0x0000552d,
+ 0x0000592d, 0x00005d2d, 0x0000612d, 0x0000652d,
+ 0x0000692d, 0x00006d2d, 0x0000712d, 0x0000752d,
+ 0x0000792d, 0x00007d2d, 0x0000812d, 0x0000852d,
+ 0x0000892d, 0x00008d2d, 0x0000912d, 0x0000952d,
+ 0x0000992d, 0x00009d2d, 0x0000a12d, 0x0000a52d,
+ 0x0000a92d, 0x0000ad2d, 0x0000b12d, 0x0000b52d,
+ 0x0000b92d, 0x0000bd2d, 0x0000c12d, 0x0000c52d,
+ 0x0000c92d, 0x0000cd2d, 0x0000d12d, 0x0000d52d,
+ 0x0000d92d, 0x0000dd2d, 0x0000e12d, 0x0000e52d,
+ 0x0000e92d, 0x0000ed2d, 0x0000f12d, 0x0000f52d,
+ 0x0000f92d, 0x0000fd2d, 0x0001012d, 0x0001052d,
+ 0x0001092d, 0x00010d2d, 0x0001112d, 0x0001152d,
+ 0x0001192d, 0x00011d2d, 0x0001212d, 0x0001252d,
+ 0x0001292d, 0x00012d2d, 0x0001312d, 0x0001352d,
+ 0x0001392d, 0x00013d2d, 0x0001412d, 0x0001452d,
+ 0x0001492d, 0x00014d2d, 0x0001512d, 0x0001552d,
+ 0x0001592d, 0x00015d2d, 0x0001612d, 0x0001652d,
+ 0x0001692d, 0x00016d2d, 0x0001712d, 0x0001752d,
+ 0x0001792d, 0x00017d2d, 0x0001812d, 0x0001852d,
+ 0x0001892d, 0x00018d2d, 0x0001912d, 0x0001952d,
+ 0x0001992d, 0x00019d2d, 0x0001a12d, 0x0001a52d,
+ 0x0001a92d, 0x0001ad2d, 0x0001b12d, 0x0001b52d,
+ 0x0001b92d, 0x0001bd2d, 0x0001c12d, 0x0001c52d,
+ 0x0001c92d, 0x0001cd2d, 0x0001d12d, 0x0001d52d,
+ 0x0001d92d, 0x0001dd2d, 0x0001e12d, 0x0001e52d,
+ 0x0001e92d, 0x0001ed2d, 0x0001f12d, 0x0001f52d,
+ 0x0001f92d, 0x0001fd2d, 0x0002012d, 0x0002052d,
+ 0x0002092d, 0x00020d2d, 0x0002112d, 0x0002152d,
+ 0x0002192d, 0x00021d2d, 0x0002212d, 0x0002252d,
+ 0x0002292d, 0x00022d2d, 0x0002312d, 0x0002352d,
+ 0x0002392d, 0x00023d2d, 0x0002412d, 0x0002452d,
+ 0x0002492d, 0x00024d2d, 0x0002512d, 0x0002552d,
+ 0x0002592d, 0x00025d2d, 0x0002612d, 0x0002652d,
+ 0x0002692d, 0x00026d2d, 0x0002712d, 0x0002752d,
+ 0x0002792d, 0x00027d2d, 0x0002812d, 0x0002852d,
+ 0x0002892d, 0x00028d2d, 0x0002912d, 0x0002952d,
+ 0x0002992d, 0x00029d2d, 0x0002a12d, 0x0002a52d,
+ 0x0002a92d, 0x0002ad2d, 0x0002b12d, 0x0002b52d,
+ 0x0002b92d, 0x0002bd2d, 0x0002c12d, 0x0002c52d,
+ 0x0002c92d, 0x0002cd2d, 0x0002d12d, 0x0002d52d,
+ 0x0002d92d, 0x0002dd2d, 0x0002e12d, 0x0002e52d,
+ 0x0002e92d, 0x0002ed2d, 0x0002f12d, 0x0002f52d,
+ 0x0002f92d, 0x0002fd2d, 0x0003012d, 0x0003052d,
+ 0x0003092d, 0x00030d2d, 0x0003112d, 0x0003152d,
+ 0x0003192d, 0x00031d2d, 0x0003212d, 0x0003252d,
+ 0x0003292d, 0x00032d2d, 0x0003312d, 0x0003352d,
+ 0x0003392d, 0x00033d2d, 0x0003412d, 0x0003452d,
+ 0x0003492d, 0x00034d2d, 0x0003512d, 0x0003552d,
+ 0x0003592d, 0x00035d2d, 0x0003612d, 0x0003652d,
+ 0x0003692d, 0x00036d2d, 0x0003712d, 0x0003752d,
+ 0x0003792d, 0x00037d2d, 0x0003812d, 0x0003852d,
+ 0x0003892d, 0x00038d2d, 0x0003912d, 0x0003952d,
+ 0x0003992d, 0x00039d2d, 0x0003a12d, 0x0003a52d,
+ 0x0003a92d, 0x0003ad2d, 0x0003b12d, 0x0003b52d,
+ 0x0003b92d, 0x0003bd2d, 0x0003c12d, 0x0003c52d,
+ 0x0003c92d, 0x0003cd2d, 0x0003d12d, 0x0003d52d,
+ 0x0003d92d, 0x0003dd2d, 0x0003e12d, 0x0003e52d,
+ 0x0003e92d, 0x0003ed2d, 0x0003f12d, 0x0003f52d,
+ 0x0003f92d, 0x0003fd2d, 0x0000032d, 0x0000072d,
+ 0x00000b2d, 0x00000f2d, 0x0000132d, 0x0000172d,
+ 0x00001b2d, 0x00001f2d, 0x0000232d, 0x0000272d,
+ 0x00002b2d, 0x00002f2d, 0x0000332d, 0x0000372d,
+ 0x00003b2d, 0x00003f2d, 0x0000432d, 0x0000472d,
+ 0x00004b2d, 0x00004f2d, 0x0000532d, 0x0000572d,
+ 0x00005b2d, 0x00005f2d, 0x0000632d, 0x0000672d,
+ 0x00006b2d, 0x00006f2d, 0x0000732d, 0x0000772d,
+ 0x00007b2d, 0x00007f2d, 0x0000832d, 0x0000872d,
+ 0x00008b2d, 0x00008f2d, 0x0000932d, 0x0000972d,
+ 0x00009b2d, 0x00009f2d, 0x0000a32d, 0x0000a72d,
+ 0x0000ab2d, 0x0000af2d, 0x0000b32d, 0x0000b72d,
+ 0x0000bb2d, 0x0000bf2d, 0x0000c32d, 0x0000c72d,
+ 0x0000cb2d, 0x0000cf2d, 0x0000d32d, 0x0000d72d,
+ 0x0000db2d, 0x0000df2d, 0x0000e32d, 0x0000e72d,
+ 0x0000eb2d, 0x0000ef2d, 0x0000f32d, 0x0000f72d,
+ 0x0000fb2d, 0x0000ff2d, 0x0001032d, 0x0001072d,
+ 0x00010b2d, 0x00010f2d, 0x0001132d, 0x0001172d,
+ 0x00011b2d, 0x00011f2d, 0x0001232d, 0x0001272d,
+ 0x00012b2d, 0x00012f2d, 0x0001332d, 0x0001372d,
+ 0x00013b2d, 0x00013f2d, 0x0001432d, 0x0001472d,
+ 0x00014b2d, 0x00014f2d, 0x0001532d, 0x0001572d,
+ 0x00015b2d, 0x00015f2d, 0x0001632d, 0x0001672d,
+ 0x00016b2d, 0x00016f2d, 0x0001732d, 0x0001772d,
+ 0x00017b2d, 0x00017f2d, 0x0001832d, 0x0001872d,
+ 0x00018b2d, 0x00018f2d, 0x0001932d, 0x0001972d,
+ 0x00019b2d, 0x00019f2d, 0x0001a32d, 0x0001a72d,
+ 0x0001ab2d, 0x0001af2d, 0x0001b32d, 0x0001b72d,
+ 0x0001bb2d, 0x0001bf2d, 0x0001c32d, 0x0001c72d,
+ 0x0001cb2d, 0x0001cf2d, 0x0001d32d, 0x0001d72d,
+ 0x0001db2d, 0x0001df2d, 0x0001e32d, 0x0001e72d,
+ 0x0001eb2d, 0x0001ef2d, 0x0001f32d, 0x0001f72d,
+ 0x0001fb2d, 0x0001ff2d, 0x0002032d, 0x0002072d,
+ 0x00020b2d, 0x00020f2d, 0x0002132d, 0x0002172d,
+ 0x00021b2d, 0x00021f2d, 0x0002232d, 0x0002272d,
+ 0x00022b2d, 0x00022f2d, 0x0002332d, 0x0002372d,
+ 0x00023b2d, 0x00023f2d, 0x0002432d, 0x0002472d,
+ 0x00024b2d, 0x00024f2d, 0x0002532d, 0x0002572d,
+ 0x00025b2d, 0x00025f2d, 0x0002632d, 0x0002672d,
+ 0x00026b2d, 0x00026f2d, 0x0002732d, 0x0002772d,
+ 0x00027b2d, 0x00027f2d, 0x0002832d, 0x0002872d,
+ 0x00028b2d, 0x00028f2d, 0x0002932d, 0x0002972d,
+ 0x00029b2d, 0x00029f2d, 0x0002a32d, 0x0002a72d,
+ 0x0002ab2d, 0x0002af2d, 0x0002b32d, 0x0002b72d,
+ 0x0002bb2d, 0x0002bf2d, 0x0002c32d, 0x0002c72d,
+ 0x0002cb2d, 0x0002cf2d, 0x0002d32d, 0x0002d72d,
+ 0x0002db2d, 0x0002df2d, 0x0002e32d, 0x0002e72d,
+ 0x0002eb2d, 0x0002ef2d, 0x0002f32d, 0x0002f72d,
+ 0x0002fb2d, 0x0002ff2d, 0x0003032d, 0x0003072d,
+ 0x00030b2d, 0x00030f2d, 0x0003132d, 0x0003172d,
+ 0x00031b2d, 0x00031f2d, 0x0003232d, 0x0003272d,
+ 0x00032b2d, 0x00032f2d, 0x0003332d, 0x0003372d,
+ 0x00033b2d, 0x00033f2d, 0x0003432d, 0x0003472d,
+ 0x00034b2d, 0x00034f2d, 0x0003532d, 0x0003572d,
+ 0x00035b2d, 0x00035f2d, 0x0003632d, 0x0003672d,
+ 0x00036b2d, 0x00036f2d, 0x0003732d, 0x0003772d,
+ 0x00037b2d, 0x00037f2d, 0x0003832d, 0x0003872d,
+ 0x00038b2d, 0x00038f2d, 0x0003932d, 0x0003972d,
+ 0x00039b2d, 0x00039f2d, 0x0003a32d, 0x0003a72d,
+ 0x0003ab2d, 0x0003af2d, 0x0003b32d, 0x0003b72d,
+ 0x0003bb2d, 0x0003bf2d, 0x0003c32d, 0x0003c72d,
+ 0x0003cb2d, 0x0003cf2d, 0x0003d32d, 0x0003d72d,
+ 0x0003db2d, 0x0003df2d, 0x0003e32d, 0x0003e72d,
+ 0x0003eb2d, 0x0003ef2d, 0x0003f32d, 0x0003f72d,
+ 0x0003fb2d, 0x0003ff2d, 0x000000ae, 0x000004ae,
+ 0x000008ae, 0x00000cae, 0x000010ae, 0x000014ae,
+ 0x000018ae, 0x00001cae, 0x000020ae, 0x000024ae,
+ 0x000028ae, 0x00002cae, 0x000030ae, 0x000034ae,
+ 0x000038ae, 0x00003cae, 0x000040ae, 0x000044ae,
+ 0x000048ae, 0x00004cae, 0x000050ae, 0x000054ae,
+ 0x000058ae, 0x00005cae, 0x000060ae, 0x000064ae,
+ 0x000068ae, 0x00006cae, 0x000070ae, 0x000074ae,
+ 0x000078ae, 0x00007cae, 0x000080ae, 0x000084ae,
+ 0x000088ae, 0x00008cae, 0x000090ae, 0x000094ae,
+ 0x000098ae, 0x00009cae, 0x0000a0ae, 0x0000a4ae,
+ 0x0000a8ae, 0x0000acae, 0x0000b0ae, 0x0000b4ae,
+ 0x0000b8ae, 0x0000bcae, 0x0000c0ae, 0x0000c4ae,
+ 0x0000c8ae, 0x0000ccae, 0x0000d0ae, 0x0000d4ae,
+ 0x0000d8ae, 0x0000dcae, 0x0000e0ae, 0x0000e4ae,
+ 0x0000e8ae, 0x0000ecae, 0x0000f0ae, 0x0000f4ae,
+ 0x0000f8ae, 0x0000fcae, 0x000100ae, 0x000104ae,
+ 0x000108ae, 0x00010cae, 0x000110ae, 0x000114ae,
+ 0x000118ae, 0x00011cae, 0x000120ae, 0x000124ae,
+ 0x000128ae, 0x00012cae, 0x000130ae, 0x000134ae,
+ 0x000138ae, 0x00013cae, 0x000140ae, 0x000144ae,
+ 0x000148ae, 0x00014cae, 0x000150ae, 0x000154ae,
+ 0x000158ae, 0x00015cae, 0x000160ae, 0x000164ae,
+ 0x000168ae, 0x00016cae, 0x000170ae, 0x000174ae,
+ 0x000178ae, 0x00017cae, 0x000180ae, 0x000184ae,
+ 0x000188ae, 0x00018cae, 0x000190ae, 0x000194ae,
+ 0x000198ae, 0x00019cae, 0x0001a0ae, 0x0001a4ae,
+ 0x0001a8ae, 0x0001acae, 0x0001b0ae, 0x0001b4ae,
+ 0x0001b8ae, 0x0001bcae, 0x0001c0ae, 0x0001c4ae,
+ 0x0001c8ae, 0x0001ccae, 0x0001d0ae, 0x0001d4ae,
+ 0x0001d8ae, 0x0001dcae, 0x0001e0ae, 0x0001e4ae,
+ 0x0001e8ae, 0x0001ecae, 0x0001f0ae, 0x0001f4ae,
+ 0x0001f8ae, 0x0001fcae, 0x000200ae, 0x000204ae,
+ 0x000208ae, 0x00020cae, 0x000210ae, 0x000214ae,
+ 0x000218ae, 0x00021cae, 0x000220ae, 0x000224ae,
+ 0x000228ae, 0x00022cae, 0x000230ae, 0x000234ae,
+ 0x000238ae, 0x00023cae, 0x000240ae, 0x000244ae,
+ 0x000248ae, 0x00024cae, 0x000250ae, 0x000254ae,
+ 0x000258ae, 0x00025cae, 0x000260ae, 0x000264ae,
+ 0x000268ae, 0x00026cae, 0x000270ae, 0x000274ae,
+ 0x000278ae, 0x00027cae, 0x000280ae, 0x000284ae,
+ 0x000288ae, 0x00028cae, 0x000290ae, 0x000294ae,
+ 0x000298ae, 0x00029cae, 0x0002a0ae, 0x0002a4ae,
+ 0x0002a8ae, 0x0002acae, 0x0002b0ae, 0x0002b4ae,
+ 0x0002b8ae, 0x0002bcae, 0x0002c0ae, 0x0002c4ae,
+ 0x0002c8ae, 0x0002ccae, 0x0002d0ae, 0x0002d4ae,
+ 0x0002d8ae, 0x0002dcae, 0x0002e0ae, 0x0002e4ae,
+ 0x0002e8ae, 0x0002ecae, 0x0002f0ae, 0x0002f4ae,
+ 0x0002f8ae, 0x0002fcae, 0x000300ae, 0x000304ae,
+ 0x000308ae, 0x00030cae, 0x000310ae, 0x000314ae,
+ 0x000318ae, 0x00031cae, 0x000320ae, 0x000324ae,
+ 0x000328ae, 0x00032cae, 0x000330ae, 0x000334ae,
+ 0x000338ae, 0x00033cae, 0x000340ae, 0x000344ae,
+ 0x000348ae, 0x00034cae, 0x000350ae, 0x000354ae,
+ 0x000358ae, 0x00035cae, 0x000360ae, 0x000364ae,
+ 0x000368ae, 0x00036cae, 0x000370ae, 0x000374ae,
+ 0x000378ae, 0x00037cae, 0x000380ae, 0x000384ae,
+ 0x000388ae, 0x00038cae, 0x000390ae, 0x000394ae,
+ 0x000398ae, 0x00039cae, 0x0003a0ae, 0x0003a4ae,
+ 0x0003a8ae, 0x0003acae, 0x0003b0ae, 0x0003b4ae,
+ 0x0003b8ae, 0x0003bcae, 0x0003c0ae, 0x0003c4ae,
+ 0x0003c8ae, 0x0003ccae, 0x0003d0ae, 0x0003d4ae,
+ 0x0003d8ae, 0x0003dcae, 0x0003e0ae, 0x0003e4ae,
+ 0x0003e8ae, 0x0003ecae, 0x0003f0ae, 0x0003f4ae,
+ 0x0003f8ae, 0x0003fcae, 0x000400ae, 0x000404ae,
+ 0x000408ae, 0x00040cae, 0x000410ae, 0x000414ae,
+ 0x000418ae, 0x00041cae, 0x000420ae, 0x000424ae,
+ 0x000428ae, 0x00042cae, 0x000430ae, 0x000434ae,
+ 0x000438ae, 0x00043cae, 0x000440ae, 0x000444ae,
+ 0x000448ae, 0x00044cae, 0x000450ae, 0x000454ae,
+ 0x000458ae, 0x00045cae, 0x000460ae, 0x000464ae,
+ 0x000468ae, 0x00046cae, 0x000470ae, 0x000474ae,
+ 0x000478ae, 0x00047cae, 0x000480ae, 0x000484ae,
+ 0x000488ae, 0x00048cae, 0x000490ae, 0x000494ae,
+ 0x000498ae, 0x00049cae, 0x0004a0ae, 0x0004a4ae,
+ 0x0004a8ae, 0x0004acae, 0x0004b0ae, 0x0004b4ae,
+ 0x0004b8ae, 0x0004bcae, 0x0004c0ae, 0x0004c4ae,
+ 0x0004c8ae, 0x0004ccae, 0x0004d0ae, 0x0004d4ae,
+ 0x0004d8ae, 0x0004dcae, 0x0004e0ae, 0x0004e4ae,
+ 0x0004e8ae, 0x0004ecae, 0x0004f0ae, 0x0004f4ae,
+ 0x0004f8ae, 0x0004fcae, 0x000500ae, 0x000504ae,
+ 0x000508ae, 0x00050cae, 0x000510ae, 0x000514ae,
+ 0x000518ae, 0x00051cae, 0x000520ae, 0x000524ae,
+ 0x000528ae, 0x00052cae, 0x000530ae, 0x000534ae,
+ 0x000538ae, 0x00053cae, 0x000540ae, 0x000544ae,
+ 0x000548ae, 0x00054cae, 0x000550ae, 0x000554ae,
+ 0x000558ae, 0x00055cae, 0x000560ae, 0x000564ae,
+ 0x000568ae, 0x00056cae, 0x000570ae, 0x000574ae,
+ 0x000578ae, 0x00057cae, 0x000580ae, 0x000584ae,
+ 0x000588ae, 0x00058cae, 0x000590ae, 0x000594ae,
+ 0x000598ae, 0x00059cae, 0x0005a0ae, 0x0005a4ae,
+ 0x0005a8ae, 0x0005acae, 0x0005b0ae, 0x0005b4ae,
+ 0x0005b8ae, 0x0005bcae, 0x0005c0ae, 0x0005c4ae,
+ 0x0005c8ae, 0x0005ccae, 0x0005d0ae, 0x0005d4ae,
+ 0x0005d8ae, 0x0005dcae, 0x0005e0ae, 0x0005e4ae,
+ 0x0005e8ae, 0x0005ecae, 0x0005f0ae, 0x0005f4ae,
+ 0x0005f8ae, 0x0005fcae, 0x000600ae, 0x000604ae,
+ 0x000608ae, 0x00060cae, 0x000610ae, 0x000614ae,
+ 0x000618ae, 0x00061cae, 0x000620ae, 0x000624ae,
+ 0x000628ae, 0x00062cae, 0x000630ae, 0x000634ae,
+ 0x000638ae, 0x00063cae, 0x000640ae, 0x000644ae,
+ 0x000648ae, 0x00064cae, 0x000650ae, 0x000654ae,
+ 0x000658ae, 0x00065cae, 0x000660ae, 0x000664ae,
+ 0x000668ae, 0x00066cae, 0x000670ae, 0x000674ae,
+ 0x000678ae, 0x00067cae, 0x000680ae, 0x000684ae,
+ 0x000688ae, 0x00068cae, 0x000690ae, 0x000694ae,
+ 0x000698ae, 0x00069cae, 0x0006a0ae, 0x0006a4ae,
+ 0x0006a8ae, 0x0006acae, 0x0006b0ae, 0x0006b4ae,
+ 0x0006b8ae, 0x0006bcae, 0x0006c0ae, 0x0006c4ae,
+ 0x0006c8ae, 0x0006ccae, 0x0006d0ae, 0x0006d4ae,
+ 0x0006d8ae, 0x0006dcae, 0x0006e0ae, 0x0006e4ae,
+ 0x0006e8ae, 0x0006ecae, 0x0006f0ae, 0x0006f4ae,
+ 0x0006f8ae, 0x0006fcae, 0x000700ae, 0x000704ae,
+ 0x000708ae, 0x00070cae, 0x000710ae, 0x000714ae,
+ 0x000718ae, 0x00071cae, 0x000720ae, 0x000724ae,
+ 0x000728ae, 0x00072cae, 0x000730ae, 0x000734ae,
+ 0x000738ae, 0x00073cae, 0x000740ae, 0x000744ae,
+ 0x000748ae, 0x00074cae, 0x000750ae, 0x000754ae,
+ 0x000758ae, 0x00075cae, 0x000760ae, 0x000764ae,
+ 0x000768ae, 0x00076cae, 0x000770ae, 0x000774ae,
+ 0x000778ae, 0x00077cae, 0x000780ae, 0x000784ae,
+ 0x000788ae, 0x00078cae, 0x000790ae, 0x000794ae,
+ 0x000798ae, 0x00079cae, 0x0007a0ae, 0x0007a4ae,
+ 0x0007a8ae, 0x0007acae, 0x0007b0ae, 0x0007b4ae,
+ 0x0007b8ae, 0x0007bcae, 0x0007c0ae, 0x0007c4ae,
+ 0x0007c8ae, 0x0007ccae, 0x0007d0ae, 0x0007d4ae,
+ 0x0007d8ae, 0x0007dcae, 0x0007e0ae, 0x0007e4ae,
+ 0x0007e8ae, 0x0007ecae, 0x0007f0ae, 0x0007f4ae,
+ 0x0007f8ae, 0x0007fcae, 0x000002ae, 0x000006ae,
+ 0x00000aae, 0x00000eae, 0x000012ae, 0x000016ae,
+ 0x00001aae, 0x00001eae, 0x000022ae, 0x000026ae,
+ 0x00002aae, 0x00002eae, 0x000032ae, 0x000036ae,
+ 0x00003aae, 0x00003eae, 0x000042ae, 0x000046ae,
+ 0x00004aae, 0x00004eae, 0x000052ae, 0x000056ae,
+ 0x00005aae, 0x00005eae, 0x000062ae, 0x000066ae,
+ 0x00006aae, 0x00006eae, 0x000072ae, 0x000076ae,
+ 0x00007aae, 0x00007eae, 0x000082ae, 0x000086ae,
+ 0x00008aae, 0x00008eae, 0x000092ae, 0x000096ae,
+ 0x00009aae, 0x00009eae, 0x0000a2ae, 0x0000a6ae,
+ 0x0000aaae, 0x0000aeae, 0x0000b2ae, 0x0000b6ae,
+ 0x0000baae, 0x0000beae, 0x0000c2ae, 0x0000c6ae,
+ 0x0000caae, 0x0000ceae, 0x0000d2ae, 0x0000d6ae,
+ 0x0000daae, 0x0000deae, 0x0000e2ae, 0x0000e6ae,
+ 0x0000eaae, 0x0000eeae, 0x0000f2ae, 0x0000f6ae,
+ 0x0000faae, 0x0000feae, 0x000102ae, 0x000106ae,
+ 0x00010aae, 0x00010eae, 0x000112ae, 0x000116ae,
+ 0x00011aae, 0x00011eae, 0x000122ae, 0x000126ae,
+ 0x00012aae, 0x00012eae, 0x000132ae, 0x000136ae,
+ 0x00013aae, 0x00013eae, 0x000142ae, 0x000146ae,
+ 0x00014aae, 0x00014eae, 0x000152ae, 0x000156ae,
+ 0x00015aae, 0x00015eae, 0x000162ae, 0x000166ae,
+ 0x00016aae, 0x00016eae, 0x000172ae, 0x000176ae,
+ 0x00017aae, 0x00017eae, 0x000182ae, 0x000186ae,
+ 0x00018aae, 0x00018eae, 0x000192ae, 0x000196ae,
+ 0x00019aae, 0x00019eae, 0x0001a2ae, 0x0001a6ae,
+ 0x0001aaae, 0x0001aeae, 0x0001b2ae, 0x0001b6ae,
+ 0x0001baae, 0x0001beae, 0x0001c2ae, 0x0001c6ae,
+ 0x0001caae, 0x0001ceae, 0x0001d2ae, 0x0001d6ae,
+ 0x0001daae, 0x0001deae, 0x0001e2ae, 0x0001e6ae,
+ 0x0001eaae, 0x0001eeae, 0x0001f2ae, 0x0001f6ae,
+ 0x0001faae, 0x0001feae, 0x000202ae, 0x000206ae,
+ 0x00020aae, 0x00020eae, 0x000212ae, 0x000216ae,
+ 0x00021aae, 0x00021eae, 0x000222ae, 0x000226ae,
+ 0x00022aae, 0x00022eae, 0x000232ae, 0x000236ae,
+ 0x00023aae, 0x00023eae, 0x000242ae, 0x000246ae,
+ 0x00024aae, 0x00024eae, 0x000252ae, 0x000256ae,
+ 0x00025aae, 0x00025eae, 0x000262ae, 0x000266ae,
+ 0x00026aae, 0x00026eae, 0x000272ae, 0x000276ae,
+ 0x00027aae, 0x00027eae, 0x000282ae, 0x000286ae,
+ 0x00028aae, 0x00028eae, 0x000292ae, 0x000296ae,
+ 0x00029aae, 0x00029eae, 0x0002a2ae, 0x0002a6ae,
+ 0x0002aaae, 0x0002aeae, 0x0002b2ae, 0x0002b6ae,
+ 0x0002baae, 0x0002beae, 0x0002c2ae, 0x0002c6ae,
+ 0x0002caae, 0x0002ceae, 0x0002d2ae, 0x0002d6ae,
+ 0x0002daae, 0x0002deae, 0x0002e2ae, 0x0002e6ae,
+ 0x0002eaae, 0x0002eeae, 0x0002f2ae, 0x0002f6ae,
+ 0x0002faae, 0x0002feae, 0x000302ae, 0x000306ae,
+ 0x00030aae, 0x00030eae, 0x000312ae, 0x000316ae,
+ 0x00031aae, 0x00031eae, 0x000322ae, 0x000326ae,
+ 0x00032aae, 0x00032eae, 0x000332ae, 0x000336ae,
+ 0x00033aae, 0x00033eae, 0x000342ae, 0x000346ae,
+ 0x00034aae, 0x00034eae, 0x000352ae, 0x000356ae,
+ 0x00035aae, 0x00035eae, 0x000362ae, 0x000366ae,
+ 0x00036aae, 0x00036eae, 0x000372ae, 0x000376ae,
+ 0x00037aae, 0x00037eae, 0x000382ae, 0x000386ae,
+ 0x00038aae, 0x00038eae, 0x000392ae, 0x000396ae,
+ 0x00039aae, 0x00039eae, 0x0003a2ae, 0x0003a6ae,
+ 0x0003aaae, 0x0003aeae, 0x0003b2ae, 0x0003b6ae,
+ 0x0003baae, 0x0003beae, 0x0003c2ae, 0x0003c6ae,
+ 0x0003caae, 0x0003ceae, 0x0003d2ae, 0x0003d6ae,
+ 0x0003daae, 0x0003deae, 0x0003e2ae, 0x0003e6ae,
+ 0x0003eaae, 0x0003eeae, 0x0003f2ae, 0x0003f6ae,
+ 0x0003faae, 0x0003feae, 0x000402ae, 0x000406ae,
+ 0x00040aae, 0x00040eae, 0x000412ae, 0x000416ae,
+ 0x00041aae, 0x00041eae, 0x000422ae, 0x000426ae,
+ 0x00042aae, 0x00042eae, 0x000432ae, 0x000436ae,
+ 0x00043aae, 0x00043eae, 0x000442ae, 0x000446ae,
+ 0x00044aae, 0x00044eae, 0x000452ae, 0x000456ae,
+ 0x00045aae, 0x00045eae, 0x000462ae, 0x000466ae,
+ 0x00046aae, 0x00046eae, 0x000472ae, 0x000476ae,
+ 0x00047aae, 0x00047eae, 0x000482ae, 0x000486ae,
+ 0x00048aae, 0x00048eae, 0x000492ae, 0x000496ae,
+ 0x00049aae, 0x00049eae, 0x0004a2ae, 0x0004a6ae,
+ 0x0004aaae, 0x0004aeae, 0x0004b2ae, 0x0004b6ae,
+ 0x0004baae, 0x0004beae, 0x0004c2ae, 0x0004c6ae,
+ 0x0004caae, 0x0004ceae, 0x0004d2ae, 0x0004d6ae,
+ 0x0004daae, 0x0004deae, 0x0004e2ae, 0x0004e6ae,
+ 0x0004eaae, 0x0004eeae, 0x0004f2ae, 0x0004f6ae,
+ 0x0004faae, 0x0004feae, 0x000502ae, 0x000506ae,
+ 0x00050aae, 0x00050eae, 0x000512ae, 0x000516ae,
+ 0x00051aae, 0x00051eae, 0x000522ae, 0x000526ae,
+ 0x00052aae, 0x00052eae, 0x000532ae, 0x000536ae,
+ 0x00053aae, 0x00053eae, 0x000542ae, 0x000546ae,
+ 0x00054aae, 0x00054eae, 0x000552ae, 0x000556ae,
+ 0x00055aae, 0x00055eae, 0x000562ae, 0x000566ae,
+ 0x00056aae, 0x00056eae, 0x000572ae, 0x000576ae,
+ 0x00057aae, 0x00057eae, 0x000582ae, 0x000586ae,
+ 0x00058aae, 0x00058eae, 0x000592ae, 0x000596ae,
+ 0x00059aae, 0x00059eae, 0x0005a2ae, 0x0005a6ae,
+ 0x0005aaae, 0x0005aeae, 0x0005b2ae, 0x0005b6ae,
+ 0x0005baae, 0x0005beae, 0x0005c2ae, 0x0005c6ae,
+ 0x0005caae, 0x0005ceae, 0x0005d2ae, 0x0005d6ae,
+ 0x0005daae, 0x0005deae, 0x0005e2ae, 0x0005e6ae,
+ 0x0005eaae, 0x0005eeae, 0x0005f2ae, 0x0005f6ae,
+ 0x0005faae, 0x0005feae, 0x000602ae, 0x000606ae,
+ 0x00060aae, 0x00060eae, 0x000612ae, 0x000616ae,
+ 0x00061aae, 0x00061eae, 0x000622ae, 0x000626ae,
+ 0x00062aae, 0x00062eae, 0x000632ae, 0x000636ae,
+ 0x00063aae, 0x00063eae, 0x000642ae, 0x000646ae,
+ 0x00064aae, 0x00064eae, 0x000652ae, 0x000656ae,
+ 0x00065aae, 0x00065eae, 0x000662ae, 0x000666ae,
+ 0x00066aae, 0x00066eae, 0x000672ae, 0x000676ae,
+ 0x00067aae, 0x00067eae, 0x000682ae, 0x000686ae,
+ 0x00068aae, 0x00068eae, 0x000692ae, 0x000696ae,
+ 0x00069aae, 0x00069eae, 0x0006a2ae, 0x0006a6ae,
+ 0x0006aaae, 0x0006aeae, 0x0006b2ae, 0x0006b6ae,
+ 0x0006baae, 0x0006beae, 0x0006c2ae, 0x0006c6ae,
+ 0x0006caae, 0x0006ceae, 0x0006d2ae, 0x0006d6ae,
+ 0x0006daae, 0x0006deae, 0x0006e2ae, 0x0006e6ae,
+ 0x0006eaae, 0x0006eeae, 0x0006f2ae, 0x0006f6ae,
+ 0x0006faae, 0x0006feae, 0x000702ae, 0x000706ae,
+ 0x00070aae, 0x00070eae, 0x000712ae, 0x000716ae,
+ 0x00071aae, 0x00071eae, 0x000722ae, 0x000726ae,
+ 0x00072aae, 0x00072eae, 0x000732ae, 0x000736ae,
+ 0x00073aae, 0x00073eae, 0x000742ae, 0x000746ae,
+ 0x00074aae, 0x00074eae, 0x000752ae, 0x000756ae,
+ 0x00075aae, 0x00075eae, 0x000762ae, 0x000766ae,
+ 0x00076aae, 0x00076eae, 0x000772ae, 0x000776ae,
+ 0x00077aae, 0x00077eae, 0x000782ae, 0x000786ae,
+ 0x00078aae, 0x00078eae, 0x000792ae, 0x000796ae,
+ 0x00079aae, 0x00079eae, 0x0007a2ae, 0x0007a6ae,
+ 0x0007aaae, 0x0007aeae, 0x0007b2ae, 0x0007b6ae,
+ 0x0007baae, 0x0007beae, 0x0007c2ae, 0x0007c6ae,
+ 0x0007caae, 0x0007ceae, 0x0007d2ae, 0x0007d6ae,
+ 0x0007daae, 0x0007deae, 0x0007e2ae, 0x0007e6ae,
+ 0x0007eaae, 0x0007eeae, 0x0007f2ae, 0x0007f6ae,
+ 0x0007faae, 0x0007feae, 0x000001af, 0x000005af,
+ 0x000009af, 0x00000daf, 0x000011af, 0x000015af,
+ 0x000019af, 0x00001daf, 0x000021af, 0x000025af,
+ 0x000029af, 0x00002daf, 0x000031af, 0x000035af,
+ 0x000039af, 0x00003daf, 0x000041af, 0x000045af,
+ 0x000049af, 0x00004daf, 0x000051af, 0x000055af,
+ 0x000059af, 0x00005daf, 0x000061af, 0x000065af,
+ 0x000069af, 0x00006daf, 0x000071af, 0x000075af,
+ 0x000079af, 0x00007daf, 0x000081af, 0x000085af,
+ 0x000089af, 0x00008daf, 0x000091af, 0x000095af,
+ 0x000099af, 0x00009daf, 0x0000a1af, 0x0000a5af,
+ 0x0000a9af, 0x0000adaf, 0x0000b1af, 0x0000b5af,
+ 0x0000b9af, 0x0000bdaf, 0x0000c1af, 0x0000c5af,
+ 0x0000c9af, 0x0000cdaf, 0x0000d1af, 0x0000d5af,
+ 0x0000d9af, 0x0000ddaf, 0x0000e1af, 0x0000e5af,
+ 0x0000e9af, 0x0000edaf, 0x0000f1af, 0x0000f5af,
+ 0x0000f9af, 0x0000fdaf, 0x000101af, 0x000105af,
+ 0x000109af, 0x00010daf, 0x000111af, 0x000115af,
+ 0x000119af, 0x00011daf, 0x000121af, 0x000125af,
+ 0x000129af, 0x00012daf, 0x000131af, 0x000135af,
+ 0x000139af, 0x00013daf, 0x000141af, 0x000145af,
+ 0x000149af, 0x00014daf, 0x000151af, 0x000155af,
+ 0x000159af, 0x00015daf, 0x000161af, 0x000165af,
+ 0x000169af, 0x00016daf, 0x000171af, 0x000175af,
+ 0x000179af, 0x00017daf, 0x000181af, 0x000185af,
+ 0x000189af, 0x00018daf, 0x000191af, 0x000195af,
+ 0x000199af, 0x00019daf, 0x0001a1af, 0x0001a5af,
+ 0x0001a9af, 0x0001adaf, 0x0001b1af, 0x0001b5af,
+ 0x0001b9af, 0x0001bdaf, 0x0001c1af, 0x0001c5af,
+ 0x0001c9af, 0x0001cdaf, 0x0001d1af, 0x0001d5af,
+ 0x0001d9af, 0x0001ddaf, 0x0001e1af, 0x0001e5af,
+ 0x0001e9af, 0x0001edaf, 0x0001f1af, 0x0001f5af,
+ 0x0001f9af, 0x0001fdaf, 0x000201af, 0x000205af,
+ 0x000209af, 0x00020daf, 0x000211af, 0x000215af,
+ 0x000219af, 0x00021daf, 0x000221af, 0x000225af,
+ 0x000229af, 0x00022daf, 0x000231af, 0x000235af,
+ 0x000239af, 0x00023daf, 0x000241af, 0x000245af,
+ 0x000249af, 0x00024daf, 0x000251af, 0x000255af,
+ 0x000259af, 0x00025daf, 0x000261af, 0x000265af,
+ 0x000269af, 0x00026daf, 0x000271af, 0x000275af,
+ 0x000279af, 0x00027daf, 0x000281af, 0x000285af,
+ 0x000289af, 0x00028daf, 0x000291af, 0x000295af,
+ 0x000299af, 0x00029daf, 0x0002a1af, 0x0002a5af,
+ 0x0002a9af, 0x0002adaf, 0x0002b1af, 0x0002b5af,
+ 0x0002b9af, 0x0002bdaf, 0x0002c1af, 0x0002c5af,
+ 0x0002c9af, 0x0002cdaf, 0x0002d1af, 0x0002d5af,
+ 0x0002d9af, 0x0002ddaf, 0x0002e1af, 0x0002e5af,
+ 0x0002e9af, 0x0002edaf, 0x0002f1af, 0x0002f5af,
+ 0x0002f9af, 0x0002fdaf, 0x000301af, 0x000305af,
+ 0x000309af, 0x00030daf, 0x000311af, 0x000315af,
+ 0x000319af, 0x00031daf, 0x000321af, 0x000325af,
+ 0x000329af, 0x00032daf, 0x000331af, 0x000335af,
+ 0x000339af, 0x00033daf, 0x000341af, 0x000345af,
+ 0x000349af, 0x00034daf, 0x000351af, 0x000355af,
+ 0x000359af, 0x00035daf, 0x000361af, 0x000365af,
+ 0x000369af, 0x00036daf, 0x000371af, 0x000375af,
+ 0x000379af, 0x00037daf, 0x000381af, 0x000385af,
+ 0x000389af, 0x00038daf, 0x000391af, 0x000395af,
+ 0x000399af, 0x00039daf, 0x0003a1af, 0x0003a5af,
+ 0x0003a9af, 0x0003adaf, 0x0003b1af, 0x0003b5af,
+ 0x0003b9af, 0x0003bdaf, 0x0003c1af, 0x0003c5af,
+ 0x0003c9af, 0x0003cdaf, 0x0003d1af, 0x0003d5af,
+ 0x0003d9af, 0x0003ddaf, 0x0003e1af, 0x0003e5af,
+ 0x0003e9af, 0x0003edaf, 0x0003f1af, 0x0003f5af,
+ 0x0003f9af, 0x0003fdaf, 0x000401af, 0x000405af,
+ 0x000409af, 0x00040daf, 0x000411af, 0x000415af,
+ 0x000419af, 0x00041daf, 0x000421af, 0x000425af,
+ 0x000429af, 0x00042daf, 0x000431af, 0x000435af,
+ 0x000439af, 0x00043daf, 0x000441af, 0x000445af,
+ 0x000449af, 0x00044daf, 0x000451af, 0x000455af,
+ 0x000459af, 0x00045daf, 0x000461af, 0x000465af,
+ 0x000469af, 0x00046daf, 0x000471af, 0x000475af,
+ 0x000479af, 0x00047daf, 0x000481af, 0x000485af,
+ 0x000489af, 0x00048daf, 0x000491af, 0x000495af,
+ 0x000499af, 0x00049daf, 0x0004a1af, 0x0004a5af,
+ 0x0004a9af, 0x0004adaf, 0x0004b1af, 0x0004b5af,
+ 0x0004b9af, 0x0004bdaf, 0x0004c1af, 0x0004c5af,
+ 0x0004c9af, 0x0004cdaf, 0x0004d1af, 0x0004d5af,
+ 0x0004d9af, 0x0004ddaf, 0x0004e1af, 0x0004e5af,
+ 0x0004e9af, 0x0004edaf, 0x0004f1af, 0x0004f5af,
+ 0x0004f9af, 0x0004fdaf, 0x000501af, 0x000505af,
+ 0x000509af, 0x00050daf, 0x000511af, 0x000515af,
+ 0x000519af, 0x00051daf, 0x000521af, 0x000525af,
+ 0x000529af, 0x00052daf, 0x000531af, 0x000535af,
+ 0x000539af, 0x00053daf, 0x000541af, 0x000545af,
+ 0x000549af, 0x00054daf, 0x000551af, 0x000555af,
+ 0x000559af, 0x00055daf, 0x000561af, 0x000565af,
+ 0x000569af, 0x00056daf, 0x000571af, 0x000575af,
+ 0x000579af, 0x00057daf, 0x000581af, 0x000585af,
+ 0x000589af, 0x00058daf, 0x000591af, 0x000595af,
+ 0x000599af, 0x00059daf, 0x0005a1af, 0x0005a5af,
+ 0x0005a9af, 0x0005adaf, 0x0005b1af, 0x0005b5af,
+ 0x0005b9af, 0x0005bdaf, 0x0005c1af, 0x0005c5af,
+ 0x0005c9af, 0x0005cdaf, 0x0005d1af, 0x0005d5af,
+ 0x0005d9af, 0x0005ddaf, 0x0005e1af, 0x0005e5af,
+ 0x0005e9af, 0x0005edaf, 0x0005f1af, 0x0005f5af,
+ 0x0005f9af, 0x0005fdaf, 0x000601af, 0x000605af,
+ 0x000609af, 0x00060daf, 0x000611af, 0x000615af,
+ 0x000619af, 0x00061daf, 0x000621af, 0x000625af,
+ 0x000629af, 0x00062daf, 0x000631af, 0x000635af,
+ 0x000639af, 0x00063daf, 0x000641af, 0x000645af,
+ 0x000649af, 0x00064daf, 0x000651af, 0x000655af,
+ 0x000659af, 0x00065daf, 0x000661af, 0x000665af,
+ 0x000669af, 0x00066daf, 0x000671af, 0x000675af,
+ 0x000679af, 0x00067daf, 0x000681af, 0x000685af,
+ 0x000689af, 0x00068daf, 0x000691af, 0x000695af,
+ 0x000699af, 0x00069daf, 0x0006a1af, 0x0006a5af,
+ 0x0006a9af, 0x0006adaf, 0x0006b1af, 0x0006b5af,
+ 0x0006b9af, 0x0006bdaf, 0x0006c1af, 0x0006c5af,
+ 0x0006c9af, 0x0006cdaf, 0x0006d1af, 0x0006d5af,
+ 0x0006d9af, 0x0006ddaf, 0x0006e1af, 0x0006e5af,
+ 0x0006e9af, 0x0006edaf, 0x0006f1af, 0x0006f5af,
+ 0x0006f9af, 0x0006fdaf, 0x000701af, 0x000705af,
+ 0x000709af, 0x00070daf, 0x000711af, 0x000715af,
+ 0x000719af, 0x00071daf, 0x000721af, 0x000725af,
+ 0x000729af, 0x00072daf, 0x000731af, 0x000735af,
+ 0x000739af, 0x00073daf, 0x000741af, 0x000745af,
+ 0x000749af, 0x00074daf, 0x000751af, 0x000755af,
+ 0x000759af, 0x00075daf, 0x000761af, 0x000765af,
+ 0x000769af, 0x00076daf, 0x000771af, 0x000775af,
+ 0x000779af, 0x00077daf, 0x000781af, 0x000785af,
+ 0x000789af, 0x00078daf, 0x000791af, 0x000795af,
+ 0x000799af, 0x00079daf, 0x0007a1af, 0x0007a5af,
+ 0x0007a9af, 0x0007adaf, 0x0007b1af, 0x0007b5af,
+ 0x0007b9af, 0x0007bdaf, 0x0007c1af, 0x0007c5af,
+ 0x0007c9af, 0x0007cdaf, 0x0007d1af, 0x0007d5af,
+ 0x0007d9af, 0x0007ddaf, 0x0007e1af, 0x0007e5af,
+ 0x0007e9af, 0x0007edaf, 0x0007f1af, 0x0007f5af,
+ 0x0007f9af, 0x0007fdaf, 0x000801af, 0x000805af,
+ 0x000809af, 0x00080daf, 0x000811af, 0x000815af,
+ 0x000819af, 0x00081daf, 0x000821af, 0x000825af,
+ 0x000829af, 0x00082daf, 0x000831af, 0x000835af,
+ 0x000839af, 0x00083daf, 0x000841af, 0x000845af,
+ 0x000849af, 0x00084daf, 0x000851af, 0x000855af,
+ 0x000859af, 0x00085daf, 0x000861af, 0x000865af,
+ 0x000869af, 0x00086daf, 0x000871af, 0x000875af,
+ 0x000879af, 0x00087daf, 0x000881af, 0x000885af,
+ 0x000889af, 0x00088daf, 0x000891af, 0x000895af,
+ 0x000899af, 0x00089daf, 0x0008a1af, 0x0008a5af,
+ 0x0008a9af, 0x0008adaf, 0x0008b1af, 0x0008b5af,
+ 0x0008b9af, 0x0008bdaf, 0x0008c1af, 0x0008c5af,
+ 0x0008c9af, 0x0008cdaf, 0x0008d1af, 0x0008d5af,
+ 0x0008d9af, 0x0008ddaf, 0x0008e1af, 0x0008e5af,
+ 0x0008e9af, 0x0008edaf, 0x0008f1af, 0x0008f5af,
+ 0x0008f9af, 0x0008fdaf, 0x000901af, 0x000905af,
+ 0x000909af, 0x00090daf, 0x000911af, 0x000915af,
+ 0x000919af, 0x00091daf, 0x000921af, 0x000925af,
+ 0x000929af, 0x00092daf, 0x000931af, 0x000935af,
+ 0x000939af, 0x00093daf, 0x000941af, 0x000945af,
+ 0x000949af, 0x00094daf, 0x000951af, 0x000955af,
+ 0x000959af, 0x00095daf, 0x000961af, 0x000965af,
+ 0x000969af, 0x00096daf, 0x000971af, 0x000975af,
+ 0x000979af, 0x00097daf, 0x000981af, 0x000985af,
+ 0x000989af, 0x00098daf, 0x000991af, 0x000995af,
+ 0x000999af, 0x00099daf, 0x0009a1af, 0x0009a5af,
+ 0x0009a9af, 0x0009adaf, 0x0009b1af, 0x0009b5af,
+ 0x0009b9af, 0x0009bdaf, 0x0009c1af, 0x0009c5af,
+ 0x0009c9af, 0x0009cdaf, 0x0009d1af, 0x0009d5af,
+ 0x0009d9af, 0x0009ddaf, 0x0009e1af, 0x0009e5af,
+ 0x0009e9af, 0x0009edaf, 0x0009f1af, 0x0009f5af,
+ 0x0009f9af, 0x0009fdaf, 0x000a01af, 0x000a05af,
+ 0x000a09af, 0x000a0daf, 0x000a11af, 0x000a15af,
+ 0x000a19af, 0x000a1daf, 0x000a21af, 0x000a25af,
+ 0x000a29af, 0x000a2daf, 0x000a31af, 0x000a35af,
+ 0x000a39af, 0x000a3daf, 0x000a41af, 0x000a45af,
+ 0x000a49af, 0x000a4daf, 0x000a51af, 0x000a55af,
+ 0x000a59af, 0x000a5daf, 0x000a61af, 0x000a65af,
+ 0x000a69af, 0x000a6daf, 0x000a71af, 0x000a75af,
+ 0x000a79af, 0x000a7daf, 0x000a81af, 0x000a85af,
+ 0x000a89af, 0x000a8daf, 0x000a91af, 0x000a95af,
+ 0x000a99af, 0x000a9daf, 0x000aa1af, 0x000aa5af,
+ 0x000aa9af, 0x000aadaf, 0x000ab1af, 0x000ab5af,
+ 0x000ab9af, 0x000abdaf, 0x000ac1af, 0x000ac5af,
+ 0x000ac9af, 0x000acdaf, 0x000ad1af, 0x000ad5af,
+ 0x000ad9af, 0x000addaf, 0x000ae1af, 0x000ae5af,
+ 0x000ae9af, 0x000aedaf, 0x000af1af, 0x000af5af,
+ 0x000af9af, 0x000afdaf, 0x000b01af, 0x000b05af,
+ 0x000b09af, 0x000b0daf, 0x000b11af, 0x000b15af,
+ 0x000b19af, 0x000b1daf, 0x000b21af, 0x000b25af,
+ 0x000b29af, 0x000b2daf, 0x000b31af, 0x000b35af,
+ 0x000b39af, 0x000b3daf, 0x000b41af, 0x000b45af,
+ 0x000b49af, 0x000b4daf, 0x000b51af, 0x000b55af,
+ 0x000b59af, 0x000b5daf, 0x000b61af, 0x000b65af,
+ 0x000b69af, 0x000b6daf, 0x000b71af, 0x000b75af,
+ 0x000b79af, 0x000b7daf, 0x000b81af, 0x000b85af,
+ 0x000b89af, 0x000b8daf, 0x000b91af, 0x000b95af,
+ 0x000b99af, 0x000b9daf, 0x000ba1af, 0x000ba5af,
+ 0x000ba9af, 0x000badaf, 0x000bb1af, 0x000bb5af,
+ 0x000bb9af, 0x000bbdaf, 0x000bc1af, 0x000bc5af,
+ 0x000bc9af, 0x000bcdaf, 0x000bd1af, 0x000bd5af,
+ 0x000bd9af, 0x000bddaf, 0x000be1af, 0x000be5af,
+ 0x000be9af, 0x000bedaf, 0x000bf1af, 0x000bf5af,
+ 0x000bf9af, 0x000bfdaf, 0x000c01af, 0x000c05af,
+ 0x000c09af, 0x000c0daf, 0x000c11af, 0x000c15af,
+ 0x000c19af, 0x000c1daf, 0x000c21af, 0x000c25af,
+ 0x000c29af, 0x000c2daf, 0x000c31af, 0x000c35af,
+ 0x000c39af, 0x000c3daf, 0x000c41af, 0x000c45af,
+ 0x000c49af, 0x000c4daf, 0x000c51af, 0x000c55af,
+ 0x000c59af, 0x000c5daf, 0x000c61af, 0x000c65af,
+ 0x000c69af, 0x000c6daf, 0x000c71af, 0x000c75af,
+ 0x000c79af, 0x000c7daf, 0x000c81af, 0x000c85af,
+ 0x000c89af, 0x000c8daf, 0x000c91af, 0x000c95af,
+ 0x000c99af, 0x000c9daf, 0x000ca1af, 0x000ca5af,
+ 0x000ca9af, 0x000cadaf, 0x000cb1af, 0x000cb5af,
+ 0x000cb9af, 0x000cbdaf, 0x000cc1af, 0x000cc5af,
+ 0x000cc9af, 0x000ccdaf, 0x000cd1af, 0x000cd5af,
+ 0x000cd9af, 0x000cddaf, 0x000ce1af, 0x000ce5af,
+ 0x000ce9af, 0x000cedaf, 0x000cf1af, 0x000cf5af,
+ 0x000cf9af, 0x000cfdaf, 0x000d01af, 0x000d05af,
+ 0x000d09af, 0x000d0daf, 0x000d11af, 0x000d15af,
+ 0x000d19af, 0x000d1daf, 0x000d21af, 0x000d25af,
+ 0x000d29af, 0x000d2daf, 0x000d31af, 0x000d35af,
+ 0x000d39af, 0x000d3daf, 0x000d41af, 0x000d45af,
+ 0x000d49af, 0x000d4daf, 0x000d51af, 0x000d55af,
+ 0x000d59af, 0x000d5daf, 0x000d61af, 0x000d65af,
+ 0x000d69af, 0x000d6daf, 0x000d71af, 0x000d75af,
+ 0x000d79af, 0x000d7daf, 0x000d81af, 0x000d85af,
+ 0x000d89af, 0x000d8daf, 0x000d91af, 0x000d95af,
+ 0x000d99af, 0x000d9daf, 0x000da1af, 0x000da5af,
+ 0x000da9af, 0x000dadaf, 0x000db1af, 0x000db5af,
+ 0x000db9af, 0x000dbdaf, 0x000dc1af, 0x000dc5af,
+ 0x000dc9af, 0x000dcdaf, 0x000dd1af, 0x000dd5af,
+ 0x000dd9af, 0x000dddaf, 0x000de1af, 0x000de5af,
+ 0x000de9af, 0x000dedaf, 0x000df1af, 0x000df5af,
+ 0x000df9af, 0x000dfdaf, 0x000e01af, 0x000e05af,
+ 0x000e09af, 0x000e0daf, 0x000e11af, 0x000e15af,
+ 0x000e19af, 0x000e1daf, 0x000e21af, 0x000e25af,
+ 0x000e29af, 0x000e2daf, 0x000e31af, 0x000e35af,
+ 0x000e39af, 0x000e3daf, 0x000e41af, 0x000e45af,
+ 0x000e49af, 0x000e4daf, 0x000e51af, 0x000e55af,
+ 0x000e59af, 0x000e5daf, 0x000e61af, 0x000e65af,
+ 0x000e69af, 0x000e6daf, 0x000e71af, 0x000e75af,
+ 0x000e79af, 0x000e7daf, 0x000e81af, 0x000e85af,
+ 0x000e89af, 0x000e8daf, 0x000e91af, 0x000e95af,
+ 0x000e99af, 0x000e9daf, 0x000ea1af, 0x000ea5af,
+ 0x000ea9af, 0x000eadaf, 0x000eb1af, 0x000eb5af,
+ 0x000eb9af, 0x000ebdaf, 0x000ec1af, 0x000ec5af,
+ 0x000ec9af, 0x000ecdaf, 0x000ed1af, 0x000ed5af,
+ 0x000ed9af, 0x000eddaf, 0x000ee1af, 0x000ee5af,
+ 0x000ee9af, 0x000eedaf, 0x000ef1af, 0x000ef5af,
+ 0x000ef9af, 0x000efdaf, 0x000f01af, 0x000f05af,
+ 0x000f09af, 0x000f0daf, 0x000f11af, 0x000f15af,
+ 0x000f19af, 0x000f1daf, 0x000f21af, 0x000f25af,
+ 0x000f29af, 0x000f2daf, 0x000f31af, 0x000f35af,
+ 0x000f39af, 0x000f3daf, 0x000f41af, 0x000f45af,
+ 0x000f49af, 0x000f4daf, 0x000f51af, 0x000f55af,
+ 0x000f59af, 0x000f5daf, 0x000f61af, 0x000f65af,
+ 0x000f69af, 0x000f6daf, 0x000f71af, 0x000f75af,
+ 0x000f79af, 0x000f7daf, 0x000f81af, 0x000f85af,
+ 0x000f89af, 0x000f8daf, 0x000f91af, 0x000f95af,
+ 0x000f99af, 0x000f9daf, 0x000fa1af, 0x000fa5af,
+ 0x000fa9af, 0x000fadaf, 0x000fb1af, 0x000fb5af,
+ 0x000fb9af, 0x000fbdaf, 0x000fc1af, 0x000fc5af,
+ 0x000fc9af, 0x000fcdaf, 0x000fd1af, 0x000fd5af,
+ 0x000fd9af, 0x000fddaf, 0x000fe1af, 0x000fe5af,
+ 0x000fe9af, 0x000fedaf, 0x000ff1af, 0x000ff5af,
+ 0x000ff9af, 0x000ffdaf, 0x000003af, 0x000007af,
+ 0x00000baf, 0x00000faf, 0x000013af, 0x000017af,
+ 0x00001baf, 0x00001faf, 0x000023af, 0x000027af,
+ 0x00002baf, 0x00002faf, 0x000033af, 0x000037af,
+ 0x00003baf, 0x00003faf, 0x000043af, 0x000047af,
+ 0x00004baf, 0x00004faf, 0x000053af, 0x000057af,
+ 0x00005baf, 0x00005faf, 0x000063af, 0x000067af,
+ 0x00006baf, 0x00006faf, 0x000073af, 0x000077af,
+ 0x00007baf, 0x00007faf, 0x000083af, 0x000087af,
+ 0x00008baf, 0x00008faf, 0x000093af, 0x000097af,
+ 0x00009baf, 0x00009faf, 0x0000a3af, 0x0000a7af,
+ 0x0000abaf, 0x0000afaf, 0x0000b3af, 0x0000b7af,
+ 0x0000bbaf, 0x0000bfaf, 0x0000c3af, 0x0000c7af,
+ 0x0000cbaf, 0x0000cfaf, 0x0000d3af, 0x0000d7af,
+ 0x0000dbaf, 0x0000dfaf, 0x0000e3af, 0x0000e7af,
+ 0x0000ebaf, 0x0000efaf, 0x0000f3af, 0x0000f7af,
+ 0x0000fbaf, 0x0000ffaf, 0x000103af, 0x000107af,
+ 0x00010baf, 0x00010faf, 0x000113af, 0x000117af,
+ 0x00011baf, 0x00011faf, 0x000123af, 0x000127af,
+ 0x00012baf, 0x00012faf, 0x000133af, 0x000137af,
+ 0x00013baf, 0x00013faf, 0x000143af, 0x000147af,
+ 0x00014baf, 0x00014faf, 0x000153af, 0x000157af,
+ 0x00015baf, 0x00015faf, 0x000163af, 0x000167af,
+ 0x00016baf, 0x00016faf, 0x000173af, 0x000177af,
+ 0x00017baf, 0x00017faf, 0x000183af, 0x000187af,
+ 0x00018baf, 0x00018faf, 0x000193af, 0x000197af,
+ 0x00019baf, 0x00019faf, 0x0001a3af, 0x0001a7af,
+ 0x0001abaf, 0x0001afaf, 0x0001b3af, 0x0001b7af,
+ 0x0001bbaf, 0x0001bfaf, 0x0001c3af, 0x0001c7af,
+ 0x0001cbaf, 0x0001cfaf, 0x0001d3af, 0x0001d7af,
+ 0x0001dbaf, 0x0001dfaf, 0x0001e3af, 0x0001e7af,
+ 0x0001ebaf, 0x0001efaf, 0x0001f3af, 0x0001f7af,
+ 0x0001fbaf, 0x0001ffaf, 0x000203af, 0x000207af,
+ 0x00020baf, 0x00020faf, 0x000213af, 0x000217af,
+ 0x00021baf, 0x00021faf, 0x000223af, 0x000227af,
+ 0x00022baf, 0x00022faf, 0x000233af, 0x000237af,
+ 0x00023baf, 0x00023faf, 0x000243af, 0x000247af,
+ 0x00024baf, 0x00024faf, 0x000253af, 0x000257af,
+ 0x00025baf, 0x00025faf, 0x000263af, 0x000267af,
+ 0x00026baf, 0x00026faf, 0x000273af, 0x000277af,
+ 0x00027baf, 0x00027faf, 0x000283af, 0x000287af,
+ 0x00028baf, 0x00028faf, 0x000293af, 0x000297af,
+ 0x00029baf, 0x00029faf, 0x0002a3af, 0x0002a7af,
+ 0x0002abaf, 0x0002afaf, 0x0002b3af, 0x0002b7af,
+ 0x0002bbaf, 0x0002bfaf, 0x0002c3af, 0x0002c7af,
+ 0x0002cbaf, 0x0002cfaf, 0x0002d3af, 0x0002d7af,
+ 0x0002dbaf, 0x0002dfaf, 0x0002e3af, 0x0002e7af,
+ 0x0002ebaf, 0x0002efaf, 0x0002f3af, 0x0002f7af,
+ 0x0002fbaf, 0x0002ffaf, 0x000303af, 0x000307af,
+ 0x00030baf, 0x00030faf, 0x000313af, 0x000317af,
+ 0x00031baf, 0x00031faf, 0x000323af, 0x000327af,
+ 0x00032baf, 0x00032faf, 0x000333af, 0x000337af,
+ 0x00033baf, 0x00033faf, 0x000343af, 0x000347af,
+ 0x00034baf, 0x00034faf, 0x000353af, 0x000357af,
+ 0x00035baf, 0x00035faf, 0x000363af, 0x000367af,
+ 0x00036baf, 0x00036faf, 0x000373af, 0x000377af,
+ 0x00037baf, 0x00037faf, 0x000383af, 0x000387af,
+ 0x00038baf, 0x00038faf, 0x000393af, 0x000397af,
+ 0x00039baf, 0x00039faf, 0x0003a3af, 0x0003a7af,
+ 0x0003abaf, 0x0003afaf, 0x0003b3af, 0x0003b7af,
+ 0x0003bbaf, 0x0003bfaf, 0x0003c3af, 0x0003c7af,
+ 0x0003cbaf, 0x0003cfaf, 0x0003d3af, 0x0003d7af,
+ 0x0003dbaf, 0x0003dfaf, 0x0003e3af, 0x0003e7af,
+ 0x0003ebaf, 0x0003efaf, 0x0003f3af, 0x0003f7af,
+ 0x0003fbaf, 0x0003ffaf, 0x000403af, 0x000407af,
+ 0x00040baf, 0x00040faf, 0x000413af, 0x000417af,
+ 0x00041baf, 0x00041faf, 0x000423af, 0x000427af,
+ 0x00042baf, 0x00042faf, 0x000433af, 0x000437af,
+ 0x00043baf, 0x00043faf, 0x000443af, 0x000447af,
+ 0x00044baf, 0x00044faf, 0x000453af, 0x000457af,
+ 0x00045baf, 0x00045faf, 0x000463af, 0x000467af,
+ 0x00046baf, 0x00046faf, 0x000473af, 0x000477af,
+ 0x00047baf, 0x00047faf, 0x000483af, 0x000487af,
+ 0x00048baf, 0x00048faf, 0x000493af, 0x000497af,
+ 0x00049baf, 0x00049faf, 0x0004a3af, 0x0004a7af,
+ 0x0004abaf, 0x0004afaf, 0x0004b3af, 0x0004b7af,
+ 0x0004bbaf, 0x0004bfaf, 0x0004c3af, 0x0004c7af,
+ 0x0004cbaf, 0x0004cfaf, 0x0004d3af, 0x0004d7af,
+ 0x0004dbaf, 0x0004dfaf, 0x0004e3af, 0x0004e7af,
+ 0x0004ebaf, 0x0004efaf, 0x0004f3af, 0x0004f7af,
+ 0x0004fbaf, 0x0004ffaf, 0x000503af, 0x000507af,
+ 0x00050baf, 0x00050faf, 0x000513af, 0x000517af,
+ 0x00051baf, 0x00051faf, 0x000523af, 0x000527af,
+ 0x00052baf, 0x00052faf, 0x000533af, 0x000537af,
+ 0x00053baf, 0x00053faf, 0x000543af, 0x000547af,
+ 0x00054baf, 0x00054faf, 0x000553af, 0x000557af,
+ 0x00055baf, 0x00055faf, 0x000563af, 0x000567af,
+ 0x00056baf, 0x00056faf, 0x000573af, 0x000577af,
+ 0x00057baf, 0x00057faf, 0x000583af, 0x000587af,
+ 0x00058baf, 0x00058faf, 0x000593af, 0x000597af,
+ 0x00059baf, 0x00059faf, 0x0005a3af, 0x0005a7af,
+ 0x0005abaf, 0x0005afaf, 0x0005b3af, 0x0005b7af,
+ 0x0005bbaf, 0x0005bfaf, 0x0005c3af, 0x0005c7af,
+ 0x0005cbaf, 0x0005cfaf, 0x0005d3af, 0x0005d7af,
+ 0x0005dbaf, 0x0005dfaf, 0x0005e3af, 0x0005e7af,
+ 0x0005ebaf, 0x0005efaf, 0x0005f3af, 0x0005f7af,
+ 0x0005fbaf, 0x0005ffaf, 0x000603af, 0x000607af,
+ 0x00060baf, 0x00060faf, 0x000613af, 0x000617af,
+ 0x00061baf, 0x00061faf, 0x000623af, 0x000627af,
+ 0x00062baf, 0x00062faf, 0x000633af, 0x000637af,
+ 0x00063baf, 0x00063faf, 0x000643af, 0x000647af,
+ 0x00064baf, 0x00064faf, 0x000653af, 0x000657af,
+ 0x00065baf, 0x00065faf, 0x000663af, 0x000667af,
+ 0x00066baf, 0x00066faf, 0x000673af, 0x000677af,
+ 0x00067baf, 0x00067faf, 0x000683af, 0x000687af,
+ 0x00068baf, 0x00068faf, 0x000693af, 0x000697af,
+ 0x00069baf, 0x00069faf, 0x0006a3af, 0x0006a7af,
+ 0x0006abaf, 0x0006afaf, 0x0006b3af, 0x0006b7af,
+ 0x0006bbaf, 0x0006bfaf, 0x0006c3af, 0x0006c7af,
+ 0x0006cbaf, 0x0006cfaf, 0x0006d3af, 0x0006d7af,
+ 0x0006dbaf, 0x0006dfaf, 0x0006e3af, 0x0006e7af,
+ 0x0006ebaf, 0x0006efaf, 0x0006f3af, 0x0006f7af,
+ 0x0006fbaf, 0x0006ffaf, 0x000703af, 0x000707af,
+ 0x00070baf, 0x00070faf, 0x000713af, 0x000717af,
+ 0x00071baf, 0x00071faf, 0x000723af, 0x000727af,
+ 0x00072baf, 0x00072faf, 0x000733af, 0x000737af,
+ 0x00073baf, 0x00073faf, 0x000743af, 0x000747af,
+ 0x00074baf, 0x00074faf, 0x000753af, 0x000757af,
+ 0x00075baf, 0x00075faf, 0x000763af, 0x000767af,
+ 0x00076baf, 0x00076faf, 0x000773af, 0x000777af,
+ 0x00077baf, 0x00077faf, 0x000783af, 0x000787af,
+ 0x00078baf, 0x00078faf, 0x000793af, 0x000797af,
+ 0x00079baf, 0x00079faf, 0x0007a3af, 0x0007a7af,
+ 0x0007abaf, 0x0007afaf, 0x0007b3af, 0x0007b7af,
+ 0x0007bbaf, 0x0007bfaf, 0x0007c3af, 0x0007c7af,
+ 0x0007cbaf, 0x0007cfaf, 0x0007d3af, 0x0007d7af,
+ 0x0007dbaf, 0x0007dfaf, 0x0007e3af, 0x0007e7af,
+ 0x0007ebaf, 0x0007efaf, 0x0007f3af, 0x0007f7af,
+ 0x0007fbaf, 0x0007ffaf, 0x000803af, 0x000807af,
+ 0x00080baf, 0x00080faf, 0x000813af, 0x000817af,
+ 0x00081baf, 0x00081faf, 0x000823af, 0x000827af,
+ 0x00082baf, 0x00082faf, 0x000833af, 0x000837af,
+ 0x00083baf, 0x00083faf, 0x000843af, 0x000847af,
+ 0x00084baf, 0x00084faf, 0x000853af, 0x000857af,
+ 0x00085baf, 0x00085faf, 0x000863af, 0x000867af,
+ 0x00086baf, 0x00086faf, 0x000873af, 0x000877af,
+ 0x00087baf, 0x00087faf, 0x000883af, 0x000887af,
+ 0x00088baf, 0x00088faf, 0x000893af, 0x000897af,
+ 0x00089baf, 0x00089faf, 0x0008a3af, 0x0008a7af,
+ 0x0008abaf, 0x0008afaf, 0x0008b3af, 0x0008b7af,
+ 0x0008bbaf, 0x0008bfaf, 0x0008c3af, 0x0008c7af,
+ 0x0008cbaf, 0x0008cfaf, 0x0008d3af, 0x0008d7af,
+ 0x0008dbaf, 0x0008dfaf, 0x0008e3af, 0x0008e7af,
+ 0x0008ebaf, 0x0008efaf, 0x0008f3af, 0x0008f7af,
+ 0x0008fbaf, 0x0008ffaf, 0x000903af, 0x000907af,
+ 0x00090baf, 0x00090faf, 0x000913af, 0x000917af,
+ 0x00091baf, 0x00091faf, 0x000923af, 0x000927af,
+ 0x00092baf, 0x00092faf, 0x000933af, 0x000937af,
+ 0x00093baf, 0x00093faf, 0x000943af, 0x000947af,
+ 0x00094baf, 0x00094faf, 0x000953af, 0x000957af,
+ 0x00095baf, 0x00095faf, 0x000963af, 0x000967af,
+ 0x00096baf, 0x00096faf, 0x000973af, 0x000977af,
+ 0x00097baf, 0x00097faf, 0x000983af, 0x000987af,
+ 0x00098baf, 0x00098faf, 0x000993af, 0x000997af,
+ 0x00099baf, 0x00099faf, 0x0009a3af, 0x0009a7af,
+ 0x0009abaf, 0x0009afaf, 0x0009b3af, 0x0009b7af,
+ 0x0009bbaf, 0x0009bfaf, 0x0009c3af, 0x0009c7af,
+ 0x0009cbaf, 0x0009cfaf, 0x0009d3af, 0x0009d7af,
+ 0x0009dbaf, 0x0009dfaf, 0x0009e3af, 0x0009e7af,
+ 0x0009ebaf, 0x0009efaf, 0x0009f3af, 0x0009f7af,
+ 0x0009fbaf, 0x0009ffaf, 0x000a03af, 0x000a07af,
+ 0x000a0baf, 0x000a0faf, 0x000a13af, 0x000a17af,
+ 0x000a1baf, 0x000a1faf, 0x000a23af, 0x000a27af,
+ 0x000a2baf, 0x000a2faf, 0x000a33af, 0x000a37af,
+ 0x000a3baf, 0x000a3faf, 0x000a43af, 0x000a47af,
+ 0x000a4baf, 0x000a4faf, 0x000a53af, 0x000a57af,
+ 0x000a5baf, 0x000a5faf, 0x000a63af, 0x000a67af,
+ 0x000a6baf, 0x000a6faf, 0x000a73af, 0x000a77af,
+ 0x000a7baf, 0x000a7faf, 0x000a83af, 0x000a87af,
+ 0x000a8baf, 0x000a8faf, 0x000a93af, 0x000a97af,
+ 0x000a9baf, 0x000a9faf, 0x000aa3af, 0x000aa7af,
+ 0x000aabaf, 0x000aafaf, 0x000ab3af, 0x000ab7af,
+ 0x000abbaf, 0x000abfaf, 0x000ac3af, 0x000ac7af,
+ 0x000acbaf, 0x000acfaf, 0x000ad3af, 0x000ad7af,
+ 0x000adbaf, 0x000adfaf, 0x000ae3af, 0x000ae7af,
+ 0x000aebaf, 0x000aefaf, 0x000af3af, 0x000af7af,
+ 0x000afbaf, 0x000affaf, 0x000b03af, 0x000b07af,
+ 0x000b0baf, 0x000b0faf, 0x000b13af, 0x000b17af,
+ 0x000b1baf, 0x000b1faf, 0x000b23af, 0x000b27af,
+ 0x000b2baf, 0x000b2faf, 0x000b33af, 0x000b37af,
+ 0x000b3baf, 0x000b3faf, 0x000b43af, 0x000b47af,
+ 0x000b4baf, 0x000b4faf, 0x000b53af, 0x000b57af,
+ 0x000b5baf, 0x000b5faf, 0x000b63af, 0x000b67af,
+ 0x000b6baf, 0x000b6faf, 0x000b73af, 0x000b77af,
+ 0x000b7baf, 0x000b7faf, 0x000b83af, 0x000b87af,
+ 0x000b8baf, 0x000b8faf, 0x000b93af, 0x000b97af,
+ 0x000b9baf, 0x000b9faf, 0x000ba3af, 0x000ba7af,
+ 0x000babaf, 0x000bafaf, 0x000bb3af, 0x000bb7af,
+ 0x000bbbaf, 0x000bbfaf, 0x000bc3af, 0x000bc7af,
+ 0x000bcbaf, 0x000bcfaf, 0x000bd3af, 0x000bd7af,
+ 0x000bdbaf, 0x000bdfaf, 0x000be3af, 0x000be7af,
+ 0x000bebaf, 0x000befaf, 0x000bf3af, 0x000bf7af,
+ 0x000bfbaf, 0x000bffaf, 0x000c03af, 0x000c07af,
+ 0x000c0baf, 0x000c0faf, 0x000c13af, 0x000c17af,
+ 0x000c1baf, 0x000c1faf, 0x000c23af, 0x000c27af,
+ 0x000c2baf, 0x000c2faf, 0x000c33af, 0x000c37af,
+ 0x000c3baf, 0x000c3faf, 0x000c43af, 0x000c47af,
+ 0x000c4baf, 0x000c4faf, 0x000c53af, 0x000c57af,
+ 0x000c5baf, 0x000c5faf, 0x000c63af, 0x000c67af,
+ 0x000c6baf, 0x000c6faf, 0x000c73af, 0x000c77af,
+ 0x000c7baf, 0x000c7faf, 0x000c83af, 0x000c87af,
+ 0x000c8baf, 0x000c8faf, 0x000c93af, 0x000c97af,
+ 0x000c9baf, 0x000c9faf, 0x000ca3af, 0x000ca7af,
+ 0x000cabaf, 0x000cafaf, 0x000cb3af, 0x000cb7af,
+ 0x000cbbaf, 0x000cbfaf, 0x000cc3af, 0x000cc7af,
+ 0x000ccbaf, 0x000ccfaf, 0x000cd3af, 0x000cd7af,
+ 0x000cdbaf, 0x000cdfaf, 0x000ce3af, 0x000ce7af,
+ 0x000cebaf, 0x000cefaf, 0x000cf3af, 0x000cf7af,
+ 0x000cfbaf, 0x000cffaf, 0x000d03af, 0x000d07af,
+ 0x000d0baf, 0x000d0faf, 0x000d13af, 0x000d17af,
+ 0x000d1baf, 0x000d1faf, 0x000d23af, 0x000d27af,
+ 0x000d2baf, 0x000d2faf, 0x000d33af, 0x000d37af,
+ 0x000d3baf, 0x000d3faf, 0x000d43af, 0x000d47af,
+ 0x000d4baf, 0x000d4faf, 0x000d53af, 0x000d57af,
+ 0x000d5baf, 0x000d5faf, 0x000d63af, 0x000d67af,
+ 0x000d6baf, 0x000d6faf, 0x000d73af, 0x000d77af,
+ 0x000d7baf, 0x000d7faf, 0x000d83af, 0x000d87af,
+ 0x000d8baf, 0x000d8faf, 0x000d93af, 0x000d97af,
+ 0x000d9baf, 0x000d9faf, 0x000da3af, 0x000da7af,
+ 0x000dabaf, 0x000dafaf, 0x000db3af, 0x000db7af,
+ 0x000dbbaf, 0x000dbfaf, 0x000dc3af, 0x000dc7af,
+ 0x000dcbaf, 0x000dcfaf, 0x000dd3af, 0x000dd7af,
+ 0x000ddbaf, 0x000ddfaf, 0x000de3af, 0x000de7af,
+ 0x000debaf, 0x000defaf, 0x000df3af, 0x000df7af,
+ 0x000dfbaf, 0x000dffaf, 0x000e03af, 0x000e07af,
+ 0x000e0baf, 0x000e0faf, 0x000e13af, 0x000e17af,
+ 0x000e1baf, 0x000e1faf, 0x000e23af, 0x000e27af,
+ 0x000e2baf, 0x000e2faf, 0x000e33af, 0x000e37af,
+ 0x000e3baf, 0x000e3faf, 0x000e43af, 0x000e47af,
+ 0x000e4baf, 0x000e4faf, 0x000e53af, 0x000e57af,
+ 0x000e5baf, 0x000e5faf, 0x000e63af, 0x000e67af,
+ 0x000e6baf, 0x000e6faf, 0x000e73af, 0x000e77af,
+ 0x000e7baf, 0x000e7faf, 0x000e83af, 0x000e87af,
+ 0x000e8baf, 0x000e8faf, 0x000e93af, 0x000e97af,
+ 0x000e9baf, 0x000e9faf, 0x000ea3af, 0x000ea7af,
+ 0x000eabaf, 0x000eafaf, 0x000eb3af, 0x000eb7af,
+ 0x000ebbaf, 0x000ebfaf, 0x000ec3af, 0x000ec7af,
+ 0x000ecbaf, 0x000ecfaf, 0x000ed3af, 0x000ed7af,
+ 0x000edbaf, 0x000edfaf, 0x000ee3af, 0x000ee7af,
+ 0x000eebaf, 0x000eefaf, 0x000ef3af, 0x000ef7af,
+ 0x000efbaf, 0x000effaf, 0x000f03af, 0x000f07af,
+ 0x000f0baf, 0x000f0faf, 0x000f13af, 0x000f17af,
+ 0x000f1baf, 0x000f1faf, 0x000f23af, 0x000f27af,
+ 0x000f2baf, 0x000f2faf, 0x000f33af, 0x000f37af,
+ 0x000f3baf, 0x000f3faf, 0x000f43af, 0x000f47af,
+ 0x000f4baf, 0x000f4faf, 0x000f53af, 0x000f57af,
+ 0x000f5baf, 0x000f5faf, 0x000f63af, 0x000f67af,
+ 0x000f6baf, 0x000f6faf, 0x000f73af, 0x000f77af,
+ 0x000f7baf, 0x000f7faf, 0x000f83af, 0x000f87af,
+ 0x000f8baf, 0x000f8faf, 0x000f93af, 0x000f97af,
+ 0x000f9baf, 0x000f9faf, 0x000fa3af, 0x000fa7af,
+ 0x000fabaf, 0x000fafaf, 0x000fb3af, 0x000fb7af,
+ 0x000fbbaf, 0x000fbfaf, 0x000fc3af, 0x000fc7af,
+ 0x000fcbaf, 0x000fcfaf, 0x000fd3af, 0x000fd7af,
+ 0x000fdbaf, 0x000fdfaf, 0x000fe3af, 0x000fe7af,
+ 0x000febaf, 0x000fefaf, 0x000ff3af, 0x000ff7af,
+ 0x000ffbaf, 0x000fffaf, 0x00000070, 0x00000470,
+ 0x00000870, 0x00000c70, 0x00001070, 0x00001470,
+ 0x00001870, 0x00001c70, 0x00002070, 0x00002470,
+ 0x00002870, 0x00002c70, 0x00003070, 0x00003470,
+ 0x00003870, 0x00003c70, 0x00004070, 0x00004470,
+ 0x00004870, 0x00004c70, 0x00005070, 0x00005470,
+ 0x00005870, 0x00005c70, 0x00006070, 0x00006470,
+ 0x00006870, 0x00006c70, 0x00007070, 0x00007470,
+ 0x00007870, 0x00007c70, 0x00008070, 0x00008470,
+ 0x00008870, 0x00008c70, 0x00009070, 0x00009470,
+ 0x00009870, 0x00009c70, 0x0000a070, 0x0000a470,
+ 0x0000a870, 0x0000ac70, 0x0000b070, 0x0000b470,
+ 0x0000b870, 0x0000bc70, 0x0000c070, 0x0000c470,
+ 0x0000c870, 0x0000cc70, 0x0000d070, 0x0000d470,
+ 0x0000d870, 0x0000dc70, 0x0000e070, 0x0000e470,
+ 0x0000e870, 0x0000ec70, 0x0000f070, 0x0000f470,
+ 0x0000f870, 0x0000fc70, 0x00010070, 0x00010470,
+ 0x00010870, 0x00010c70, 0x00011070, 0x00011470,
+ 0x00011870, 0x00011c70, 0x00012070, 0x00012470,
+ 0x00012870, 0x00012c70, 0x00013070, 0x00013470,
+ 0x00013870, 0x00013c70, 0x00014070, 0x00014470,
+ 0x00014870, 0x00014c70, 0x00015070, 0x00015470,
+ 0x00015870, 0x00015c70, 0x00016070, 0x00016470,
+ 0x00016870, 0x00016c70, 0x00017070, 0x00017470,
+ 0x00017870, 0x00017c70, 0x00018070, 0x00018470,
+ 0x00018870, 0x00018c70, 0x00019070, 0x00019470,
+ 0x00019870, 0x00019c70, 0x0001a070, 0x0001a470,
+ 0x0001a870, 0x0001ac70, 0x0001b070, 0x0001b470,
+ 0x0001b870, 0x0001bc70, 0x0001c070, 0x0001c470,
+ 0x0001c870, 0x0001cc70, 0x0001d070, 0x0001d470,
+ 0x0001d870, 0x0001dc70, 0x0001e070, 0x0001e470,
+ 0x0001e870, 0x0001ec70, 0x0001f070, 0x0001f470,
+ 0x0001f870, 0x0001fc70, 0x00020070, 0x00020470,
+ 0x00020870, 0x00020c70, 0x00021070, 0x00021470,
+ 0x00021870, 0x00021c70, 0x00022070, 0x00022470,
+ 0x00022870, 0x00022c70, 0x00023070, 0x00023470,
+ 0x00023870, 0x00023c70, 0x00024070, 0x00024470,
+ 0x00024870, 0x00024c70, 0x00025070, 0x00025470,
+ 0x00025870, 0x00025c70, 0x00026070, 0x00026470,
+ 0x00026870, 0x00026c70, 0x00027070, 0x00027470,
+ 0x00027870, 0x00027c70, 0x00028070, 0x00028470,
+ 0x00028870, 0x00028c70, 0x00029070, 0x00029470,
+ 0x00029870, 0x00029c70, 0x0002a070, 0x0002a470,
+ 0x0002a870, 0x0002ac70, 0x0002b070, 0x0002b470,
+ 0x0002b870, 0x0002bc70, 0x0002c070, 0x0002c470,
+ 0x0002c870, 0x0002cc70, 0x0002d070, 0x0002d470,
+ 0x0002d870, 0x0002dc70, 0x0002e070, 0x0002e470,
+ 0x0002e870, 0x0002ec70, 0x0002f070, 0x0002f470,
+ 0x0002f870, 0x0002fc70, 0x00030070, 0x00030470,
+ 0x00030870, 0x00030c70, 0x00031070, 0x00031470,
+ 0x00031870, 0x00031c70, 0x00032070, 0x00032470,
+ 0x00032870, 0x00032c70, 0x00033070, 0x00033470,
+ 0x00033870, 0x00033c70, 0x00034070, 0x00034470,
+ 0x00034870, 0x00034c70, 0x00035070, 0x00035470,
+ 0x00035870, 0x00035c70, 0x00036070, 0x00036470,
+ 0x00036870, 0x00036c70, 0x00037070, 0x00037470,
+ 0x00037870, 0x00037c70, 0x00038070, 0x00038470,
+ 0x00038870, 0x00038c70, 0x00039070, 0x00039470,
+ 0x00039870, 0x00039c70, 0x0003a070, 0x0003a470,
+ 0x0003a870, 0x0003ac70, 0x0003b070, 0x0003b470,
+ 0x0003b870, 0x0003bc70, 0x0003c070, 0x0003c470,
+ 0x0003c870, 0x0003cc70, 0x0003d070, 0x0003d470,
+ 0x0003d870, 0x0003dc70, 0x0003e070, 0x0003e470,
+ 0x0003e870, 0x0003ec70, 0x0003f070, 0x0003f470,
+ 0x0003f870, 0x0003fc70, 0x00040070, 0x00040470,
+ 0x00040870, 0x00040c70, 0x00041070, 0x00041470,
+ 0x00041870, 0x00041c70, 0x00042070, 0x00042470,
+ 0x00042870, 0x00042c70, 0x00043070, 0x00043470,
+ 0x00043870, 0x00043c70, 0x00044070, 0x00044470,
+ 0x00044870, 0x00044c70, 0x00045070, 0x00045470,
+ 0x00045870, 0x00045c70, 0x00046070, 0x00046470,
+ 0x00046870, 0x00046c70, 0x00047070, 0x00047470,
+ 0x00047870, 0x00047c70, 0x00048070, 0x00048470,
+ 0x00048870, 0x00048c70, 0x00049070, 0x00049470,
+ 0x00049870, 0x00049c70, 0x0004a070, 0x0004a470,
+ 0x0004a870, 0x0004ac70, 0x0004b070, 0x0004b470,
+ 0x0004b870, 0x0004bc70, 0x0004c070, 0x0004c470,
+ 0x0004c870, 0x0004cc70, 0x0004d070, 0x0004d470,
+ 0x0004d870, 0x0004dc70, 0x0004e070, 0x0004e470,
+ 0x0004e870, 0x0004ec70, 0x0004f070, 0x0004f470,
+ 0x0004f870, 0x0004fc70, 0x00050070, 0x00050470,
+ 0x00050870, 0x00050c70, 0x00051070, 0x00051470,
+ 0x00051870, 0x00051c70, 0x00052070, 0x00052470,
+ 0x00052870, 0x00052c70, 0x00053070, 0x00053470,
+ 0x00053870, 0x00053c70, 0x00054070, 0x00054470,
+ 0x00054870, 0x00054c70, 0x00055070, 0x00055470,
+ 0x00055870, 0x00055c70, 0x00056070, 0x00056470,
+ 0x00056870, 0x00056c70, 0x00057070, 0x00057470,
+ 0x00057870, 0x00057c70, 0x00058070, 0x00058470,
+ 0x00058870, 0x00058c70, 0x00059070, 0x00059470,
+ 0x00059870, 0x00059c70, 0x0005a070, 0x0005a470,
+ 0x0005a870, 0x0005ac70, 0x0005b070, 0x0005b470,
+ 0x0005b870, 0x0005bc70, 0x0005c070, 0x0005c470,
+ 0x0005c870, 0x0005cc70, 0x0005d070, 0x0005d470,
+ 0x0005d870, 0x0005dc70, 0x0005e070, 0x0005e470,
+ 0x0005e870, 0x0005ec70, 0x0005f070, 0x0005f470,
+ 0x0005f870, 0x0005fc70, 0x00060070, 0x00060470,
+ 0x00060870, 0x00060c70, 0x00061070, 0x00061470,
+ 0x00061870, 0x00061c70, 0x00062070, 0x00062470,
+ 0x00062870, 0x00062c70, 0x00063070, 0x00063470,
+ 0x00063870, 0x00063c70, 0x00064070, 0x00064470,
+ 0x00064870, 0x00064c70, 0x00065070, 0x00065470,
+ 0x00065870, 0x00065c70, 0x00066070, 0x00066470,
+ 0x00066870, 0x00066c70, 0x00067070, 0x00067470,
+ 0x00067870, 0x00067c70, 0x00068070, 0x00068470,
+ 0x00068870, 0x00068c70, 0x00069070, 0x00069470,
+ 0x00069870, 0x00069c70, 0x0006a070, 0x0006a470,
+ 0x0006a870, 0x0006ac70, 0x0006b070, 0x0006b470,
+ 0x0006b870, 0x0006bc70, 0x0006c070, 0x0006c470,
+ 0x0006c870, 0x0006cc70, 0x0006d070, 0x0006d470,
+ 0x0006d870, 0x0006dc70, 0x0006e070, 0x0006e470,
+ 0x0006e870, 0x0006ec70, 0x0006f070, 0x0006f470,
+ 0x0006f870, 0x0006fc70, 0x00070070, 0x00070470,
+ 0x00070870, 0x00070c70, 0x00071070, 0x00071470,
+ 0x00071870, 0x00071c70, 0x00072070, 0x00072470,
+ 0x00072870, 0x00072c70, 0x00073070, 0x00073470,
+ 0x00073870, 0x00073c70, 0x00074070, 0x00074470,
+ 0x00074870, 0x00074c70, 0x00075070, 0x00075470,
+ 0x00075870, 0x00075c70, 0x00076070, 0x00076470,
+ 0x00076870, 0x00076c70, 0x00077070, 0x00077470,
+ 0x00077870, 0x00077c70, 0x00078070, 0x00078470,
+ 0x00078870, 0x00078c70, 0x00079070, 0x00079470,
+ 0x00079870, 0x00079c70, 0x0007a070, 0x0007a470,
+ 0x0007a870, 0x0007ac70, 0x0007b070, 0x0007b470,
+ 0x0007b870, 0x0007bc70, 0x0007c070, 0x0007c470,
+ 0x0007c870, 0x0007cc70, 0x0007d070, 0x0007d470,
+ 0x0007d870, 0x0007dc70, 0x0007e070, 0x0007e470,
+ 0x0007e870, 0x0007ec70, 0x0007f070, 0x0007f470,
+ 0x0007f870, 0x0007fc70, 0x00080070, 0x00080470,
+ 0x00080870, 0x00080c70, 0x00081070, 0x00081470,
+ 0x00081870, 0x00081c70, 0x00082070, 0x00082470,
+ 0x00082870, 0x00082c70, 0x00083070, 0x00083470,
+ 0x00083870, 0x00083c70, 0x00084070, 0x00084470,
+ 0x00084870, 0x00084c70, 0x00085070, 0x00085470,
+ 0x00085870, 0x00085c70, 0x00086070, 0x00086470,
+ 0x00086870, 0x00086c70, 0x00087070, 0x00087470,
+ 0x00087870, 0x00087c70, 0x00088070, 0x00088470,
+ 0x00088870, 0x00088c70, 0x00089070, 0x00089470,
+ 0x00089870, 0x00089c70, 0x0008a070, 0x0008a470,
+ 0x0008a870, 0x0008ac70, 0x0008b070, 0x0008b470,
+ 0x0008b870, 0x0008bc70, 0x0008c070, 0x0008c470,
+ 0x0008c870, 0x0008cc70, 0x0008d070, 0x0008d470,
+ 0x0008d870, 0x0008dc70, 0x0008e070, 0x0008e470,
+ 0x0008e870, 0x0008ec70, 0x0008f070, 0x0008f470,
+ 0x0008f870, 0x0008fc70, 0x00090070, 0x00090470,
+ 0x00090870, 0x00090c70, 0x00091070, 0x00091470,
+ 0x00091870, 0x00091c70, 0x00092070, 0x00092470,
+ 0x00092870, 0x00092c70, 0x00093070, 0x00093470,
+ 0x00093870, 0x00093c70, 0x00094070, 0x00094470,
+ 0x00094870, 0x00094c70, 0x00095070, 0x00095470,
+ 0x00095870, 0x00095c70, 0x00096070, 0x00096470,
+ 0x00096870, 0x00096c70, 0x00097070, 0x00097470,
+ 0x00097870, 0x00097c70, 0x00098070, 0x00098470,
+ 0x00098870, 0x00098c70, 0x00099070, 0x00099470,
+ 0x00099870, 0x00099c70, 0x0009a070, 0x0009a470,
+ 0x0009a870, 0x0009ac70, 0x0009b070, 0x0009b470,
+ 0x0009b870, 0x0009bc70, 0x0009c070, 0x0009c470,
+ 0x0009c870, 0x0009cc70, 0x0009d070, 0x0009d470,
+ 0x0009d870, 0x0009dc70, 0x0009e070, 0x0009e470,
+ 0x0009e870, 0x0009ec70, 0x0009f070, 0x0009f470,
+ 0x0009f870, 0x0009fc70, 0x000a0070, 0x000a0470,
+ 0x000a0870, 0x000a0c70, 0x000a1070, 0x000a1470,
+ 0x000a1870, 0x000a1c70, 0x000a2070, 0x000a2470,
+ 0x000a2870, 0x000a2c70, 0x000a3070, 0x000a3470,
+ 0x000a3870, 0x000a3c70, 0x000a4070, 0x000a4470,
+ 0x000a4870, 0x000a4c70, 0x000a5070, 0x000a5470,
+ 0x000a5870, 0x000a5c70, 0x000a6070, 0x000a6470,
+ 0x000a6870, 0x000a6c70, 0x000a7070, 0x000a7470,
+ 0x000a7870, 0x000a7c70, 0x000a8070, 0x000a8470,
+ 0x000a8870, 0x000a8c70, 0x000a9070, 0x000a9470,
+ 0x000a9870, 0x000a9c70, 0x000aa070, 0x000aa470,
+ 0x000aa870, 0x000aac70, 0x000ab070, 0x000ab470,
+ 0x000ab870, 0x000abc70, 0x000ac070, 0x000ac470,
+ 0x000ac870, 0x000acc70, 0x000ad070, 0x000ad470,
+ 0x000ad870, 0x000adc70, 0x000ae070, 0x000ae470,
+ 0x000ae870, 0x000aec70, 0x000af070, 0x000af470,
+ 0x000af870, 0x000afc70, 0x000b0070, 0x000b0470,
+ 0x000b0870, 0x000b0c70, 0x000b1070, 0x000b1470,
+ 0x000b1870, 0x000b1c70, 0x000b2070, 0x000b2470,
+ 0x000b2870, 0x000b2c70, 0x000b3070, 0x000b3470,
+ 0x000b3870, 0x000b3c70, 0x000b4070, 0x000b4470,
+ 0x000b4870, 0x000b4c70, 0x000b5070, 0x000b5470,
+ 0x000b5870, 0x000b5c70, 0x000b6070, 0x000b6470,
+ 0x000b6870, 0x000b6c70, 0x000b7070, 0x000b7470,
+ 0x000b7870, 0x000b7c70, 0x000b8070, 0x000b8470,
+ 0x000b8870, 0x000b8c70, 0x000b9070, 0x000b9470,
+ 0x000b9870, 0x000b9c70, 0x000ba070, 0x000ba470,
+ 0x000ba870, 0x000bac70, 0x000bb070, 0x000bb470,
+ 0x000bb870, 0x000bbc70, 0x000bc070, 0x000bc470,
+ 0x000bc870, 0x000bcc70, 0x000bd070, 0x000bd470,
+ 0x000bd870, 0x000bdc70, 0x000be070, 0x000be470,
+ 0x000be870, 0x000bec70, 0x000bf070, 0x000bf470,
+ 0x000bf870, 0x000bfc70, 0x000c0070, 0x000c0470,
+ 0x000c0870, 0x000c0c70, 0x000c1070, 0x000c1470,
+ 0x000c1870, 0x000c1c70, 0x000c2070, 0x000c2470,
+ 0x000c2870, 0x000c2c70, 0x000c3070, 0x000c3470,
+ 0x000c3870, 0x000c3c70, 0x000c4070, 0x000c4470,
+ 0x000c4870, 0x000c4c70, 0x000c5070, 0x000c5470,
+ 0x000c5870, 0x000c5c70, 0x000c6070, 0x000c6470,
+ 0x000c6870, 0x000c6c70, 0x000c7070, 0x000c7470,
+ 0x000c7870, 0x000c7c70, 0x000c8070, 0x000c8470,
+ 0x000c8870, 0x000c8c70, 0x000c9070, 0x000c9470,
+ 0x000c9870, 0x000c9c70, 0x000ca070, 0x000ca470,
+ 0x000ca870, 0x000cac70, 0x000cb070, 0x000cb470,
+ 0x000cb870, 0x000cbc70, 0x000cc070, 0x000cc470,
+ 0x000cc870, 0x000ccc70, 0x000cd070, 0x000cd470,
+ 0x000cd870, 0x000cdc70, 0x000ce070, 0x000ce470,
+ 0x000ce870, 0x000cec70, 0x000cf070, 0x000cf470,
+ 0x000cf870, 0x000cfc70, 0x000d0070, 0x000d0470,
+ 0x000d0870, 0x000d0c70, 0x000d1070, 0x000d1470,
+ 0x000d1870, 0x000d1c70, 0x000d2070, 0x000d2470,
+ 0x000d2870, 0x000d2c70, 0x000d3070, 0x000d3470,
+ 0x000d3870, 0x000d3c70, 0x000d4070, 0x000d4470,
+ 0x000d4870, 0x000d4c70, 0x000d5070, 0x000d5470,
+ 0x000d5870, 0x000d5c70, 0x000d6070, 0x000d6470,
+ 0x000d6870, 0x000d6c70, 0x000d7070, 0x000d7470,
+ 0x000d7870, 0x000d7c70, 0x000d8070, 0x000d8470,
+ 0x000d8870, 0x000d8c70, 0x000d9070, 0x000d9470,
+ 0x000d9870, 0x000d9c70, 0x000da070, 0x000da470,
+ 0x000da870, 0x000dac70, 0x000db070, 0x000db470,
+ 0x000db870, 0x000dbc70, 0x000dc070, 0x000dc470,
+ 0x000dc870, 0x000dcc70, 0x000dd070, 0x000dd470,
+ 0x000dd870, 0x000ddc70, 0x000de070, 0x000de470,
+ 0x000de870, 0x000dec70, 0x000df070, 0x000df470,
+ 0x000df870, 0x000dfc70, 0x000e0070, 0x000e0470,
+ 0x000e0870, 0x000e0c70, 0x000e1070, 0x000e1470,
+ 0x000e1870, 0x000e1c70, 0x000e2070, 0x000e2470,
+ 0x000e2870, 0x000e2c70, 0x000e3070, 0x000e3470,
+ 0x000e3870, 0x000e3c70, 0x000e4070, 0x000e4470,
+ 0x000e4870, 0x000e4c70, 0x000e5070, 0x000e5470,
+ 0x000e5870, 0x000e5c70, 0x000e6070, 0x000e6470,
+ 0x000e6870, 0x000e6c70, 0x000e7070, 0x000e7470,
+ 0x000e7870, 0x000e7c70, 0x000e8070, 0x000e8470,
+ 0x000e8870, 0x000e8c70, 0x000e9070, 0x000e9470,
+ 0x000e9870, 0x000e9c70, 0x000ea070, 0x000ea470,
+ 0x000ea870, 0x000eac70, 0x000eb070, 0x000eb470,
+ 0x000eb870, 0x000ebc70, 0x000ec070, 0x000ec470,
+ 0x000ec870, 0x000ecc70, 0x000ed070, 0x000ed470,
+ 0x000ed870, 0x000edc70, 0x000ee070, 0x000ee470,
+ 0x000ee870, 0x000eec70, 0x000ef070, 0x000ef470,
+ 0x000ef870, 0x000efc70, 0x000f0070, 0x000f0470,
+ 0x000f0870, 0x000f0c70, 0x000f1070, 0x000f1470,
+ 0x000f1870, 0x000f1c70, 0x000f2070, 0x000f2470,
+ 0x000f2870, 0x000f2c70, 0x000f3070, 0x000f3470,
+ 0x000f3870, 0x000f3c70, 0x000f4070, 0x000f4470,
+ 0x000f4870, 0x000f4c70, 0x000f5070, 0x000f5470,
+ 0x000f5870, 0x000f5c70, 0x000f6070, 0x000f6470,
+ 0x000f6870, 0x000f6c70, 0x000f7070, 0x000f7470,
+ 0x000f7870, 0x000f7c70, 0x000f8070, 0x000f8470,
+ 0x000f8870, 0x000f8c70, 0x000f9070, 0x000f9470,
+ 0x000f9870, 0x000f9c70, 0x000fa070, 0x000fa470,
+ 0x000fa870, 0x000fac70, 0x000fb070, 0x000fb470,
+ 0x000fb870, 0x000fbc70, 0x000fc070, 0x000fc470,
+ 0x000fc870, 0x000fcc70, 0x000fd070, 0x000fd470,
+ 0x000fd870, 0x000fdc70, 0x000fe070, 0x000fe470,
+ 0x000fe870, 0x000fec70, 0x000ff070, 0x000ff470,
+ 0x000ff870, 0x000ffc70, 0x00100070, 0x00100470,
+ 0x00100870, 0x00100c70, 0x00101070, 0x00101470,
+ 0x00101870, 0x00101c70, 0x00102070, 0x00102470,
+ 0x00102870, 0x00102c70, 0x00103070, 0x00103470,
+ 0x00103870, 0x00103c70, 0x00104070, 0x00104470,
+ 0x00104870, 0x00104c70, 0x00105070, 0x00105470,
+ 0x00105870, 0x00105c70, 0x00106070, 0x00106470,
+ 0x00106870, 0x00106c70, 0x00107070, 0x00107470,
+ 0x00107870, 0x00107c70, 0x00108070, 0x00108470,
+ 0x00108870, 0x00108c70, 0x00109070, 0x00109470,
+ 0x00109870, 0x00109c70, 0x0010a070, 0x0010a470,
+ 0x0010a870, 0x0010ac70, 0x0010b070, 0x0010b470,
+ 0x0010b870, 0x0010bc70, 0x0010c070, 0x0010c470,
+ 0x0010c870, 0x0010cc70, 0x0010d070, 0x0010d470,
+ 0x0010d870, 0x0010dc70, 0x0010e070, 0x0010e470,
+ 0x0010e870, 0x0010ec70, 0x0010f070, 0x0010f470,
+ 0x0010f870, 0x0010fc70, 0x00110070, 0x00110470,
+ 0x00110870, 0x00110c70, 0x00111070, 0x00111470,
+ 0x00111870, 0x00111c70, 0x00112070, 0x00112470,
+ 0x00112870, 0x00112c70, 0x00113070, 0x00113470,
+ 0x00113870, 0x00113c70, 0x00114070, 0x00114470,
+ 0x00114870, 0x00114c70, 0x00115070, 0x00115470,
+ 0x00115870, 0x00115c70, 0x00116070, 0x00116470,
+ 0x00116870, 0x00116c70, 0x00117070, 0x00117470,
+ 0x00117870, 0x00117c70, 0x00118070, 0x00118470,
+ 0x00118870, 0x00118c70, 0x00119070, 0x00119470,
+ 0x00119870, 0x00119c70, 0x0011a070, 0x0011a470,
+ 0x0011a870, 0x0011ac70, 0x0011b070, 0x0011b470,
+ 0x0011b870, 0x0011bc70, 0x0011c070, 0x0011c470,
+ 0x0011c870, 0x0011cc70, 0x0011d070, 0x0011d470,
+ 0x0011d870, 0x0011dc70, 0x0011e070, 0x0011e470,
+ 0x0011e870, 0x0011ec70, 0x0011f070, 0x0011f470,
+ 0x0011f870, 0x0011fc70, 0x00120070, 0x00120470,
+ 0x00120870, 0x00120c70, 0x00121070, 0x00121470,
+ 0x00121870, 0x00121c70, 0x00122070, 0x00122470,
+ 0x00122870, 0x00122c70, 0x00123070, 0x00123470,
+ 0x00123870, 0x00123c70, 0x00124070, 0x00124470,
+ 0x00124870, 0x00124c70, 0x00125070, 0x00125470,
+ 0x00125870, 0x00125c70, 0x00126070, 0x00126470,
+ 0x00126870, 0x00126c70, 0x00127070, 0x00127470,
+ 0x00127870, 0x00127c70, 0x00128070, 0x00128470,
+ 0x00128870, 0x00128c70, 0x00129070, 0x00129470,
+ 0x00129870, 0x00129c70, 0x0012a070, 0x0012a470,
+ 0x0012a870, 0x0012ac70, 0x0012b070, 0x0012b470,
+ 0x0012b870, 0x0012bc70, 0x0012c070, 0x0012c470,
+ 0x0012c870, 0x0012cc70, 0x0012d070, 0x0012d470,
+ 0x0012d870, 0x0012dc70, 0x0012e070, 0x0012e470,
+ 0x0012e870, 0x0012ec70, 0x0012f070, 0x0012f470,
+ 0x0012f870, 0x0012fc70, 0x00130070, 0x00130470,
+ 0x00130870, 0x00130c70, 0x00131070, 0x00131470,
+ 0x00131870, 0x00131c70, 0x00132070, 0x00132470,
+ 0x00132870, 0x00132c70, 0x00133070, 0x00133470,
+ 0x00133870, 0x00133c70, 0x00134070, 0x00134470,
+ 0x00134870, 0x00134c70, 0x00135070, 0x00135470,
+ 0x00135870, 0x00135c70, 0x00136070, 0x00136470,
+ 0x00136870, 0x00136c70, 0x00137070, 0x00137470,
+ 0x00137870, 0x00137c70, 0x00138070, 0x00138470,
+ 0x00138870, 0x00138c70, 0x00139070, 0x00139470,
+ 0x00139870, 0x00139c70, 0x0013a070, 0x0013a470,
+ 0x0013a870, 0x0013ac70, 0x0013b070, 0x0013b470,
+ 0x0013b870, 0x0013bc70, 0x0013c070, 0x0013c470,
+ 0x0013c870, 0x0013cc70, 0x0013d070, 0x0013d470,
+ 0x0013d870, 0x0013dc70, 0x0013e070, 0x0013e470,
+ 0x0013e870, 0x0013ec70, 0x0013f070, 0x0013f470,
+ 0x0013f870, 0x0013fc70, 0x00140070, 0x00140470,
+ 0x00140870, 0x00140c70, 0x00141070, 0x00141470,
+ 0x00141870, 0x00141c70, 0x00142070, 0x00142470,
+ 0x00142870, 0x00142c70, 0x00143070, 0x00143470,
+ 0x00143870, 0x00143c70, 0x00144070, 0x00144470,
+ 0x00144870, 0x00144c70, 0x00145070, 0x00145470,
+ 0x00145870, 0x00145c70, 0x00146070, 0x00146470,
+ 0x00146870, 0x00146c70, 0x00147070, 0x00147470,
+ 0x00147870, 0x00147c70, 0x00148070, 0x00148470,
+ 0x00148870, 0x00148c70, 0x00149070, 0x00149470,
+ 0x00149870, 0x00149c70, 0x0014a070, 0x0014a470,
+ 0x0014a870, 0x0014ac70, 0x0014b070, 0x0014b470,
+ 0x0014b870, 0x0014bc70, 0x0014c070, 0x0014c470,
+ 0x0014c870, 0x0014cc70, 0x0014d070, 0x0014d470,
+ 0x0014d870, 0x0014dc70, 0x0014e070, 0x0014e470,
+ 0x0014e870, 0x0014ec70, 0x0014f070, 0x0014f470,
+ 0x0014f870, 0x0014fc70, 0x00150070, 0x00150470,
+ 0x00150870, 0x00150c70, 0x00151070, 0x00151470,
+ 0x00151870, 0x00151c70, 0x00152070, 0x00152470,
+ 0x00152870, 0x00152c70, 0x00153070, 0x00153470,
+ 0x00153870, 0x00153c70, 0x00154070, 0x00154470,
+ 0x00154870, 0x00154c70, 0x00155070, 0x00155470,
+ 0x00155870, 0x00155c70, 0x00156070, 0x00156470,
+ 0x00156870, 0x00156c70, 0x00157070, 0x00157470,
+ 0x00157870, 0x00157c70, 0x00158070, 0x00158470,
+ 0x00158870, 0x00158c70, 0x00159070, 0x00159470,
+ 0x00159870, 0x00159c70, 0x0015a070, 0x0015a470,
+ 0x0015a870, 0x0015ac70, 0x0015b070, 0x0015b470,
+ 0x0015b870, 0x0015bc70, 0x0015c070, 0x0015c470,
+ 0x0015c870, 0x0015cc70, 0x0015d070, 0x0015d470,
+ 0x0015d870, 0x0015dc70, 0x0015e070, 0x0015e470,
+ 0x0015e870, 0x0015ec70, 0x0015f070, 0x0015f470,
+ 0x0015f870, 0x0015fc70, 0x00160070, 0x00160470,
+ 0x00160870, 0x00160c70, 0x00161070, 0x00161470,
+ 0x00161870, 0x00161c70, 0x00162070, 0x00162470,
+ 0x00162870, 0x00162c70, 0x00163070, 0x00163470,
+ 0x00163870, 0x00163c70, 0x00164070, 0x00164470,
+ 0x00164870, 0x00164c70, 0x00165070, 0x00165470,
+ 0x00165870, 0x00165c70, 0x00166070, 0x00166470,
+ 0x00166870, 0x00166c70, 0x00167070, 0x00167470,
+ 0x00167870, 0x00167c70, 0x00168070, 0x00168470,
+ 0x00168870, 0x00168c70, 0x00169070, 0x00169470,
+ 0x00169870, 0x00169c70, 0x0016a070, 0x0016a470,
+ 0x0016a870, 0x0016ac70, 0x0016b070, 0x0016b470,
+ 0x0016b870, 0x0016bc70, 0x0016c070, 0x0016c470,
+ 0x0016c870, 0x0016cc70, 0x0016d070, 0x0016d470,
+ 0x0016d870, 0x0016dc70, 0x0016e070, 0x0016e470,
+ 0x0016e870, 0x0016ec70, 0x0016f070, 0x0016f470,
+ 0x0016f870, 0x0016fc70, 0x00170070, 0x00170470,
+ 0x00170870, 0x00170c70, 0x00171070, 0x00171470,
+ 0x00171870, 0x00171c70, 0x00172070, 0x00172470,
+ 0x00172870, 0x00172c70, 0x00173070, 0x00173470,
+ 0x00173870, 0x00173c70, 0x00174070, 0x00174470,
+ 0x00174870, 0x00174c70, 0x00175070, 0x00175470,
+ 0x00175870, 0x00175c70, 0x00176070, 0x00176470,
+ 0x00176870, 0x00176c70, 0x00177070, 0x00177470,
+ 0x00177870, 0x00177c70, 0x00178070, 0x00178470,
+ 0x00178870, 0x00178c70, 0x00179070, 0x00179470,
+ 0x00179870, 0x00179c70, 0x0017a070, 0x0017a470,
+ 0x0017a870, 0x0017ac70, 0x0017b070, 0x0017b470,
+ 0x0017b870, 0x0017bc70, 0x0017c070, 0x0017c470,
+ 0x0017c870, 0x0017cc70, 0x0017d070, 0x0017d470,
+ 0x0017d870, 0x0017dc70, 0x0017e070, 0x0017e470,
+ 0x0017e870, 0x0017ec70, 0x0017f070, 0x0017f470,
+ 0x0017f870, 0x0017fc70, 0x00180070, 0x00180470,
+ 0x00180870, 0x00180c70, 0x00181070, 0x00181470,
+ 0x00181870, 0x00181c70, 0x00182070, 0x00182470,
+ 0x00182870, 0x00182c70, 0x00183070, 0x00183470,
+ 0x00183870, 0x00183c70, 0x00184070, 0x00184470,
+ 0x00184870, 0x00184c70, 0x00185070, 0x00185470,
+ 0x00185870, 0x00185c70, 0x00186070, 0x00186470,
+ 0x00186870, 0x00186c70, 0x00187070, 0x00187470,
+ 0x00187870, 0x00187c70, 0x00188070, 0x00188470,
+ 0x00188870, 0x00188c70, 0x00189070, 0x00189470,
+ 0x00189870, 0x00189c70, 0x0018a070, 0x0018a470,
+ 0x0018a870, 0x0018ac70, 0x0018b070, 0x0018b470,
+ 0x0018b870, 0x0018bc70, 0x0018c070, 0x0018c470,
+ 0x0018c870, 0x0018cc70, 0x0018d070, 0x0018d470,
+ 0x0018d870, 0x0018dc70, 0x0018e070, 0x0018e470,
+ 0x0018e870, 0x0018ec70, 0x0018f070, 0x0018f470,
+ 0x0018f870, 0x0018fc70, 0x00190070, 0x00190470,
+ 0x00190870, 0x00190c70, 0x00191070, 0x00191470,
+ 0x00191870, 0x00191c70, 0x00192070, 0x00192470,
+ 0x00192870, 0x00192c70, 0x00193070, 0x00193470,
+ 0x00193870, 0x00193c70, 0x00194070, 0x00194470,
+ 0x00194870, 0x00194c70, 0x00195070, 0x00195470,
+ 0x00195870, 0x00195c70, 0x00196070, 0x00196470,
+ 0x00196870, 0x00196c70, 0x00197070, 0x00197470,
+ 0x00197870, 0x00197c70, 0x00198070, 0x00198470,
+ 0x00198870, 0x00198c70, 0x00199070, 0x00199470,
+ 0x00199870, 0x00199c70, 0x0019a070, 0x0019a470,
+ 0x0019a870, 0x0019ac70, 0x0019b070, 0x0019b470,
+ 0x0019b870, 0x0019bc70, 0x0019c070, 0x0019c470,
+ 0x0019c870, 0x0019cc70, 0x0019d070, 0x0019d470,
+ 0x0019d870, 0x0019dc70, 0x0019e070, 0x0019e470,
+ 0x0019e870, 0x0019ec70, 0x0019f070, 0x0019f470,
+ 0x0019f870, 0x0019fc70, 0x001a0070, 0x001a0470,
+ 0x001a0870, 0x001a0c70, 0x001a1070, 0x001a1470,
+ 0x001a1870, 0x001a1c70, 0x001a2070, 0x001a2470,
+ 0x001a2870, 0x001a2c70, 0x001a3070, 0x001a3470,
+ 0x001a3870, 0x001a3c70, 0x001a4070, 0x001a4470,
+ 0x001a4870, 0x001a4c70, 0x001a5070, 0x001a5470,
+ 0x001a5870, 0x001a5c70, 0x001a6070, 0x001a6470,
+ 0x001a6870, 0x001a6c70, 0x001a7070, 0x001a7470,
+ 0x001a7870, 0x001a7c70, 0x001a8070, 0x001a8470,
+ 0x001a8870, 0x001a8c70, 0x001a9070, 0x001a9470,
+ 0x001a9870, 0x001a9c70, 0x001aa070, 0x001aa470,
+ 0x001aa870, 0x001aac70, 0x001ab070, 0x001ab470,
+ 0x001ab870, 0x001abc70, 0x001ac070, 0x001ac470,
+ 0x001ac870, 0x001acc70, 0x001ad070, 0x001ad470,
+ 0x001ad870, 0x001adc70, 0x001ae070, 0x001ae470,
+ 0x001ae870, 0x001aec70, 0x001af070, 0x001af470,
+ 0x001af870, 0x001afc70, 0x001b0070, 0x001b0470,
+ 0x001b0870, 0x001b0c70, 0x001b1070, 0x001b1470,
+ 0x001b1870, 0x001b1c70, 0x001b2070, 0x001b2470,
+ 0x001b2870, 0x001b2c70, 0x001b3070, 0x001b3470,
+ 0x001b3870, 0x001b3c70, 0x001b4070, 0x001b4470,
+ 0x001b4870, 0x001b4c70, 0x001b5070, 0x001b5470,
+ 0x001b5870, 0x001b5c70, 0x001b6070, 0x001b6470,
+ 0x001b6870, 0x001b6c70, 0x001b7070, 0x001b7470,
+ 0x001b7870, 0x001b7c70, 0x001b8070, 0x001b8470,
+ 0x001b8870, 0x001b8c70, 0x001b9070, 0x001b9470,
+ 0x001b9870, 0x001b9c70, 0x001ba070, 0x001ba470,
+ 0x001ba870, 0x001bac70, 0x001bb070, 0x001bb470,
+ 0x001bb870, 0x001bbc70, 0x001bc070, 0x001bc470,
+ 0x001bc870, 0x001bcc70, 0x001bd070, 0x001bd470,
+ 0x001bd870, 0x001bdc70, 0x001be070, 0x001be470,
+ 0x001be870, 0x001bec70, 0x001bf070, 0x001bf470,
+ 0x001bf870, 0x001bfc70, 0x001c0070, 0x001c0470,
+ 0x001c0870, 0x001c0c70, 0x001c1070, 0x001c1470,
+ 0x001c1870, 0x001c1c70, 0x001c2070, 0x001c2470,
+ 0x001c2870, 0x001c2c70, 0x001c3070, 0x001c3470,
+ 0x001c3870, 0x001c3c70, 0x001c4070, 0x001c4470,
+ 0x001c4870, 0x001c4c70, 0x001c5070, 0x001c5470,
+ 0x001c5870, 0x001c5c70, 0x001c6070, 0x001c6470,
+ 0x001c6870, 0x001c6c70, 0x001c7070, 0x001c7470,
+ 0x001c7870, 0x001c7c70, 0x001c8070, 0x001c8470,
+ 0x001c8870, 0x001c8c70, 0x001c9070, 0x001c9470,
+ 0x001c9870, 0x001c9c70, 0x001ca070, 0x001ca470,
+ 0x001ca870, 0x001cac70, 0x001cb070, 0x001cb470,
+ 0x001cb870, 0x001cbc70, 0x001cc070, 0x001cc470,
+ 0x001cc870, 0x001ccc70, 0x001cd070, 0x001cd470,
+ 0x001cd870, 0x001cdc70, 0x001ce070, 0x001ce470,
+ 0x001ce870, 0x001cec70, 0x001cf070, 0x001cf470,
+ 0x001cf870, 0x001cfc70, 0x001d0070, 0x001d0470,
+ 0x001d0870, 0x001d0c70, 0x001d1070, 0x001d1470,
+ 0x001d1870, 0x001d1c70, 0x001d2070, 0x001d2470,
+ 0x001d2870, 0x001d2c70, 0x001d3070, 0x001d3470,
+ 0x001d3870, 0x001d3c70, 0x001d4070, 0x001d4470,
+ 0x001d4870, 0x001d4c70, 0x001d5070, 0x001d5470,
+ 0x001d5870, 0x001d5c70, 0x001d6070, 0x001d6470,
+ 0x001d6870, 0x001d6c70, 0x001d7070, 0x001d7470,
+ 0x001d7870, 0x001d7c70, 0x001d8070, 0x001d8470,
+ 0x001d8870, 0x001d8c70, 0x001d9070, 0x001d9470,
+ 0x001d9870, 0x001d9c70, 0x001da070, 0x001da470,
+ 0x001da870, 0x001dac70, 0x001db070, 0x001db470,
+ 0x001db870, 0x001dbc70, 0x001dc070, 0x001dc470,
+ 0x001dc870, 0x001dcc70, 0x001dd070, 0x001dd470,
+ 0x001dd870, 0x001ddc70, 0x001de070, 0x001de470,
+ 0x001de870, 0x001dec70, 0x001df070, 0x001df470,
+ 0x001df870, 0x001dfc70, 0x001e0070, 0x001e0470,
+ 0x001e0870, 0x001e0c70, 0x001e1070, 0x001e1470,
+ 0x001e1870, 0x001e1c70, 0x001e2070, 0x001e2470,
+ 0x001e2870, 0x001e2c70, 0x001e3070, 0x001e3470,
+ 0x001e3870, 0x001e3c70, 0x001e4070, 0x001e4470,
+ 0x001e4870, 0x001e4c70, 0x001e5070, 0x001e5470,
+ 0x001e5870, 0x001e5c70, 0x001e6070, 0x001e6470,
+ 0x001e6870, 0x001e6c70, 0x001e7070, 0x001e7470,
+ 0x001e7870, 0x001e7c70, 0x001e8070, 0x001e8470,
+ 0x001e8870, 0x001e8c70, 0x001e9070, 0x001e9470,
+ 0x001e9870, 0x001e9c70, 0x001ea070, 0x001ea470,
+ 0x001ea870, 0x001eac70, 0x001eb070, 0x001eb470,
+ 0x001eb870, 0x001ebc70, 0x001ec070, 0x001ec470,
+ 0x001ec870, 0x001ecc70, 0x001ed070, 0x001ed470,
+ 0x001ed870, 0x001edc70, 0x001ee070, 0x001ee470,
+ 0x001ee870, 0x001eec70, 0x001ef070, 0x001ef470,
+ 0x001ef870, 0x001efc70, 0x001f0070, 0x001f0470,
+ 0x001f0870, 0x001f0c70, 0x001f1070, 0x001f1470,
+ 0x001f1870, 0x001f1c70, 0x001f2070, 0x001f2470,
+ 0x001f2870, 0x001f2c70, 0x001f3070, 0x001f3470,
+ 0x001f3870, 0x001f3c70, 0x001f4070, 0x001f4470,
+ 0x001f4870, 0x001f4c70, 0x001f5070, 0x001f5470,
+ 0x001f5870, 0x001f5c70, 0x001f6070, 0x001f6470,
+ 0x001f6870, 0x001f6c70, 0x001f7070, 0x001f7470,
+ 0x001f7870, 0x001f7c70, 0x001f8070, 0x001f8470,
+ 0x001f8870, 0x001f8c70, 0x001f9070, 0x001f9470,
+ 0x001f9870, 0x001f9c70, 0x001fa070, 0x001fa470,
+ 0x001fa870, 0x001fac70, 0x001fb070, 0x001fb470,
+ 0x001fb870, 0x001fbc70, 0x001fc070, 0x001fc470,
+ 0x001fc870, 0x001fcc70, 0x001fd070, 0x001fd470,
+ 0x001fd870, 0x001fdc70, 0x001fe070, 0x001fe470,
+ 0x001fe870, 0x001fec70, 0x001ff070, 0x001ff470,
+ 0x001ff870, 0x001ffc70, 0x00000270, 0x00000670,
+ 0x00000a70, 0x00000e70, 0x00001270, 0x00001670,
+ 0x00001a70, 0x00001e70, 0x00002270, 0x00002670,
+ 0x00002a70, 0x00002e70, 0x00003270, 0x00003670,
+ 0x00003a70, 0x00003e70, 0x00004270, 0x00004670,
+ 0x00004a70, 0x00004e70, 0x00005270, 0x00005670,
+ 0x00005a70, 0x00005e70, 0x00006270, 0x00006670,
+ 0x00006a70, 0x00006e70, 0x00007270, 0x00007670,
+ 0x00007a70, 0x00007e70, 0x00008270, 0x00008670,
+ 0x00008a70, 0x00008e70, 0x00009270, 0x00009670,
+ 0x00009a70, 0x00009e70, 0x0000a270, 0x0000a670,
+ 0x0000aa70, 0x0000ae70, 0x0000b270, 0x0000b670,
+ 0x0000ba70, 0x0000be70, 0x0000c270, 0x0000c670,
+ 0x0000ca70, 0x0000ce70, 0x0000d270, 0x0000d670,
+ 0x0000da70, 0x0000de70, 0x0000e270, 0x0000e670,
+ 0x0000ea70, 0x0000ee70, 0x0000f270, 0x0000f670,
+ 0x0000fa70, 0x0000fe70, 0x00010270, 0x00010670,
+ 0x00010a70, 0x00010e70, 0x00011270, 0x00011670,
+ 0x00011a70, 0x00011e70, 0x00012270, 0x00012670,
+ 0x00012a70, 0x00012e70, 0x00013270, 0x00013670,
+ 0x00013a70, 0x00013e70, 0x00014270, 0x00014670,
+ 0x00014a70, 0x00014e70, 0x00015270, 0x00015670,
+ 0x00015a70, 0x00015e70, 0x00016270, 0x00016670,
+ 0x00016a70, 0x00016e70, 0x00017270, 0x00017670,
+ 0x00017a70, 0x00017e70, 0x00018270, 0x00018670,
+ 0x00018a70, 0x00018e70, 0x00019270, 0x00019670,
+ 0x00019a70, 0x00019e70, 0x0001a270, 0x0001a670,
+ 0x0001aa70, 0x0001ae70, 0x0001b270, 0x0001b670,
+ 0x0001ba70, 0x0001be70, 0x0001c270, 0x0001c670,
+ 0x0001ca70, 0x0001ce70, 0x0001d270, 0x0001d670,
+ 0x0001da70, 0x0001de70, 0x0001e270, 0x0001e670,
+ 0x0001ea70, 0x0001ee70, 0x0001f270, 0x0001f670,
+ 0x0001fa70, 0x0001fe70, 0x00020270, 0x00020670,
+ 0x00020a70, 0x00020e70, 0x00021270, 0x00021670,
+ 0x00021a70, 0x00021e70, 0x00022270, 0x00022670,
+ 0x00022a70, 0x00022e70, 0x00023270, 0x00023670,
+ 0x00023a70, 0x00023e70, 0x00024270, 0x00024670,
+ 0x00024a70, 0x00024e70, 0x00025270, 0x00025670,
+ 0x00025a70, 0x00025e70, 0x00026270, 0x00026670,
+ 0x00026a70, 0x00026e70, 0x00027270, 0x00027670,
+ 0x00027a70, 0x00027e70, 0x00028270, 0x00028670,
+ 0x00028a70, 0x00028e70, 0x00029270, 0x00029670,
+ 0x00029a70, 0x00029e70, 0x0002a270, 0x0002a670,
+ 0x0002aa70, 0x0002ae70, 0x0002b270, 0x0002b670,
+ 0x0002ba70, 0x0002be70, 0x0002c270, 0x0002c670,
+ 0x0002ca70, 0x0002ce70, 0x0002d270, 0x0002d670,
+ 0x0002da70, 0x0002de70, 0x0002e270, 0x0002e670,
+ 0x0002ea70, 0x0002ee70, 0x0002f270, 0x0002f670,
+ 0x0002fa70, 0x0002fe70, 0x00030270, 0x00030670,
+ 0x00030a70, 0x00030e70, 0x00031270, 0x00031670,
+ 0x00031a70, 0x00031e70, 0x00032270, 0x00032670,
+ 0x00032a70, 0x00032e70, 0x00033270, 0x00033670,
+ 0x00033a70, 0x00033e70, 0x00034270, 0x00034670,
+ 0x00034a70, 0x00034e70, 0x00035270, 0x00035670,
+ 0x00035a70, 0x00035e70, 0x00036270, 0x00036670,
+ 0x00036a70, 0x00036e70, 0x00037270, 0x00037670,
+ 0x00037a70, 0x00037e70, 0x00038270, 0x00038670,
+ 0x00038a70, 0x00038e70, 0x00039270, 0x00039670,
+ 0x00039a70, 0x00039e70, 0x0003a270, 0x0003a670,
+ 0x0003aa70, 0x0003ae70, 0x0003b270, 0x0003b670,
+ 0x0003ba70, 0x0003be70, 0x0003c270, 0x0003c670,
+ 0x0003ca70, 0x0003ce70, 0x0003d270, 0x0003d670,
+ 0x0003da70, 0x0003de70, 0x0003e270, 0x0003e670,
+ 0x0003ea70, 0x0003ee70, 0x0003f270, 0x0003f670,
+ 0x0003fa70, 0x0003fe70, 0x00040270, 0x00040670,
+ 0x00040a70, 0x00040e70, 0x00041270, 0x00041670,
+ 0x00041a70, 0x00041e70, 0x00042270, 0x00042670,
+ 0x00042a70, 0x00042e70, 0x00043270, 0x00043670,
+ 0x00043a70, 0x00043e70, 0x00044270, 0x00044670,
+ 0x00044a70, 0x00044e70, 0x00045270, 0x00045670,
+ 0x00045a70, 0x00045e70, 0x00046270, 0x00046670,
+ 0x00046a70, 0x00046e70, 0x00047270, 0x00047670,
+ 0x00047a70, 0x00047e70, 0x00048270, 0x00048670,
+ 0x00048a70, 0x00048e70, 0x00049270, 0x00049670,
+ 0x00049a70, 0x00049e70, 0x0004a270, 0x0004a670,
+ 0x0004aa70, 0x0004ae70, 0x0004b270, 0x0004b670,
+ 0x0004ba70, 0x0004be70, 0x0004c270, 0x0004c670,
+ 0x0004ca70, 0x0004ce70, 0x0004d270, 0x0004d670,
+ 0x0004da70, 0x0004de70, 0x0004e270, 0x0004e670,
+ 0x0004ea70, 0x0004ee70, 0x0004f270, 0x0004f670,
+ 0x0004fa70, 0x0004fe70, 0x00050270, 0x00050670,
+ 0x00050a70, 0x00050e70, 0x00051270, 0x00051670,
+ 0x00051a70, 0x00051e70, 0x00052270, 0x00052670,
+ 0x00052a70, 0x00052e70, 0x00053270, 0x00053670,
+ 0x00053a70, 0x00053e70, 0x00054270, 0x00054670,
+ 0x00054a70, 0x00054e70, 0x00055270, 0x00055670,
+ 0x00055a70, 0x00055e70, 0x00056270, 0x00056670,
+ 0x00056a70, 0x00056e70, 0x00057270, 0x00057670,
+ 0x00057a70, 0x00057e70, 0x00058270, 0x00058670,
+ 0x00058a70, 0x00058e70, 0x00059270, 0x00059670,
+ 0x00059a70, 0x00059e70, 0x0005a270, 0x0005a670,
+ 0x0005aa70, 0x0005ae70, 0x0005b270, 0x0005b670,
+ 0x0005ba70, 0x0005be70, 0x0005c270, 0x0005c670,
+ 0x0005ca70, 0x0005ce70, 0x0005d270, 0x0005d670,
+ 0x0005da70, 0x0005de70, 0x0005e270, 0x0005e670,
+ 0x0005ea70, 0x0005ee70, 0x0005f270, 0x0005f670,
+ 0x0005fa70, 0x0005fe70, 0x00060270, 0x00060670,
+ 0x00060a70, 0x00060e70, 0x00061270, 0x00061670,
+ 0x00061a70, 0x00061e70, 0x00062270, 0x00062670,
+ 0x00062a70, 0x00062e70, 0x00063270, 0x00063670,
+ 0x00063a70, 0x00063e70, 0x00064270, 0x00064670,
+ 0x00064a70, 0x00064e70, 0x00065270, 0x00065670,
+ 0x00065a70, 0x00065e70, 0x00066270, 0x00066670,
+ 0x00066a70, 0x00066e70, 0x00067270, 0x00067670,
+ 0x00067a70, 0x00067e70, 0x00068270, 0x00068670,
+ 0x00068a70, 0x00068e70, 0x00069270, 0x00069670,
+ 0x00069a70, 0x00069e70, 0x0006a270, 0x0006a670,
+ 0x0006aa70, 0x0006ae70, 0x0006b270, 0x0006b670,
+ 0x0006ba70, 0x0006be70, 0x0006c270, 0x0006c670,
+ 0x0006ca70, 0x0006ce70, 0x0006d270, 0x0006d670,
+ 0x0006da70, 0x0006de70, 0x0006e270, 0x0006e670,
+ 0x0006ea70, 0x0006ee70, 0x0006f270, 0x0006f670,
+ 0x0006fa70, 0x0006fe70, 0x00070270, 0x00070670,
+ 0x00070a70, 0x00070e70, 0x00071270, 0x00071670,
+ 0x00071a70, 0x00071e70, 0x00072270, 0x00072670,
+ 0x00072a70, 0x00072e70, 0x00073270, 0x00073670,
+ 0x00073a70, 0x00073e70, 0x00074270, 0x00074670,
+ 0x00074a70, 0x00074e70, 0x00075270, 0x00075670,
+ 0x00075a70, 0x00075e70, 0x00076270, 0x00076670,
+ 0x00076a70, 0x00076e70, 0x00077270, 0x00077670,
+ 0x00077a70, 0x00077e70, 0x00078270, 0x00078670,
+ 0x00078a70, 0x00078e70, 0x00079270, 0x00079670,
+ 0x00079a70, 0x00079e70, 0x0007a270, 0x0007a670,
+ 0x0007aa70, 0x0007ae70, 0x0007b270, 0x0007b670,
+ 0x0007ba70, 0x0007be70, 0x0007c270, 0x0007c670,
+ 0x0007ca70, 0x0007ce70, 0x0007d270, 0x0007d670,
+ 0x0007da70, 0x0007de70, 0x0007e270, 0x0007e670,
+ 0x0007ea70, 0x0007ee70, 0x0007f270, 0x0007f670,
+ 0x0007fa70, 0x0007fe70, 0x00080270, 0x00080670,
+ 0x00080a70, 0x00080e70, 0x00081270, 0x00081670,
+ 0x00081a70, 0x00081e70, 0x00082270, 0x00082670,
+ 0x00082a70, 0x00082e70, 0x00083270, 0x00083670,
+ 0x00083a70, 0x00083e70, 0x00084270, 0x00084670,
+ 0x00084a70, 0x00084e70, 0x00085270, 0x00085670,
+ 0x00085a70, 0x00085e70, 0x00086270, 0x00086670,
+ 0x00086a70, 0x00086e70, 0x00087270, 0x00087670,
+ 0x00087a70, 0x00087e70, 0x00088270, 0x00088670,
+ 0x00088a70, 0x00088e70, 0x00089270, 0x00089670,
+ 0x00089a70, 0x00089e70, 0x0008a270, 0x0008a670,
+ 0x0008aa70, 0x0008ae70, 0x0008b270, 0x0008b670,
+ 0x0008ba70, 0x0008be70, 0x0008c270, 0x0008c670,
+ 0x0008ca70, 0x0008ce70, 0x0008d270, 0x0008d670,
+ 0x0008da70, 0x0008de70, 0x0008e270, 0x0008e670,
+ 0x0008ea70, 0x0008ee70, 0x0008f270, 0x0008f670,
+ 0x0008fa70, 0x0008fe70, 0x00090270, 0x00090670,
+ 0x00090a70, 0x00090e70, 0x00091270, 0x00091670,
+ 0x00091a70, 0x00091e70, 0x00092270, 0x00092670,
+ 0x00092a70, 0x00092e70, 0x00093270, 0x00093670,
+ 0x00093a70, 0x00093e70, 0x00094270, 0x00094670,
+ 0x00094a70, 0x00094e70, 0x00095270, 0x00095670,
+ 0x00095a70, 0x00095e70, 0x00096270, 0x00096670,
+ 0x00096a70, 0x00096e70, 0x00097270, 0x00097670,
+ 0x00097a70, 0x00097e70, 0x00098270, 0x00098670,
+ 0x00098a70, 0x00098e70, 0x00099270, 0x00099670,
+ 0x00099a70, 0x00099e70, 0x0009a270, 0x0009a670,
+ 0x0009aa70, 0x0009ae70, 0x0009b270, 0x0009b670,
+ 0x0009ba70, 0x0009be70, 0x0009c270, 0x0009c670,
+ 0x0009ca70, 0x0009ce70, 0x0009d270, 0x0009d670,
+ 0x0009da70, 0x0009de70, 0x0009e270, 0x0009e670,
+ 0x0009ea70, 0x0009ee70, 0x0009f270, 0x0009f670,
+ 0x0009fa70, 0x0009fe70, 0x000a0270, 0x000a0670,
+ 0x000a0a70, 0x000a0e70, 0x000a1270, 0x000a1670,
+ 0x000a1a70, 0x000a1e70, 0x000a2270, 0x000a2670,
+ 0x000a2a70, 0x000a2e70, 0x000a3270, 0x000a3670,
+ 0x000a3a70, 0x000a3e70, 0x000a4270, 0x000a4670,
+ 0x000a4a70, 0x000a4e70, 0x000a5270, 0x000a5670,
+ 0x000a5a70, 0x000a5e70, 0x000a6270, 0x000a6670,
+ 0x000a6a70, 0x000a6e70, 0x000a7270, 0x000a7670,
+ 0x000a7a70, 0x000a7e70, 0x000a8270, 0x000a8670,
+ 0x000a8a70, 0x000a8e70, 0x000a9270, 0x000a9670,
+ 0x000a9a70, 0x000a9e70, 0x000aa270, 0x000aa670,
+ 0x000aaa70, 0x000aae70, 0x000ab270, 0x000ab670,
+ 0x000aba70, 0x000abe70, 0x000ac270, 0x000ac670,
+ 0x000aca70, 0x000ace70, 0x000ad270, 0x000ad670,
+ 0x000ada70, 0x000ade70, 0x000ae270, 0x000ae670,
+ 0x000aea70, 0x000aee70, 0x000af270, 0x000af670,
+ 0x000afa70, 0x000afe70, 0x000b0270, 0x000b0670,
+ 0x000b0a70, 0x000b0e70, 0x000b1270, 0x000b1670,
+ 0x000b1a70, 0x000b1e70, 0x000b2270, 0x000b2670,
+ 0x000b2a70, 0x000b2e70, 0x000b3270, 0x000b3670,
+ 0x000b3a70, 0x000b3e70, 0x000b4270, 0x000b4670,
+ 0x000b4a70, 0x000b4e70, 0x000b5270, 0x000b5670,
+ 0x000b5a70, 0x000b5e70, 0x000b6270, 0x000b6670,
+ 0x000b6a70, 0x000b6e70, 0x000b7270, 0x000b7670,
+ 0x000b7a70, 0x000b7e70, 0x000b8270, 0x000b8670,
+ 0x000b8a70, 0x000b8e70, 0x000b9270, 0x000b9670,
+ 0x000b9a70, 0x000b9e70, 0x000ba270, 0x000ba670,
+ 0x000baa70, 0x000bae70, 0x000bb270, 0x000bb670,
+ 0x000bba70, 0x000bbe70, 0x000bc270, 0x000bc670,
+ 0x000bca70, 0x000bce70, 0x000bd270, 0x000bd670,
+ 0x000bda70, 0x000bde70, 0x000be270, 0x000be670,
+ 0x000bea70, 0x000bee70, 0x000bf270, 0x000bf670,
+ 0x000bfa70, 0x000bfe70, 0x000c0270, 0x000c0670,
+ 0x000c0a70, 0x000c0e70, 0x000c1270, 0x000c1670,
+ 0x000c1a70, 0x000c1e70, 0x000c2270, 0x000c2670,
+ 0x000c2a70, 0x000c2e70, 0x000c3270, 0x000c3670,
+ 0x000c3a70, 0x000c3e70, 0x000c4270, 0x000c4670,
+ 0x000c4a70, 0x000c4e70, 0x000c5270, 0x000c5670,
+ 0x000c5a70, 0x000c5e70, 0x000c6270, 0x000c6670,
+ 0x000c6a70, 0x000c6e70, 0x000c7270, 0x000c7670,
+ 0x000c7a70, 0x000c7e70, 0x000c8270, 0x000c8670,
+ 0x000c8a70, 0x000c8e70, 0x000c9270, 0x000c9670,
+ 0x000c9a70, 0x000c9e70, 0x000ca270, 0x000ca670,
+ 0x000caa70, 0x000cae70, 0x000cb270, 0x000cb670,
+ 0x000cba70, 0x000cbe70, 0x000cc270, 0x000cc670,
+ 0x000cca70, 0x000cce70, 0x000cd270, 0x000cd670,
+ 0x000cda70, 0x000cde70, 0x000ce270, 0x000ce670,
+ 0x000cea70, 0x000cee70, 0x000cf270, 0x000cf670,
+ 0x000cfa70, 0x000cfe70, 0x000d0270, 0x000d0670,
+ 0x000d0a70, 0x000d0e70, 0x000d1270, 0x000d1670,
+ 0x000d1a70, 0x000d1e70, 0x000d2270, 0x000d2670,
+ 0x000d2a70, 0x000d2e70, 0x000d3270, 0x000d3670,
+ 0x000d3a70, 0x000d3e70, 0x000d4270, 0x000d4670,
+ 0x000d4a70, 0x000d4e70, 0x000d5270, 0x000d5670,
+ 0x000d5a70, 0x000d5e70, 0x000d6270, 0x000d6670,
+ 0x000d6a70, 0x000d6e70, 0x000d7270, 0x000d7670,
+ 0x000d7a70, 0x000d7e70, 0x000d8270, 0x000d8670,
+ 0x000d8a70, 0x000d8e70, 0x000d9270, 0x000d9670,
+ 0x000d9a70, 0x000d9e70, 0x000da270, 0x000da670,
+ 0x000daa70, 0x000dae70, 0x000db270, 0x000db670,
+ 0x000dba70, 0x000dbe70, 0x000dc270, 0x000dc670,
+ 0x000dca70, 0x000dce70, 0x000dd270, 0x000dd670,
+ 0x000dda70, 0x000dde70, 0x000de270, 0x000de670,
+ 0x000dea70, 0x000dee70, 0x000df270, 0x000df670,
+ 0x000dfa70, 0x000dfe70, 0x000e0270, 0x000e0670,
+ 0x000e0a70, 0x000e0e70, 0x000e1270, 0x000e1670,
+ 0x000e1a70, 0x000e1e70, 0x000e2270, 0x000e2670,
+ 0x000e2a70, 0x000e2e70, 0x000e3270, 0x000e3670,
+ 0x000e3a70, 0x000e3e70, 0x000e4270, 0x000e4670,
+ 0x000e4a70, 0x000e4e70, 0x000e5270, 0x000e5670,
+ 0x000e5a70, 0x000e5e70, 0x000e6270, 0x000e6670,
+ 0x000e6a70, 0x000e6e70, 0x000e7270, 0x000e7670,
+ 0x000e7a70, 0x000e7e70, 0x000e8270, 0x000e8670,
+ 0x000e8a70, 0x000e8e70, 0x000e9270, 0x000e9670,
+ 0x000e9a70, 0x000e9e70, 0x000ea270, 0x000ea670,
+ 0x000eaa70, 0x000eae70, 0x000eb270, 0x000eb670,
+ 0x000eba70, 0x000ebe70, 0x000ec270, 0x000ec670,
+ 0x000eca70, 0x000ece70, 0x000ed270, 0x000ed670,
+ 0x000eda70, 0x000ede70, 0x000ee270, 0x000ee670,
+ 0x000eea70, 0x000eee70, 0x000ef270, 0x000ef670,
+ 0x000efa70, 0x000efe70, 0x000f0270, 0x000f0670,
+ 0x000f0a70, 0x000f0e70, 0x000f1270, 0x000f1670,
+ 0x000f1a70, 0x000f1e70, 0x000f2270, 0x000f2670,
+ 0x000f2a70, 0x000f2e70, 0x000f3270, 0x000f3670,
+ 0x000f3a70, 0x000f3e70, 0x000f4270, 0x000f4670,
+ 0x000f4a70, 0x000f4e70, 0x000f5270, 0x000f5670,
+ 0x000f5a70, 0x000f5e70, 0x000f6270, 0x000f6670,
+ 0x000f6a70, 0x000f6e70, 0x000f7270, 0x000f7670,
+ 0x000f7a70, 0x000f7e70, 0x000f8270, 0x000f8670,
+ 0x000f8a70, 0x000f8e70, 0x000f9270, 0x000f9670,
+ 0x000f9a70, 0x000f9e70, 0x000fa270, 0x000fa670,
+ 0x000faa70, 0x000fae70, 0x000fb270, 0x000fb670,
+ 0x000fba70, 0x000fbe70, 0x000fc270, 0x000fc670,
+ 0x000fca70, 0x000fce70, 0x000fd270, 0x000fd670,
+ 0x000fda70, 0x000fde70, 0x000fe270, 0x000fe670,
+ 0x000fea70, 0x000fee70, 0x000ff270, 0x000ff670,
+ 0x000ffa70, 0x000ffe70, 0x00100270, 0x00100670,
+ 0x00100a70, 0x00100e70, 0x00101270, 0x00101670,
+ 0x00101a70, 0x00101e70, 0x00102270, 0x00102670,
+ 0x00102a70, 0x00102e70, 0x00103270, 0x00103670,
+ 0x00103a70, 0x00103e70, 0x00104270, 0x00104670,
+ 0x00104a70, 0x00104e70, 0x00105270, 0x00105670,
+ 0x00105a70, 0x00105e70, 0x00106270, 0x00106670,
+ 0x00106a70, 0x00106e70, 0x00107270, 0x00107670,
+ 0x00107a70, 0x00107e70, 0x00108270, 0x00108670,
+ 0x00108a70, 0x00108e70, 0x00109270, 0x00109670,
+ 0x00109a70, 0x00109e70, 0x0010a270, 0x0010a670,
+ 0x0010aa70, 0x0010ae70, 0x0010b270, 0x0010b670,
+ 0x0010ba70, 0x0010be70, 0x0010c270, 0x0010c670,
+ 0x0010ca70, 0x0010ce70, 0x0010d270, 0x0010d670,
+ 0x0010da70, 0x0010de70, 0x0010e270, 0x0010e670,
+ 0x0010ea70, 0x0010ee70, 0x0010f270, 0x0010f670,
+ 0x0010fa70, 0x0010fe70, 0x00110270, 0x00110670,
+ 0x00110a70, 0x00110e70, 0x00111270, 0x00111670,
+ 0x00111a70, 0x00111e70, 0x00112270, 0x00112670,
+ 0x00112a70, 0x00112e70, 0x00113270, 0x00113670,
+ 0x00113a70, 0x00113e70, 0x00114270, 0x00114670,
+ 0x00114a70, 0x00114e70, 0x00115270, 0x00115670,
+ 0x00115a70, 0x00115e70, 0x00116270, 0x00116670,
+ 0x00116a70, 0x00116e70, 0x00117270, 0x00117670,
+ 0x00117a70, 0x00117e70, 0x00118270, 0x00118670,
+ 0x00118a70, 0x00118e70, 0x00119270, 0x00119670,
+ 0x00119a70, 0x00119e70, 0x0011a270, 0x0011a670,
+ 0x0011aa70, 0x0011ae70, 0x0011b270, 0x0011b670,
+ 0x0011ba70, 0x0011be70, 0x0011c270, 0x0011c670,
+ 0x0011ca70, 0x0011ce70, 0x0011d270, 0x0011d670,
+ 0x0011da70, 0x0011de70, 0x0011e270, 0x0011e670,
+ 0x0011ea70, 0x0011ee70, 0x0011f270, 0x0011f670,
+ 0x0011fa70, 0x0011fe70, 0x00120270, 0x00120670,
+ 0x00120a70, 0x00120e70, 0x00121270, 0x00121670,
+ 0x00121a70, 0x00121e70, 0x00122270, 0x00122670,
+ 0x00122a70, 0x00122e70, 0x00123270, 0x00123670,
+ 0x00123a70, 0x00123e70, 0x00124270, 0x00124670,
+ 0x00124a70, 0x00124e70, 0x00125270, 0x00125670,
+ 0x00125a70, 0x00125e70, 0x00126270, 0x00126670,
+ 0x00126a70, 0x00126e70, 0x00127270, 0x00127670,
+ 0x00127a70, 0x00127e70, 0x00128270, 0x00128670,
+ 0x00128a70, 0x00128e70, 0x00129270, 0x00129670,
+ 0x00129a70, 0x00129e70, 0x0012a270, 0x0012a670,
+ 0x0012aa70, 0x0012ae70, 0x0012b270, 0x0012b670,
+ 0x0012ba70, 0x0012be70, 0x0012c270, 0x0012c670,
+ 0x0012ca70, 0x0012ce70, 0x0012d270, 0x0012d670,
+ 0x0012da70, 0x0012de70, 0x0012e270, 0x0012e670,
+ 0x0012ea70, 0x0012ee70, 0x0012f270, 0x0012f670,
+ 0x0012fa70, 0x0012fe70, 0x00130270, 0x00130670,
+ 0x00130a70, 0x00130e70, 0x00131270, 0x00131670,
+ 0x00131a70, 0x00131e70, 0x00132270, 0x00132670,
+ 0x00132a70, 0x00132e70, 0x00133270, 0x00133670,
+ 0x00133a70, 0x00133e70, 0x00134270, 0x00134670,
+ 0x00134a70, 0x00134e70, 0x00135270, 0x00135670,
+ 0x00135a70, 0x00135e70, 0x00136270, 0x00136670,
+ 0x00136a70, 0x00136e70, 0x00137270, 0x00137670,
+ 0x00137a70, 0x00137e70, 0x00138270, 0x00138670,
+ 0x00138a70, 0x00138e70, 0x00139270, 0x00139670,
+ 0x00139a70, 0x00139e70, 0x0013a270, 0x0013a670,
+ 0x0013aa70, 0x0013ae70, 0x0013b270, 0x0013b670,
+ 0x0013ba70, 0x0013be70, 0x0013c270, 0x0013c670,
+ 0x0013ca70, 0x0013ce70, 0x0013d270, 0x0013d670,
+ 0x0013da70, 0x0013de70, 0x0013e270, 0x0013e670,
+ 0x0013ea70, 0x0013ee70, 0x0013f270, 0x0013f670,
+ 0x0013fa70, 0x0013fe70, 0x00140270, 0x00140670,
+ 0x00140a70, 0x00140e70, 0x00141270, 0x00141670,
+ 0x00141a70, 0x00141e70, 0x00142270, 0x00142670,
+ 0x00142a70, 0x00142e70, 0x00143270, 0x00143670,
+ 0x00143a70, 0x00143e70, 0x00144270, 0x00144670,
+ 0x00144a70, 0x00144e70, 0x00145270, 0x00145670,
+ 0x00145a70, 0x00145e70, 0x00146270, 0x00146670,
+ 0x00146a70, 0x00146e70, 0x00147270, 0x00147670,
+ 0x00147a70, 0x00147e70, 0x00148270, 0x00148670,
+ 0x00148a70, 0x00148e70, 0x00149270, 0x00149670,
+ 0x00149a70, 0x00149e70, 0x0014a270, 0x0014a670,
+ 0x0014aa70, 0x0014ae70, 0x0014b270, 0x0014b670,
+ 0x0014ba70, 0x0014be70, 0x0014c270, 0x0014c670,
+ 0x0014ca70, 0x0014ce70, 0x0014d270, 0x0014d670,
+ 0x0014da70, 0x0014de70, 0x0014e270, 0x0014e670,
+ 0x0014ea70, 0x0014ee70, 0x0014f270, 0x0014f670,
+ 0x0014fa70, 0x0014fe70, 0x00150270, 0x00150670,
+ 0x00150a70, 0x00150e70, 0x00151270, 0x00151670,
+ 0x00151a70, 0x00151e70, 0x00152270, 0x00152670,
+ 0x00152a70, 0x00152e70, 0x00153270, 0x00153670,
+ 0x00153a70, 0x00153e70, 0x00154270, 0x00154670,
+ 0x00154a70, 0x00154e70, 0x00155270, 0x00155670,
+ 0x00155a70, 0x00155e70, 0x00156270, 0x00156670,
+ 0x00156a70, 0x00156e70, 0x00157270, 0x00157670,
+ 0x00157a70, 0x00157e70, 0x00158270, 0x00158670,
+ 0x00158a70, 0x00158e70, 0x00159270, 0x00159670,
+ 0x00159a70, 0x00159e70, 0x0015a270, 0x0015a670,
+ 0x0015aa70, 0x0015ae70, 0x0015b270, 0x0015b670,
+ 0x0015ba70, 0x0015be70, 0x0015c270, 0x0015c670,
+ 0x0015ca70, 0x0015ce70, 0x0015d270, 0x0015d670,
+ 0x0015da70, 0x0015de70, 0x0015e270, 0x0015e670,
+ 0x0015ea70, 0x0015ee70, 0x0015f270, 0x0015f670,
+ 0x0015fa70, 0x0015fe70, 0x00160270, 0x00160670,
+ 0x00160a70, 0x00160e70, 0x00161270, 0x00161670,
+ 0x00161a70, 0x00161e70, 0x00162270, 0x00162670,
+ 0x00162a70, 0x00162e70, 0x00163270, 0x00163670,
+ 0x00163a70, 0x00163e70, 0x00164270, 0x00164670,
+ 0x00164a70, 0x00164e70, 0x00165270, 0x00165670,
+ 0x00165a70, 0x00165e70, 0x00166270, 0x00166670,
+ 0x00166a70, 0x00166e70, 0x00167270, 0x00167670,
+ 0x00167a70, 0x00167e70, 0x00168270, 0x00168670,
+ 0x00168a70, 0x00168e70, 0x00169270, 0x00169670,
+ 0x00169a70, 0x00169e70, 0x0016a270, 0x0016a670,
+ 0x0016aa70, 0x0016ae70, 0x0016b270, 0x0016b670,
+ 0x0016ba70, 0x0016be70, 0x0016c270, 0x0016c670,
+ 0x0016ca70, 0x0016ce70, 0x0016d270, 0x0016d670,
+ 0x0016da70, 0x0016de70, 0x0016e270, 0x0016e670,
+ 0x0016ea70, 0x0016ee70, 0x0016f270, 0x0016f670,
+ 0x0016fa70, 0x0016fe70, 0x00170270, 0x00170670,
+ 0x00170a70, 0x00170e70, 0x00171270, 0x00171670,
+ 0x00171a70, 0x00171e70, 0x00172270, 0x00172670,
+ 0x00172a70, 0x00172e70, 0x00173270, 0x00173670,
+ 0x00173a70, 0x00173e70, 0x00174270, 0x00174670,
+ 0x00174a70, 0x00174e70, 0x00175270, 0x00175670,
+ 0x00175a70, 0x00175e70, 0x00176270, 0x00176670,
+ 0x00176a70, 0x00176e70, 0x00177270, 0x00177670,
+ 0x00177a70, 0x00177e70, 0x00178270, 0x00178670,
+ 0x00178a70, 0x00178e70, 0x00179270, 0x00179670,
+ 0x00179a70, 0x00179e70, 0x0017a270, 0x0017a670,
+ 0x0017aa70, 0x0017ae70, 0x0017b270, 0x0017b670,
+ 0x0017ba70, 0x0017be70, 0x0017c270, 0x0017c670,
+ 0x0017ca70, 0x0017ce70, 0x0017d270, 0x0017d670,
+ 0x0017da70, 0x0017de70, 0x0017e270, 0x0017e670,
+ 0x0017ea70, 0x0017ee70, 0x0017f270, 0x0017f670,
+ 0x0017fa70, 0x0017fe70, 0x00180270, 0x00180670,
+ 0x00180a70, 0x00180e70, 0x00181270, 0x00181670,
+ 0x00181a70, 0x00181e70, 0x00182270, 0x00182670,
+ 0x00182a70, 0x00182e70, 0x00183270, 0x00183670,
+ 0x00183a70, 0x00183e70, 0x00184270, 0x00184670,
+ 0x00184a70, 0x00184e70, 0x00185270, 0x00185670,
+ 0x00185a70, 0x00185e70, 0x00186270, 0x00186670,
+ 0x00186a70, 0x00186e70, 0x00187270, 0x00187670,
+ 0x00187a70, 0x00187e70, 0x00188270, 0x00188670,
+ 0x00188a70, 0x00188e70, 0x00189270, 0x00189670,
+ 0x00189a70, 0x00189e70, 0x0018a270, 0x0018a670,
+ 0x0018aa70, 0x0018ae70, 0x0018b270, 0x0018b670,
+ 0x0018ba70, 0x0018be70, 0x0018c270, 0x0018c670,
+ 0x0018ca70, 0x0018ce70, 0x0018d270, 0x0018d670,
+ 0x0018da70, 0x0018de70, 0x0018e270, 0x0018e670,
+ 0x0018ea70, 0x0018ee70, 0x0018f270, 0x0018f670,
+ 0x0018fa70, 0x0018fe70, 0x00190270, 0x00190670,
+ 0x00190a70, 0x00190e70, 0x00191270, 0x00191670,
+ 0x00191a70, 0x00191e70, 0x00192270, 0x00192670,
+ 0x00192a70, 0x00192e70, 0x00193270, 0x00193670,
+ 0x00193a70, 0x00193e70, 0x00194270, 0x00194670,
+ 0x00194a70, 0x00194e70, 0x00195270, 0x00195670,
+ 0x00195a70, 0x00195e70, 0x00196270, 0x00196670,
+ 0x00196a70, 0x00196e70, 0x00197270, 0x00197670,
+ 0x00197a70, 0x00197e70, 0x00198270, 0x00198670,
+ 0x00198a70, 0x00198e70, 0x00199270, 0x00199670,
+ 0x00199a70, 0x00199e70, 0x0019a270, 0x0019a670,
+ 0x0019aa70, 0x0019ae70, 0x0019b270, 0x0019b670,
+ 0x0019ba70, 0x0019be70, 0x0019c270, 0x0019c670,
+ 0x0019ca70, 0x0019ce70, 0x0019d270, 0x0019d670,
+ 0x0019da70, 0x0019de70, 0x0019e270, 0x0019e670,
+ 0x0019ea70, 0x0019ee70, 0x0019f270, 0x0019f670,
+ 0x0019fa70, 0x0019fe70, 0x001a0270, 0x001a0670,
+ 0x001a0a70, 0x001a0e70, 0x001a1270, 0x001a1670,
+ 0x001a1a70, 0x001a1e70, 0x001a2270, 0x001a2670,
+ 0x001a2a70, 0x001a2e70, 0x001a3270, 0x001a3670,
+ 0x001a3a70, 0x001a3e70, 0x001a4270, 0x001a4670,
+ 0x001a4a70, 0x001a4e70, 0x001a5270, 0x001a5670,
+ 0x001a5a70, 0x001a5e70, 0x001a6270, 0x001a6670,
+ 0x001a6a70, 0x001a6e70, 0x001a7270, 0x001a7670,
+ 0x001a7a70, 0x001a7e70, 0x001a8270, 0x001a8670,
+ 0x001a8a70, 0x001a8e70, 0x001a9270, 0x001a9670,
+ 0x001a9a70, 0x001a9e70, 0x001aa270, 0x001aa670,
+ 0x001aaa70, 0x001aae70, 0x001ab270, 0x001ab670,
+ 0x001aba70, 0x001abe70, 0x001ac270, 0x001ac670,
+ 0x001aca70, 0x001ace70, 0x001ad270, 0x001ad670,
+ 0x001ada70, 0x001ade70, 0x001ae270, 0x001ae670,
+ 0x001aea70, 0x001aee70, 0x001af270, 0x001af670,
+ 0x001afa70, 0x001afe70, 0x001b0270, 0x001b0670,
+ 0x001b0a70, 0x001b0e70, 0x001b1270, 0x001b1670,
+ 0x001b1a70, 0x001b1e70, 0x001b2270, 0x001b2670,
+ 0x001b2a70, 0x001b2e70, 0x001b3270, 0x001b3670,
+ 0x001b3a70, 0x001b3e70, 0x001b4270, 0x001b4670,
+ 0x001b4a70, 0x001b4e70, 0x001b5270, 0x001b5670,
+ 0x001b5a70, 0x001b5e70, 0x001b6270, 0x001b6670,
+ 0x001b6a70, 0x001b6e70, 0x001b7270, 0x001b7670,
+ 0x001b7a70, 0x001b7e70, 0x001b8270, 0x001b8670,
+ 0x001b8a70, 0x001b8e70, 0x001b9270, 0x001b9670,
+ 0x001b9a70, 0x001b9e70, 0x001ba270, 0x001ba670,
+ 0x001baa70, 0x001bae70, 0x001bb270, 0x001bb670,
+ 0x001bba70, 0x001bbe70, 0x001bc270, 0x001bc670,
+ 0x001bca70, 0x001bce70, 0x001bd270, 0x001bd670,
+ 0x001bda70, 0x001bde70, 0x001be270, 0x001be670,
+ 0x001bea70, 0x001bee70, 0x001bf270, 0x001bf670,
+ 0x001bfa70, 0x001bfe70, 0x001c0270, 0x001c0670,
+ 0x001c0a70, 0x001c0e70, 0x001c1270, 0x001c1670,
+ 0x001c1a70, 0x001c1e70, 0x001c2270, 0x001c2670,
+ 0x001c2a70, 0x001c2e70, 0x001c3270, 0x001c3670,
+ 0x001c3a70, 0x001c3e70, 0x001c4270, 0x001c4670,
+ 0x001c4a70, 0x001c4e70, 0x001c5270, 0x001c5670,
+ 0x001c5a70, 0x001c5e70, 0x001c6270, 0x001c6670,
+ 0x001c6a70, 0x001c6e70, 0x001c7270, 0x001c7670,
+ 0x001c7a70, 0x001c7e70, 0x001c8270, 0x001c8670,
+ 0x001c8a70, 0x001c8e70, 0x001c9270, 0x001c9670,
+ 0x001c9a70, 0x001c9e70, 0x001ca270, 0x001ca670,
+ 0x001caa70, 0x001cae70, 0x001cb270, 0x001cb670,
+ 0x001cba70, 0x001cbe70, 0x001cc270, 0x001cc670,
+ 0x001cca70, 0x001cce70, 0x001cd270, 0x001cd670,
+ 0x001cda70, 0x001cde70, 0x001ce270, 0x001ce670,
+ 0x001cea70, 0x001cee70, 0x001cf270, 0x001cf670,
+ 0x001cfa70, 0x001cfe70, 0x001d0270, 0x001d0670,
+ 0x001d0a70, 0x001d0e70, 0x001d1270, 0x001d1670,
+ 0x001d1a70, 0x001d1e70, 0x001d2270, 0x001d2670,
+ 0x001d2a70, 0x001d2e70, 0x001d3270, 0x001d3670,
+ 0x001d3a70, 0x001d3e70, 0x001d4270, 0x001d4670,
+ 0x001d4a70, 0x001d4e70, 0x001d5270, 0x001d5670,
+ 0x001d5a70, 0x001d5e70, 0x001d6270, 0x001d6670,
+ 0x001d6a70, 0x001d6e70, 0x001d7270, 0x001d7670,
+ 0x001d7a70, 0x001d7e70, 0x001d8270, 0x001d8670,
+ 0x001d8a70, 0x001d8e70, 0x001d9270, 0x001d9670,
+ 0x001d9a70, 0x001d9e70, 0x001da270, 0x001da670,
+ 0x001daa70, 0x001dae70, 0x001db270, 0x001db670,
+ 0x001dba70, 0x001dbe70, 0x001dc270, 0x001dc670,
+ 0x001dca70, 0x001dce70, 0x001dd270, 0x001dd670,
+ 0x001dda70, 0x001dde70, 0x001de270, 0x001de670,
+ 0x001dea70, 0x001dee70, 0x001df270, 0x001df670,
+ 0x001dfa70, 0x001dfe70, 0x001e0270, 0x001e0670,
+ 0x001e0a70, 0x001e0e70, 0x001e1270, 0x001e1670,
+ 0x001e1a70, 0x001e1e70, 0x001e2270, 0x001e2670,
+ 0x001e2a70, 0x001e2e70, 0x001e3270, 0x001e3670,
+ 0x001e3a70, 0x001e3e70, 0x001e4270, 0x001e4670,
+ 0x001e4a70, 0x001e4e70, 0x001e5270, 0x001e5670,
+ 0x001e5a70, 0x001e5e70, 0x001e6270, 0x001e6670,
+ 0x001e6a70, 0x001e6e70, 0x001e7270, 0x001e7670,
+ 0x001e7a70, 0x001e7e70, 0x001e8270, 0x001e8670,
+ 0x001e8a70, 0x001e8e70, 0x001e9270, 0x001e9670,
+ 0x001e9a70, 0x001e9e70, 0x001ea270, 0x001ea670,
+ 0x001eaa70, 0x001eae70, 0x001eb270, 0x001eb670,
+ 0x001eba70, 0x001ebe70, 0x001ec270, 0x001ec670,
+ 0x001eca70, 0x001ece70, 0x001ed270, 0x001ed670,
+ 0x001eda70, 0x001ede70, 0x001ee270, 0x001ee670,
+ 0x001eea70, 0x001eee70, 0x001ef270, 0x001ef670,
+ 0x001efa70, 0x001efe70, 0x001f0270, 0x001f0670,
+ 0x001f0a70, 0x001f0e70, 0x001f1270, 0x001f1670,
+ 0x001f1a70, 0x001f1e70, 0x001f2270, 0x001f2670,
+ 0x001f2a70, 0x001f2e70, 0x001f3270, 0x001f3670,
+ 0x001f3a70, 0x001f3e70, 0x001f4270, 0x001f4670,
+ 0x001f4a70, 0x001f4e70, 0x001f5270, 0x001f5670,
+ 0x001f5a70, 0x001f5e70, 0x001f6270, 0x001f6670,
+ 0x001f6a70, 0x001f6e70, 0x001f7270, 0x001f7670,
+ 0x001f7a70, 0x001f7e70, 0x001f8270, 0x001f8670,
+ 0x001f8a70, 0x001f8e70, 0x001f9270, 0x001f9670,
+ 0x001f9a70, 0x001f9e70, 0x001fa270, 0x001fa670,
+ 0x001faa70, 0x001fae70, 0x001fb270, 0x001fb670,
+ 0x001fba70, 0x001fbe70, 0x001fc270, 0x001fc670,
+ 0x001fca70, 0x001fce70, 0x001fd270, 0x001fd670,
+ 0x001fda70, 0x001fde70, 0x001fe270, 0x001fe670,
+ 0x001fea70, 0x001fee70, 0x001ff270, 0x001ff670,
+ 0x001ffa70, 0x001ffe70
+#endif /* LONGER_HUFFTABLE */
+ },
+
+ .len_table = {
+ 0x00000807, 0x00000407, 0x00000c07, 0x00000207,
+ 0x00000a07, 0x00000607, 0x00000e07, 0x00000107,
+ 0x00000908, 0x00001908, 0x00000508, 0x00001508,
+ 0x00000d08, 0x00001d08, 0x00000308, 0x00001308,
+ 0x00000b09, 0x00001b09, 0x00002b09, 0x00003b09,
+ 0x00000709, 0x00001709, 0x00002709, 0x00003709,
+ 0x00000f09, 0x00001f09, 0x00002f09, 0x00003f09,
+ 0x00000089, 0x00001089, 0x00002089, 0x00003089,
+ 0x0000088a, 0x0000188a, 0x0000288a, 0x0000388a,
+ 0x0000488a, 0x0000588a, 0x0000688a, 0x0000788a,
+ 0x0000048a, 0x0000148a, 0x0000248a, 0x0000348a,
+ 0x0000448a, 0x0000548a, 0x0000648a, 0x0000748a,
+ 0x00000c8a, 0x00001c8a, 0x00002c8a, 0x00003c8a,
+ 0x00004c8a, 0x00005c8a, 0x00006c8a, 0x00007c8a,
+ 0x0000028a, 0x0000128a, 0x0000228a, 0x0000328a,
+ 0x0000428a, 0x0000528a, 0x0000628a, 0x0000728a,
+ 0x00000a8b, 0x00001a8b, 0x00002a8b, 0x00003a8b,
+ 0x00004a8b, 0x00005a8b, 0x00006a8b, 0x00007a8b,
+ 0x00008a8b, 0x00009a8b, 0x0000aa8b, 0x0000ba8b,
+ 0x0000ca8b, 0x0000da8b, 0x0000ea8b, 0x0000fa8b,
+ 0x0000068b, 0x0000168b, 0x0000268b, 0x0000368b,
+ 0x0000468b, 0x0000568b, 0x0000668b, 0x0000768b,
+ 0x0000868b, 0x0000968b, 0x0000a68b, 0x0000b68b,
+ 0x0000c68b, 0x0000d68b, 0x0000e68b, 0x0000f68b,
+ 0x00000e8b, 0x00001e8b, 0x00002e8b, 0x00003e8b,
+ 0x00004e8b, 0x00005e8b, 0x00006e8b, 0x00007e8b,
+ 0x00008e8b, 0x00009e8b, 0x0000ae8b, 0x0000be8b,
+ 0x0000ce8b, 0x0000de8b, 0x0000ee8b, 0x0000fe8b,
+ 0x0000006c, 0x0000206c, 0x0000406c, 0x0000606c,
+ 0x0000806c, 0x0000a06c, 0x0000c06c, 0x0000e06c,
+ 0x0001006c, 0x0001206c, 0x0001406c, 0x0001606c,
+ 0x0001806c, 0x0001a06c, 0x0001c06c, 0x0001e06c,
+ 0x0000106d, 0x0000306d, 0x0000506d, 0x0000706d,
+ 0x0000906d, 0x0000b06d, 0x0000d06d, 0x0000f06d,
+ 0x0001106d, 0x0001306d, 0x0001506d, 0x0001706d,
+ 0x0001906d, 0x0001b06d, 0x0001d06d, 0x0001f06d,
+ 0x0002106d, 0x0002306d, 0x0002506d, 0x0002706d,
+ 0x0002906d, 0x0002b06d, 0x0002d06d, 0x0002f06d,
+ 0x0003106d, 0x0003306d, 0x0003506d, 0x0003706d,
+ 0x0003906d, 0x0003b06d, 0x0003d06d, 0x0003f06d,
+ 0x0000086d, 0x0000286d, 0x0000486d, 0x0000686d,
+ 0x0000886d, 0x0000a86d, 0x0000c86d, 0x0000e86d,
+ 0x0001086d, 0x0001286d, 0x0001486d, 0x0001686d,
+ 0x0001886d, 0x0001a86d, 0x0001c86d, 0x0001e86d,
+ 0x0002086d, 0x0002286d, 0x0002486d, 0x0002686d,
+ 0x0002886d, 0x0002a86d, 0x0002c86d, 0x0002e86d,
+ 0x0003086d, 0x0003286d, 0x0003486d, 0x0003686d,
+ 0x0003886d, 0x0003a86d, 0x0003c86d, 0x0003e86d,
+ 0x0000186d, 0x0000386d, 0x0000586d, 0x0000786d,
+ 0x0000986d, 0x0000b86d, 0x0000d86d, 0x0000f86d,
+ 0x0001186d, 0x0001386d, 0x0001586d, 0x0001786d,
+ 0x0001986d, 0x0001b86d, 0x0001d86d, 0x0001f86d,
+ 0x0002186d, 0x0002386d, 0x0002586d, 0x0002786d,
+ 0x0002986d, 0x0002b86d, 0x0002d86d, 0x0002f86d,
+ 0x0003186d, 0x0003386d, 0x0003586d, 0x0003786d,
+ 0x0003986d, 0x0003b86d, 0x0003d86d, 0x0003f86d,
+ 0x0000046d, 0x0000246d, 0x0000446d, 0x0000646d,
+ 0x0000846d, 0x0000a46d, 0x0000c46d, 0x0000e46d,
+ 0x0001046d, 0x0001246d, 0x0001446d, 0x0001646d,
+ 0x0001846d, 0x0001a46d, 0x0001c46d, 0x0001e46d,
+ 0x0002046d, 0x0002246d, 0x0002446d, 0x0002646d,
+ 0x0002846d, 0x0002a46d, 0x0002c46d, 0x0002e46d,
+ 0x0003046d, 0x0003246d, 0x0003446d, 0x0003646d,
+ 0x0003846d, 0x0003a46d, 0x0003c46d, 0x00001468},
+
+ .lit_table = {
+ 0x000c, 0x008c, 0x004c, 0x00cc, 0x002c, 0x00ac, 0x006c, 0x00ec,
+ 0x001c, 0x009c, 0x005c, 0x00dc, 0x003c, 0x00bc, 0x007c, 0x00fc,
+ 0x0002, 0x0082, 0x0042, 0x00c2, 0x0022, 0x00a2, 0x0062, 0x00e2,
+ 0x0012, 0x0092, 0x0052, 0x00d2, 0x0032, 0x00b2, 0x0072, 0x00f2,
+ 0x000a, 0x008a, 0x004a, 0x00ca, 0x002a, 0x00aa, 0x006a, 0x00ea,
+ 0x001a, 0x009a, 0x005a, 0x00da, 0x003a, 0x00ba, 0x007a, 0x00fa,
+ 0x0006, 0x0086, 0x0046, 0x00c6, 0x0026, 0x00a6, 0x0066, 0x00e6,
+ 0x0016, 0x0096, 0x0056, 0x00d6, 0x0036, 0x00b6, 0x0076, 0x00f6,
+ 0x000e, 0x008e, 0x004e, 0x00ce, 0x002e, 0x00ae, 0x006e, 0x00ee,
+ 0x001e, 0x009e, 0x005e, 0x00de, 0x003e, 0x00be, 0x007e, 0x00fe,
+ 0x0001, 0x0081, 0x0041, 0x00c1, 0x0021, 0x00a1, 0x0061, 0x00e1,
+ 0x0011, 0x0091, 0x0051, 0x00d1, 0x0031, 0x00b1, 0x0071, 0x00f1,
+ 0x0009, 0x0089, 0x0049, 0x00c9, 0x0029, 0x00a9, 0x0069, 0x00e9,
+ 0x0019, 0x0099, 0x0059, 0x00d9, 0x0039, 0x00b9, 0x0079, 0x00f9,
+ 0x0005, 0x0085, 0x0045, 0x00c5, 0x0025, 0x00a5, 0x0065, 0x00e5,
+ 0x0015, 0x0095, 0x0055, 0x00d5, 0x0035, 0x00b5, 0x0075, 0x00f5,
+ 0x000d, 0x008d, 0x004d, 0x00cd, 0x002d, 0x00ad, 0x006d, 0x00ed,
+ 0x001d, 0x009d, 0x005d, 0x00dd, 0x003d, 0x00bd, 0x007d, 0x00fd,
+ 0x0013, 0x0113, 0x0093, 0x0193, 0x0053, 0x0153, 0x00d3, 0x01d3,
+ 0x0033, 0x0133, 0x00b3, 0x01b3, 0x0073, 0x0173, 0x00f3, 0x01f3,
+ 0x000b, 0x010b, 0x008b, 0x018b, 0x004b, 0x014b, 0x00cb, 0x01cb,
+ 0x002b, 0x012b, 0x00ab, 0x01ab, 0x006b, 0x016b, 0x00eb, 0x01eb,
+ 0x001b, 0x011b, 0x009b, 0x019b, 0x005b, 0x015b, 0x00db, 0x01db,
+ 0x003b, 0x013b, 0x00bb, 0x01bb, 0x007b, 0x017b, 0x00fb, 0x01fb,
+ 0x0007, 0x0107, 0x0087, 0x0187, 0x0047, 0x0147, 0x00c7, 0x01c7,
+ 0x0027, 0x0127, 0x00a7, 0x01a7, 0x0067, 0x0167, 0x00e7, 0x01e7,
+ 0x0017, 0x0117, 0x0097, 0x0197, 0x0057, 0x0157, 0x00d7, 0x01d7,
+ 0x0037, 0x0137, 0x00b7, 0x01b7, 0x0077, 0x0177, 0x00f7, 0x01f7,
+ 0x000f, 0x010f, 0x008f, 0x018f, 0x004f, 0x014f, 0x00cf, 0x01cf,
+ 0x002f, 0x012f, 0x00af, 0x01af, 0x006f, 0x016f, 0x00ef, 0x01ef,
+ 0x001f, 0x011f, 0x009f, 0x019f, 0x005f, 0x015f, 0x00df, 0x01df,
+ 0x003f, 0x013f, 0x00bf, 0x01bf, 0x007f, 0x017f, 0x00ff, 0x01ff,
+ 0x0000},
+
+ .lit_table_sizes = {
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
+ 0x07},
+
+#ifndef LONGER_HUFFTABLE
+ .dcodes = {
+ 0x0000, 0x0010, 0x0008, 0x0018, 0x0004, 0x0014, 0x000c, 0x001c,
+ 0x0002, 0x0012, 0x000a, 0x001a, 0x0006, 0x0016, 0x000e, 0x001e,
+ 0x0001, 0x0011, 0x0009, 0x0019, 0x0005, 0x0015, 0x000d, 0x001d,
+ 0x0003, 0x0013, 0x000b, 0x001b, 0x0007, 0x0017},
+
+ .dcodes_sizes = {
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05}
+#else
+ .dcodes = {
+ 0x000b, 0x001b, 0x0007, 0x0017},
+
+ .dcodes_sizes = {
+ 0x05, 0x05, 0x05, 0x05}
+#endif
+};
diff --git a/src/isa-l/igzip/igzip.c b/src/isa-l/igzip/igzip.c
new file mode 100644
index 000000000..65b441b52
--- /dev/null
+++ b/src/isa-l/igzip/igzip.c
@@ -0,0 +1,2022 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define ASM
+
+#include <assert.h>
+#include <string.h>
+#include <wchar.h>
+#ifdef _WIN32
+# include <intrin.h>
+#endif
+
+#define MAX_WRITE_BITS_SIZE 8
+#define FORCE_FLUSH 64
+#define MIN_OBUF_SIZE 224
+#define NON_EMPTY_BLOCK_SIZE 6
+#define MAX_SYNC_FLUSH_SIZE (NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE)
+
+#include "huffman.h"
+#include "bitbuf2.h"
+#include "igzip_lib.h"
+#include "crc.h"
+#include "repeated_char_result.h"
+#include "huff_codes.h"
+#include "encode_df.h"
+#include "igzip_level_buf_structs.h"
+#include "igzip_checksums.h"
+#include "igzip_wrapper.h"
+#include "unaligned.h"
+
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/endian.h>
+# define to_be32(x) bswap32(x)
+#elif defined (__APPLE__)
+#include <libkern/OSByteOrder.h>
+# define to_be32(x) OSSwapInt32(x)
+#elif defined (__GNUC__) && !defined (__MINGW32__)
+# include <byteswap.h>
+# define to_be32(x) bswap_32(x)
+#elif defined _WIN64
+# define to_be32(x) _byteswap_ulong(x)
+#endif
+
+extern void isal_deflate_hash_lvl0(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t);
+extern void isal_deflate_hash_lvl1(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t);
+extern void isal_deflate_hash_lvl2(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t);
+extern void isal_deflate_hash_lvl3(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t);
+extern const uint8_t gzip_hdr[];
+extern const uint32_t gzip_hdr_bytes;
+extern const uint32_t gzip_trl_bytes;
+extern const uint8_t zlib_hdr[];
+extern const uint32_t zlib_hdr_bytes;
+extern const uint32_t zlib_trl_bytes;
+extern const struct isal_hufftables hufftables_default;
+extern const struct isal_hufftables hufftables_static;
+
+static uint32_t write_stored_block(struct isal_zstream *stream);
+
+static int write_stream_header_stateless(struct isal_zstream *stream);
+static void write_stream_header(struct isal_zstream *stream);
+static int write_deflate_header_stateless(struct isal_zstream *stream);
+static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream);
+
+#define TYPE0_HDR_LEN 4
+#define TYPE0_BLK_HDR_LEN 5
+#define TYPE0_MAX_BLK_LEN 65535
+
+void isal_deflate_body(struct isal_zstream *stream);
+void isal_deflate_finish(struct isal_zstream *stream);
+
+void isal_deflate_icf_body(struct isal_zstream *stream);
+void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream);
+void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream);
+void isal_deflate_icf_finish_lvl3(struct isal_zstream *stream);
+/*****************************************************************/
+
+/* Forward declarations */
+static inline void reset_match_history(struct isal_zstream *stream);
+static void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr,
+ uint32_t deflate_hdr_count, uint32_t extra_bits_count,
+ uint32_t next_state, uint32_t toggle_end_of_stream);
+static void write_trailer(struct isal_zstream *stream);
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+/* Version info */
+struct slver isal_deflate_init_slver_01030081;
+struct slver isal_deflate_init_slver = { 0x0081, 0x03, 0x01 };
+
+struct slver isal_deflate_reset_slver_0001008e;
+struct slver isal_deflate_reset_slver = { 0x008e, 0x01, 0x00 };
+
+struct slver isal_deflate_stateless_init_slver_00010084;
+struct slver isal_deflate_stateless_init_slver = { 0x0084, 0x01, 0x00 };
+
+struct slver isal_deflate_slver_01030082;
+struct slver isal_deflate_slver = { 0x0082, 0x03, 0x01 };
+
+struct slver isal_deflate_stateless_slver_01010083;
+struct slver isal_deflate_stateless_slver = { 0x0083, 0x01, 0x01 };
+
+struct slver isal_deflate_set_hufftables_slver_0001008b;
+struct slver isal_deflate_set_hufftables_slver = { 0x008b, 0x01, 0x00 };
+
+struct slver isal_deflate_set_dict_slver_0001008c;
+struct slver isal_deflate_set_dict_slver = { 0x008c, 0x01, 0x00 };
+
+/*****************************************************************/
+
+// isal_adler32_bam1 - adler with (B | A minus 1) storage
+
+uint32_t isal_adler32_bam1(uint32_t adler32, const unsigned char *start, uint64_t length)
+{
+ uint64_t a;
+
+	/* Internally the checksum is stored as B | (A-1) so crc and
+	 * adler have the same init value */
+ a = adler32 & 0xffff;
+ a = (a == ADLER_MOD - 1) ? 0 : a + 1;
+ adler32 = isal_adler32((adler32 & 0xffff0000) | a, start, length);
+ a = (adler32 & 0xffff);
+ a = (a == 0) ? ADLER_MOD - 1 : a - 1;
+
+ return (adler32 & 0xffff0000) | a;
+}
+
+static void update_checksum(struct isal_zstream *stream, uint8_t * start_in, uint64_t length)
+{
+ struct isal_zstate *state = &stream->internal_state;
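+	/* gzip streams carry a CRC32 checksum; zlib streams carry Adler-32,
+	 * kept here in the B | (A-1) form described above. */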
+ switch (stream->gzip_flag) {
+ case IGZIP_GZIP:
+ case IGZIP_GZIP_NO_HDR:
+ state->crc = crc32_gzip_refl(state->crc, start_in, length);
+ break;
+ case IGZIP_ZLIB:
+ case IGZIP_ZLIB_NO_HDR:
+ state->crc = isal_adler32_bam1(state->crc, start_in, length);
+ break;
+ }
+}
+
+static
+void sync_flush(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
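+	/* A sync flush emits an empty stored (type 0) block: a 3-bit block
+	 * header (BFINAL=0, BTYPE=00), zero padding to the next byte
+	 * boundary, then LEN=0x0000 and NLEN=0xFFFF. Bits are written LSB
+	 * first, so the pattern 0xFFFF0000 holds LEN in its low half. */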
+ uint64_t bits_to_write = 0xFFFF0000, bits_len;
+ uint64_t bytes;
+ int flush_size;
+
+ if (stream->avail_out >= 8) {
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ flush_size = (-(state->bitbuf.m_bit_count + 3)) % 8;
+
+ bits_to_write <<= flush_size + 3;
+ bits_len = 32 + flush_size + 3;
+
+ state->state = ZSTATE_NEW_HDR;
+ state->has_eob = 0;
+
+ write_bits(&state->bitbuf, bits_to_write, bits_len);
+
+ bytes = buffer_used(&state->bitbuf);
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ stream->avail_out -= bytes;
+ stream->total_out += bytes;
+
+ if (stream->flush == FULL_FLUSH) {
+ /* Clear match history so there are no cross
+ * block length distance pairs */
+ state->has_hist = IGZIP_NO_HIST;
+ }
+ }
+}
+
+static void flush_write_buffer(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ int bytes = 0;
+ if (stream->avail_out >= 8) {
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+ flush(&state->bitbuf);
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ bytes = buffer_used(&state->bitbuf);
+ stream->avail_out -= bytes;
+ stream->total_out += bytes;
+ state->state = ZSTATE_NEW_HDR;
+ }
+}
+
+static void flush_icf_block(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ struct BitBuf2 *write_buf = &state->bitbuf;
+ struct deflate_icf *icf_buf_encoded_next;
+
+ set_buf(write_buf, stream->next_out, stream->avail_out);
+
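+	/* Encode as many buffered ICF symbols as fit in the output buffer;
+	 * the returned pointer marks how far encoding got so a partially
+	 * written block can be resumed on the next call. */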
+ icf_buf_encoded_next = encode_deflate_icf(level_buf->icf_buf_start + state->count,
+ level_buf->icf_buf_next, write_buf,
+ &level_buf->encode_tables);
+
+ state->count = icf_buf_encoded_next - level_buf->icf_buf_start;
+ stream->next_out = buffer_ptr(write_buf);
+ stream->total_out += buffer_used(write_buf);
+ stream->avail_out -= buffer_used(write_buf);
+
+ if (level_buf->icf_buf_next <= icf_buf_encoded_next) {
+ state->count = 0;
+ if (stream->avail_in == 0 && stream->end_of_stream)
+ state->state = ZSTATE_TRL;
+ else if (stream->avail_in == 0 && stream->flush != NO_FLUSH)
+ state->state = ZSTATE_SYNC_FLUSH;
+ else
+ state->state = ZSTATE_NEW_HDR;
+ }
+}
+
+static int check_level_req(struct isal_zstream *stream)
+{
+ if (stream->level == 0)
+ return 0;
+
+ if (stream->level_buf == NULL)
+ return ISAL_INVALID_LEVEL_BUF;
+
+ switch (stream->level) {
+ case 3:
+ if (stream->level_buf_size < ISAL_DEF_LVL3_MIN)
+ return ISAL_INVALID_LEVEL;
+ break;
+
+ case 2:
+ if (stream->level_buf_size < ISAL_DEF_LVL2_MIN)
+ return ISAL_INVALID_LEVEL;
+ break;
+ case 1:
+ if (stream->level_buf_size < ISAL_DEF_LVL1_MIN)
+ return ISAL_INVALID_LEVEL;
+ break;
+ default:
+ return ISAL_INVALID_LEVEL;
+ }
+
+ return 0;
+}
+
+static int init_hash8k_buf(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ state->has_level_buf_init = 1;
+ return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash8k);
+}
+
+static int init_hash_hist_buf(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ state->has_level_buf_init = 1;
+ return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash_hist);
+}
+
+static int init_hash_map_buf(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ if (!state->has_level_buf_init) {
+ level_buf->hash_map.matches_next = level_buf->hash_map.matches;
+ level_buf->hash_map.matches_end = level_buf->hash_map.matches;
+ }
+ state->has_level_buf_init = 1;
+
+ return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash_map);
+
+}
+
+/* returns the size of the level specific buffer */
+static int init_lvlX_buf(struct isal_zstream *stream)
+{
+ switch (stream->level) {
+ case 3:
+ return init_hash_map_buf(stream);
+ case 2:
+ return init_hash_hist_buf(stream);
+ default:
+ return init_hash8k_buf(stream);
+ }
+
+}
+
+static void init_new_icf_block(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ int level_struct_size;
+
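+	/* The level buffer begins with the level-specific hash structures;
+	 * the remaining space becomes the ICF token buffer, with one
+	 * trailing deflate_icf reserved for the end-of-block symbol. */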
+ level_struct_size = init_lvlX_buf(stream);
+
+ state->block_next = state->block_end;
+ level_buf->icf_buf_start =
+ (struct deflate_icf *)(stream->level_buf + level_struct_size);
+
+ level_buf->icf_buf_next = level_buf->icf_buf_start;
+ level_buf->icf_buf_avail_out =
+ stream->level_buf_size - level_struct_size - sizeof(struct deflate_icf);
+
+ memset(&level_buf->hist, 0, sizeof(struct isal_mod_hist));
+ state->state = ZSTATE_BODY;
+}
+
+static int are_buffers_empty_hashX(struct isal_zstream *stream)
+{
+ return !stream->avail_in;
+}
+
+static int are_buffers_empty_hash_map(struct isal_zstream *stream)
+{
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+
+ return (!stream->avail_in
+ && level_buf->hash_map.matches_next >= level_buf->hash_map.matches_end);
+}
+
+static int are_buffers_empty(struct isal_zstream *stream)
+{
+
+ switch (stream->level) {
+ case 3:
+ return are_buffers_empty_hash_map(stream);
+ case 2:
+ return are_buffers_empty_hashX(stream);
+ default:
+ return are_buffers_empty_hashX(stream);
+ }
+}
+
+static void create_icf_block_hdr(struct isal_zstream *stream, uint8_t * start_in)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ struct BitBuf2 *write_buf = &state->bitbuf;
+ struct BitBuf2 write_buf_tmp;
+ uint32_t out_size = stream->avail_out;
+ uint32_t avail_output, block_start_offset;
+ uint8_t *end_out = stream->next_out + out_size;
+ uint64_t cur_in_processed;
+ uint64_t bit_count;
+ uint64_t block_in_size = state->block_end - state->block_next;
+ uint64_t block_size;
+ int buffer_header = 0;
+
+ memcpy(&write_buf_tmp, write_buf, sizeof(struct BitBuf2));
+
+ /* Calculate the bytes required to store a type 0 block. Need to account
+ * for bits stored in the bitbuf. Since 3 bits correspond to the deflate
+ * type 0 header, we need to add one byte more when the number of bits
+ * is at least 6 mod 8. */
+ block_size = (TYPE0_BLK_HDR_LEN) * ((block_in_size + TYPE0_MAX_BLK_LEN - 1) /
+ TYPE0_MAX_BLK_LEN) + block_in_size;
+ block_size = block_size ? block_size : TYPE0_BLK_HDR_LEN;
+ block_size += (write_buf->m_bit_count + 2) / 8;
+
+ /* Write EOB in icf_buf */
+ level_buf->hist.ll_hist[256] = 1;
+ level_buf->icf_buf_next->lit_len = 0x100;
+ level_buf->icf_buf_next->lit_dist = NULL_DIST_SYM;
+ level_buf->icf_buf_next->dist_extra = 0;
+ level_buf->icf_buf_next++;
+
+ state->has_eob_hdr = (stream->end_of_stream && are_buffers_empty(stream)) ? 1 : 0;
+
+ if (end_out - stream->next_out >= ISAL_DEF_MAX_HDR_SIZE) {
+ /* Assumes ISAL_DEF_MAX_HDR_SIZE is large enough to contain a
+ * max length header and a gzip header */
+ if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB)
+ write_stream_header_stateless(stream);
+ set_buf(write_buf, stream->next_out, stream->avail_out);
+ buffer_header = 0;
+
+ } else {
+ /* Start writing into temporary buffer */
+ set_buf(write_buf, level_buf->deflate_hdr, ISAL_DEF_MAX_HDR_SIZE);
+ buffer_header = 1;
+ }
+
+ bit_count = create_hufftables_icf(write_buf, &level_buf->encode_tables,
+ &level_buf->hist, state->has_eob_hdr);
+
+ /* Assumes that type 0 block has size less than 4G */
+ block_start_offset = (stream->total_in - state->block_next);
+ cur_in_processed = stream->next_in - start_in;
+ avail_output = stream->avail_out + sizeof(state->buffer) -
+ (stream->total_in - state->block_end);
+
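+	/* Prefer a stored (type 0) block when the dynamic Huffman encoding
+	 * would be no smaller, provided the block's input is still available
+	 * to copy and the output has room for it. */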
+ if (bit_count / 8 >= block_size && cur_in_processed >= block_start_offset
+ && block_size <= avail_output) {
+ /* Reset stream for writing out a type0 block */
+ state->has_eob_hdr = 0;
+ memcpy(write_buf, &write_buf_tmp, sizeof(struct BitBuf2));
+ state->state = ZSTATE_TYPE0_HDR;
+
+ } else if (buffer_header) {
+ /* Setup stream to write out a buffered header */
+ level_buf->deflate_hdr_count = buffer_used(write_buf);
+ level_buf->deflate_hdr_extra_bits = write_buf->m_bit_count;
+ flush(write_buf);
+ memcpy(write_buf, &write_buf_tmp, sizeof(struct BitBuf2));
+ write_buf->m_bits = 0;
+ write_buf->m_bit_count = 0;
+ state->state = ZSTATE_HDR;
+
+ } else {
+ stream->next_out = buffer_ptr(write_buf);
+ stream->total_out += buffer_used(write_buf);
+ stream->avail_out -= buffer_used(write_buf);
+ state->state = ZSTATE_FLUSH_ICF_BUFFER;
+ }
+}
+
+static void isal_deflate_pass(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct isal_hufftables *hufftables = stream->hufftables;
+ uint8_t *start_in = stream->next_in;
+
+ if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
+ if (state->count == 0)
+ /* Assume the final header is being written since the header
+ * stored in hufftables is the final header. */
+ state->has_eob_hdr = 1;
+ write_header(stream, hufftables->deflate_hdr, hufftables->deflate_hdr_count,
+ hufftables->deflate_hdr_extra_bits, ZSTATE_BODY,
+ !stream->end_of_stream);
+ }
+
+ if (state->state == ZSTATE_BODY)
+ isal_deflate_body(stream);
+
+ if (state->state == ZSTATE_FLUSH_READ_BUFFER)
+ isal_deflate_finish(stream);
+ if (state->state == ZSTATE_SYNC_FLUSH)
+ sync_flush(stream);
+
+ if (state->state == ZSTATE_FLUSH_WRITE_BUFFER)
+ flush_write_buffer(stream);
+
+ if (stream->gzip_flag)
+ update_checksum(stream, start_in, stream->next_in - start_in);
+
+ if (state->state == ZSTATE_TRL)
+ write_trailer(stream);
+}
+
+static void isal_deflate_icf_finish(struct isal_zstream *stream)
+{
+ switch (stream->level) {
+ case 3:
+ isal_deflate_icf_finish_lvl3(stream);
+ break;
+ case 2:
+ isal_deflate_icf_finish_lvl2(stream);
+ break;
+ default:
+ isal_deflate_icf_finish_lvl1(stream);
+ }
+}
+
+static void isal_deflate_icf_pass(struct isal_zstream *stream, uint8_t * inbuf_start)
+{
+ uint8_t *start_in = stream->next_in;
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+
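+	/* Run the block state machine; loop as long as a finished block is
+	 * immediately followed by a new block header. */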
+ do {
+ if (state->state == ZSTATE_NEW_HDR)
+ init_new_icf_block(stream);
+
+ if (state->state == ZSTATE_BODY)
+ isal_deflate_icf_body(stream);
+
+ if (state->state == ZSTATE_FLUSH_READ_BUFFER)
+ isal_deflate_icf_finish(stream);
+
+ if (state->state == ZSTATE_CREATE_HDR)
+ create_icf_block_hdr(stream, inbuf_start);
+
+ if (state->state == ZSTATE_HDR)
+			/* Note that the remaining bits of the previous
+			 * block may be prepended to the header, so the
+			 * toggle end-of-stream flag cannot be used */
+ write_header(stream, level_buf->deflate_hdr,
+ level_buf->deflate_hdr_count,
+ level_buf->deflate_hdr_extra_bits,
+ ZSTATE_FLUSH_ICF_BUFFER, 0);
+
+ if (state->state == ZSTATE_FLUSH_ICF_BUFFER)
+ flush_icf_block(stream);
+
+ if (state->state == ZSTATE_TYPE0_HDR || state->state == ZSTATE_TYPE0_BODY) {
+ if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB)
+ write_stream_header(stream);
+ write_stored_block(stream);
+ }
+
+ }
+ while (state->state == ZSTATE_NEW_HDR);
+
+ if (state->state == ZSTATE_SYNC_FLUSH)
+ sync_flush(stream);
+
+ if (state->state == ZSTATE_FLUSH_WRITE_BUFFER)
+ flush_write_buffer(stream);
+
+ if (stream->gzip_flag)
+ update_checksum(stream, start_in, stream->next_in - start_in);
+
+ if (state->state == ZSTATE_TRL)
+ write_trailer(stream);
+}
+
+static void isal_deflate_int(struct isal_zstream *stream, uint8_t * start_in)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ uint32_t size;
+
+ /* Move data from temporary output buffer to output buffer */
+ if (state->state >= ZSTATE_TMP_OFFSET) {
+ size = state->tmp_out_end - state->tmp_out_start;
+ if (size > stream->avail_out)
+ size = stream->avail_out;
+ memcpy(stream->next_out, state->tmp_out_buff + state->tmp_out_start, size);
+ stream->next_out += size;
+ stream->avail_out -= size;
+ stream->total_out += size;
+ state->tmp_out_start += size;
+
+ if (state->tmp_out_start == state->tmp_out_end)
+ state->state -= ZSTATE_TMP_OFFSET;
+
+ if (stream->avail_out == 0 || state->state == ZSTATE_END
+ // or do not write out empty blocks since the output buffer is full
+ || (state->state == ZSTATE_NEW_HDR && stream->avail_out == 0))
+ return;
+ }
+ assert(state->tmp_out_start == state->tmp_out_end);
+
+ if (stream->level == 0)
+ isal_deflate_pass(stream);
+ else
+ isal_deflate_icf_pass(stream, start_in);
+
+ /* Compress into the temporary output buffer, then copy what fits into the output buffer */
+ if (stream->avail_out > 0 && stream->avail_out < 8 && state->state != ZSTATE_NEW_HDR) {
+ uint8_t *next_out;
+ uint32_t avail_out;
+ uint32_t total_out;
+
+ next_out = stream->next_out;
+ avail_out = stream->avail_out;
+ total_out = stream->total_out;
+
+ stream->next_out = state->tmp_out_buff;
+ stream->avail_out = sizeof(state->tmp_out_buff);
+ stream->total_out = 0;
+
+ if (stream->level == 0)
+ isal_deflate_pass(stream);
+ else
+ isal_deflate_icf_pass(stream, start_in);
+
+ state->tmp_out_start = 0;
+ state->tmp_out_end = stream->total_out;
+
+ stream->next_out = next_out;
+ stream->avail_out = avail_out;
+ stream->total_out = total_out;
+ if (state->tmp_out_end) {
+ size = state->tmp_out_end;
+ if (size > stream->avail_out)
+ size = stream->avail_out;
+ memcpy(stream->next_out, state->tmp_out_buff, size);
+ stream->next_out += size;
+ stream->avail_out -= size;
+ stream->total_out += size;
+ state->tmp_out_start += size;
+ if (state->tmp_out_start != state->tmp_out_end)
+ state->state += ZSTATE_TMP_OFFSET;
+
+ }
+ }
+
+}
+
+static void write_constant_compressed_stateless(struct isal_zstream *stream,
+ uint32_t repeated_length)
+{
+ /* Assumes repeated_length is at least 1.
+ * Assumes the input end_of_stream is either 0 or 1. */
+ struct isal_zstate *state = &stream->internal_state;
+ uint32_t rep_bits = ((repeated_length - 1) / 258) * 2;
+ uint32_t rep_bytes = rep_bits / 8;
+ uint32_t rep_extra = (repeated_length - 1) % 258;
+ uint32_t bytes;
+ uint32_t repeated_char = *stream->next_in;
+ uint8_t *start_in = stream->next_in;
+
+ /* Guarantee there is enough space for the header even in the worst case */
+ if (stream->avail_out < HEADER_LENGTH + MAX_FIXUP_CODE_LENGTH + rep_bytes + 8)
+ return;
+
+ /* Assumes the repeated char is either 0 or 0xFF. */
+ memcpy(stream->next_out, repeated_char_header[repeated_char & 1], HEADER_LENGTH);
+
+ if (stream->avail_in == repeated_length && stream->end_of_stream > 0) {
+ stream->next_out[0] |= 1;
+ state->has_eob_hdr = 1;
+ state->has_eob = 1;
+ state->state = ZSTATE_TRL;
+ } else {
+ state->state = ZSTATE_NEW_HDR;
+ }
+
+ memset(stream->next_out + HEADER_LENGTH, 0, rep_bytes);
+ stream->avail_out -= HEADER_LENGTH + rep_bytes;
+ stream->next_out += HEADER_LENGTH + rep_bytes;
+ stream->total_out += HEADER_LENGTH + rep_bytes;
+
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ /* These two lines are basically a modified version of init. */
+ state->bitbuf.m_bits = 0;
+ state->bitbuf.m_bit_count = rep_bits % 8;
+
+ /* Add smaller repeat codes as necessary. Code280 can describe repeat
+ * lengths of 115-130 bytes. Code10 can describe a repeat length of 10
+ * bytes. If more than 230 bytes remain, split the remainder across two
+ * code280s. Else if more than 115 repeats remain, fill with code10s
+ * until one code280 can finish the rest of the repeats. Else, fill
+ * with code10s and literals */
+ if (rep_extra > 115) {
+ while (rep_extra > 130 && rep_extra < 230) {
+ write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH);
+ rep_extra -= 10;
+ }
+
+ if (rep_extra >= 230) {
+ write_bits(&state->bitbuf,
+ CODE_280 | ((rep_extra / 2 - 115) <<
+ CODE_280_LENGTH), CODE_280_TOTAL_LENGTH);
+ rep_extra -= rep_extra / 2;
+ }
+
+ write_bits(&state->bitbuf,
+ CODE_280 | ((rep_extra - 115) << CODE_280_LENGTH),
+ CODE_280_TOTAL_LENGTH);
+
+ } else {
+ while (rep_extra >= 10) {
+
+ write_bits(&state->bitbuf, CODE_10, CODE_10_LENGTH);
+ rep_extra -= 10;
+ }
+
+ for (; rep_extra > 0; rep_extra--)
+ write_bits(&state->bitbuf, CODE_LIT, CODE_LIT_LENGTH);
+ }
+
+ write_bits(&state->bitbuf, END_OF_BLOCK, END_OF_BLOCK_LEN);
+
+ stream->next_in += repeated_length;
+ stream->avail_in -= repeated_length;
+ stream->total_in += repeated_length;
+ state->block_end += repeated_length;
+
+ bytes = buffer_used(&state->bitbuf);
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ stream->avail_out -= bytes;
+ stream->total_out += bytes;
+
+ if (stream->gzip_flag)
+ update_checksum(stream, start_in, stream->next_in - start_in);
+
+ return;
+}
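+
+/* Illustrative walk-through (not part of the library): for a run of
+ * repeated_length = 1000 identical bytes the split above gives
+ *
+ *   rep_bits  = ((1000 - 1) / 258) * 2 = 6
+ *   rep_bytes = 6 / 8 = 0
+ *   rep_extra = (1000 - 1) % 258 = 225
+ *
+ * assuming the canned header's 258-byte repeat code is the 2-bit all-zeros
+ * code, as the memset of rep_bytes suggests. Since 115 < 225 < 230, the
+ * loop emits code10s (225 -> 215 -> ... -> 125) until one code280 with
+ * extra bits (125 - 115 = 10) finishes the run, then the end-of-block
+ * code is written. */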
+
+static int detect_repeated_char_length(uint8_t * in, uint32_t length)
+{
+ /* This currently assumes the first 8 bytes are the same character.
+ * This won't work effectively if the input stream isn't aligned well. */
+ uint8_t *p_8, *end = in + length;
+ uint64_t *p_64 = (uint64_t *) in;
+ uint64_t w = *p_64;
+ uint8_t c = (uint8_t) w;
+
+ for (; (p_64 <= (uint64_t *) (end - 8)) && (w == *p_64); p_64++) ;
+
+ p_8 = (uint8_t *) p_64;
+
+ for (; (p_8 < end) && (c == *p_8); p_8++) ;
+
+ return p_8 - in;
+}
+
+static int isal_deflate_int_stateless(struct isal_zstream *stream)
+{
+ uint32_t repeat_length;
+ struct isal_zstate *state = &stream->internal_state;
+
+ if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB)
+ if (write_stream_header_stateless(stream))
+ return STATELESS_OVERFLOW;
+
+ if (stream->avail_in >= 8
+ && (load_u64(stream->next_in) == 0
+ || load_u64(stream->next_in) == ~(uint64_t) 0)) {
+ repeat_length = detect_repeated_char_length(stream->next_in, stream->avail_in);
+
+ if (stream->avail_in == repeat_length || repeat_length >= MIN_REPEAT_LEN)
+ write_constant_compressed_stateless(stream, repeat_length);
+ }
+
+ if (stream->level == 0) {
+ if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) {
+ write_deflate_header_unaligned_stateless(stream);
+ if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
+ return STATELESS_OVERFLOW;
+
+ reset_match_history(stream);
+ }
+
+ isal_deflate_pass(stream);
+
+ } else if (stream->level <= ISAL_DEF_MAX_LEVEL) {
+ if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR)
+ reset_match_history(stream);
+
+ state->count = 0;
+ isal_deflate_icf_pass(stream, stream->next_in);
+
+ }
+
+ if (state->state == ZSTATE_END
+ || (state->state == ZSTATE_NEW_HDR && stream->flush == FULL_FLUSH))
+ return COMP_OK;
+ else
+ return STATELESS_OVERFLOW;
+}
+
+static void write_type0_header(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ uint64_t stored_blk_hdr;
+ uint32_t copy_size;
+ uint32_t memcpy_len, avail_in;
+ uint32_t block_in_size = state->block_end - state->block_next;
+ uint32_t block_next_offset;
+ struct BitBuf2 *bitbuf = &stream->internal_state.bitbuf;
+
+ if (block_in_size > TYPE0_MAX_BLK_LEN) {
+ stored_blk_hdr = 0xFFFF;
+ copy_size = TYPE0_MAX_BLK_LEN;
+ } else {
+ stored_blk_hdr = ~block_in_size;
+ stored_blk_hdr <<= 16;
+ stored_blk_hdr |= (block_in_size & 0xFFFF);
+ copy_size = block_in_size;
+
+ /* Handle BFINAL bit */
+ block_next_offset = stream->total_in - state->block_next;
+ avail_in = stream->avail_in + block_next_offset;
+ if (stream->end_of_stream && avail_in == block_in_size)
+ stream->internal_state.has_eob_hdr = 1;
+ }
+
+ if (bitbuf->m_bit_count == 0 && stream->avail_out >= TYPE0_HDR_LEN + 1) {
+ stored_blk_hdr = stored_blk_hdr << 8;
+ stored_blk_hdr |= stream->internal_state.has_eob_hdr;
+ memcpy_len = TYPE0_HDR_LEN + 1;
+ memcpy(stream->next_out, &stored_blk_hdr, memcpy_len);
+ } else if (stream->avail_out >= 8) {
+ set_buf(bitbuf, stream->next_out, stream->avail_out);
+ write_bits_flush(bitbuf, stream->internal_state.has_eob_hdr, 3);
+ stream->next_out = buffer_ptr(bitbuf);
+ stream->total_out += buffer_used(bitbuf);
+ stream->avail_out -= buffer_used(bitbuf);
+ memcpy_len = TYPE0_HDR_LEN;
+ memcpy(stream->next_out, &stored_blk_hdr, memcpy_len);
+ } else {
+ stream->internal_state.has_eob_hdr = 0;
+ return;
+ }
+
+ stream->next_out += memcpy_len;
+ stream->avail_out -= memcpy_len;
+ stream->total_out += memcpy_len;
+ stream->internal_state.state = ZSTATE_TYPE0_BODY;
+
+ stream->internal_state.count = copy_size;
+}
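+
+/* Illustrative byte layout (not part of the library): for a stored block of
+ * block_in_size = 0x0102 with the bitbuf byte-aligned, stored_blk_hdr is
+ * (~0x0102 << 16) | 0x0102, and after the final <<= 8 plus the BFINAL flag
+ * the five bytes written are
+ *
+ *   [bfinal] [0x02] [0x01] [0xFD] [0xFE]
+ *
+ * i.e. the DEFLATE stored-block header of RFC 1951 section 3.2.4: the
+ * BFINAL/BTYPE bits, LEN in little-endian order, then NLEN = ~LEN. */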
+
+static uint32_t write_stored_block(struct isal_zstream *stream)
+{
+ uint32_t copy_size, avail_in, block_next_offset;
+ uint8_t *next_in;
+ struct isal_zstate *state = &stream->internal_state;
+
+ do {
+ if (state->state == ZSTATE_TYPE0_HDR) {
+ write_type0_header(stream);
+ if (state->state == ZSTATE_TYPE0_HDR)
+ break;
+ }
+
+ assert(state->count <= state->block_end - state->block_next);
+ copy_size = state->count;
+
+ block_next_offset = stream->total_in - state->block_next;
+ next_in = stream->next_in - block_next_offset;
+ avail_in = stream->avail_in + block_next_offset;
+
+ if (copy_size > stream->avail_out || copy_size > avail_in) {
+ state->count = copy_size;
+ copy_size = (stream->avail_out <= avail_in) ?
+ stream->avail_out : avail_in;
+
+ memcpy(stream->next_out, next_in, copy_size);
+ state->count -= copy_size;
+ } else {
+ memcpy(stream->next_out, next_in, copy_size);
+
+ state->count = 0;
+ state->state = ZSTATE_TYPE0_HDR;
+ }
+
+ state->block_next += copy_size;
+ stream->next_out += copy_size;
+ stream->avail_out -= copy_size;
+ stream->total_out += copy_size;
+
+ if (state->block_next == state->block_end) {
+ state->state = state->has_eob_hdr ? ZSTATE_TRL : ZSTATE_NEW_HDR;
+ if (stream->flush == FULL_FLUSH && state->state == ZSTATE_NEW_HDR
+ && are_buffers_empty(stream)) {
+ /* Clear match history so there are no cross-block
+ * length-distance pairs */
+ reset_match_history(stream);
+ }
+ }
+ } while (state->state == ZSTATE_TYPE0_HDR);
+
+ return state->block_end - state->block_next;
+}
+
+static inline void reset_match_history(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint16_t *hash_table;
+ uint32_t hash_table_size;
+
+ hash_table_size = 2 * (state->hash_mask + 1);
+
+ switch (stream->level) {
+ case 3:
+ hash_table = level_buf->lvl3.hash_table;
+ break;
+ case 2:
+ hash_table = level_buf->lvl2.hash_table;
+ break;
+ case 1:
+ hash_table = level_buf->lvl1.hash_table;
+ break;
+ default:
+ hash_table = state->head;
+ }
+
+ state->has_hist = IGZIP_NO_HIST;
+
+ /* There are always more than 16 bytes in the hash table. Enforce this
+ * minimum to avoid a wmemset of size 0 */
+ if (hash_table_size <= sizeof(wchar_t))
+ hash_table_size = sizeof(wchar_t);
+
+ if (sizeof(wchar_t) == 2) {
+ uint16_t hash_init_val;
+
+ hash_init_val = stream->total_in & 0xffff;
+ wmemset((wchar_t *)hash_table, hash_init_val,
+ hash_table_size / sizeof(wchar_t));
+
+ } else if (sizeof(wchar_t) == 4) {
+ uint32_t hash_init_val;
+ int rep_bits;
+
+ hash_init_val = stream->total_in & 0xffff;
+ for (rep_bits = sizeof(uint16_t) * 8; rep_bits < sizeof(wchar_t) * 8;
+ rep_bits *= 2)
+ hash_init_val |= hash_init_val << rep_bits;
+
+ wmemset((wchar_t *)hash_table, hash_init_val,
+ hash_table_size / sizeof(wchar_t));
+ } else {
+ if ((stream->total_in & 0xFFFF) == 0)
+ memset(hash_table, 0, hash_table_size);
+ else {
+ int i;
+ for (i = 0; i < hash_table_size / 2; i++) {
+ hash_table[i] = (uint16_t) (stream->total_in);
+ }
+ }
+ }
+
+}
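+
+/* Illustrative note (not part of the library): the hash table is filled with
+ * the low 16 bits of total_in so every stale entry appears maximally distant
+ * from the current position. With a 4-byte wchar_t the 16-bit value is
+ * replicated into both halves before the wmemset, e.g.
+ *
+ *   hash_init_val = 0xABCD   ->   0xABCDABCD
+ *
+ * which writes the same 16-bit pattern as the 2-byte wchar_t path. */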
+
+static void inline set_dist_mask(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ uint32_t hist_size;
+
+ if (stream->hist_bits > ISAL_DEF_MAX_HIST_BITS || stream->hist_bits == 0)
+ stream->hist_bits = ISAL_DEF_MAX_HIST_BITS;
+
+ hist_size = (1 << (stream->hist_bits));
+ state->dist_mask = hist_size - 1;
+
+ if (IGZIP_HIST_SIZE < ISAL_DEF_HIST_SIZE && state->dist_mask > IGZIP_HIST_SIZE - 1)
+ state->dist_mask = IGZIP_HIST_SIZE - 1;
+}
+
+static void inline set_hash_mask(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+
+ switch (stream->level) {
+ case 3:
+ state->hash_mask = LVL3_HASH_MASK;
+ break;
+ case 2:
+ state->hash_mask = LVL2_HASH_MASK;
+ break;
+ case 1:
+ state->hash_mask = LVL1_HASH_MASK;
+ break;
+ case 0:
+ state->hash_mask = LVL0_HASH_MASK;
+ }
+}
+
+void isal_deflate_init(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+
+ stream->total_in = 0;
+ stream->total_out = 0;
+ stream->hufftables = (struct isal_hufftables *)&hufftables_default;
+ stream->level = 0;
+ stream->level_buf = NULL;
+ stream->level_buf_size = 0;
+ stream->end_of_stream = 0;
+ stream->flush = NO_FLUSH;
+ stream->gzip_flag = 0;
+ stream->hist_bits = 0;
+
+ state->block_next = 0;
+ state->block_end = 0;
+ state->b_bytes_valid = 0;
+ state->b_bytes_processed = 0;
+ state->total_in_start = 0;
+ state->has_wrap_hdr = 0;
+ state->has_eob = 0;
+ state->has_eob_hdr = 0;
+ state->has_hist = IGZIP_NO_HIST;
+ state->has_level_buf_init = 0;
+ state->state = ZSTATE_NEW_HDR;
+ state->count = 0;
+
+ state->tmp_out_start = 0;
+ state->tmp_out_end = 0;
+
+ init(&state->bitbuf);
+
+ state->crc = 0;
+
+ return;
+}
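+
+/* Minimal usage sketch (illustrative only; inbuf, outbuf and their lengths
+ * are caller-supplied, and level 1 needs a level buffer of at least
+ * ISAL_DEF_LVL1_DEFAULT bytes):
+ *
+ *   struct isal_zstream zs;
+ *   isal_deflate_init(&zs);
+ *   zs.level = 1;
+ *   zs.level_buf = malloc(ISAL_DEF_LVL1_DEFAULT);
+ *   zs.level_buf_size = ISAL_DEF_LVL1_DEFAULT;
+ *   zs.next_in = inbuf;    zs.avail_in = inbuf_len;
+ *   zs.next_out = outbuf;  zs.avail_out = outbuf_len;
+ *   zs.end_of_stream = 1;
+ *   int rc = isal_deflate(&zs);  // COMP_OK on success
+ */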
+
+void isal_deflate_reset(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+
+ stream->total_in = 0;
+ stream->total_out = 0;
+
+ state->block_next = 0;
+ state->block_end = 0;
+ state->b_bytes_valid = 0;
+ state->b_bytes_processed = 0;
+ state->total_in_start = 0;
+ state->has_wrap_hdr = 0;
+ state->has_eob = 0;
+ state->has_level_buf_init = 0;
+ state->has_eob_hdr = 0;
+ state->has_hist = IGZIP_NO_HIST;
+ state->state = ZSTATE_NEW_HDR;
+ state->count = 0;
+
+ state->tmp_out_start = 0;
+ state->tmp_out_end = 0;
+
+ init(&state->bitbuf);
+
+ state->crc = 0;
+
+}
+
+void isal_gzip_header_init(struct isal_gzip_header *gz_hdr)
+{
+ gz_hdr->text = 0;
+ gz_hdr->time = 0;
+ gz_hdr->xflags = 0;
+ gz_hdr->os = 0xff;
+ gz_hdr->extra = NULL;
+ gz_hdr->extra_buf_len = 0;
+ gz_hdr->extra_len = 0;
+ gz_hdr->name = NULL;
+ gz_hdr->name_buf_len = 0;
+ gz_hdr->comment = NULL;
+ gz_hdr->comment_buf_len = 0;
+ gz_hdr->hcrc = 0;
+}
+
+uint32_t isal_write_gzip_header(struct isal_zstream *stream, struct isal_gzip_header *gz_hdr)
+{
+ uint32_t flags = 0, hcrc, hdr_size = GZIP_HDR_BASE;
+ uint8_t *out_buf = stream->next_out, *out_buf_start = stream->next_out;
+ uint32_t name_len = 0, comment_len = 0;
+
+ if (gz_hdr->text)
+ flags |= TEXT_FLAG;
+ if (gz_hdr->extra) {
+ flags |= EXTRA_FLAG;
+ hdr_size += GZIP_EXTRA_LEN + gz_hdr->extra_len;
+ }
+ if (gz_hdr->name) {
+ flags |= NAME_FLAG;
+ name_len = strnlen(gz_hdr->name, gz_hdr->name_buf_len);
+ if (name_len < gz_hdr->name_buf_len)
+ name_len++;
+ hdr_size += name_len;
+ }
+ if (gz_hdr->comment) {
+ flags |= COMMENT_FLAG;
+ comment_len = strnlen(gz_hdr->comment, gz_hdr->comment_buf_len);
+ if (comment_len < gz_hdr->comment_buf_len)
+ comment_len++;
+ hdr_size += comment_len;
+ }
+ if (gz_hdr->hcrc) {
+ flags |= HCRC_FLAG;
+ hdr_size += GZIP_HCRC_LEN;
+ }
+
+ if (stream->avail_out < hdr_size)
+ return hdr_size;
+
+ out_buf[0] = 0x1f;
+ out_buf[1] = 0x8b;
+ out_buf[2] = DEFLATE_METHOD;
+ out_buf[3] = flags;
+ store_u32(out_buf + 4, gz_hdr->time);
+ out_buf[8] = gz_hdr->xflags;
+ out_buf[9] = gz_hdr->os;
+
+ out_buf += GZIP_HDR_BASE;
+ if (flags & EXTRA_FLAG) {
+ store_u16(out_buf, gz_hdr->extra_len);
+ out_buf += GZIP_EXTRA_LEN;
+
+ memcpy(out_buf, gz_hdr->extra, gz_hdr->extra_len);
+ out_buf += gz_hdr->extra_len;
+ }
+
+ if (flags & NAME_FLAG) {
+ memcpy(out_buf, gz_hdr->name, name_len);
+ out_buf += name_len;
+ }
+
+ if (flags & COMMENT_FLAG) {
+ memcpy(out_buf, gz_hdr->comment, comment_len);
+ out_buf += comment_len;
+ }
+
+ if (flags & HCRC_FLAG) {
+ hcrc = crc32_gzip_refl(0, out_buf_start, out_buf - out_buf_start);
+ store_u16(out_buf, hcrc);
+ out_buf += GZIP_HCRC_LEN;
+ }
+
+ stream->next_out += hdr_size;
+ stream->total_out += hdr_size;
+ stream->avail_out -= hdr_size;
+
+ return ISAL_DECOMP_OK;
+}
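+
+/* Illustrative call sequence (not part of the library), assuming the stream
+ * was set up with gzip_flag = IGZIP_GZIP_NO_HDR so this custom header
+ * replaces the canned one:
+ *
+ *   struct isal_gzip_header gz_hdr;
+ *   isal_gzip_header_init(&gz_hdr);
+ *   gz_hdr.name = "file.txt";
+ *   gz_hdr.name_buf_len = sizeof("file.txt");
+ *   if (isal_write_gzip_header(&zs, &gz_hdr) != ISAL_DECOMP_OK)
+ *           ;  // return value is the header size that did not fit
+ */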
+
+uint32_t isal_write_zlib_header(struct isal_zstream *stream, struct isal_zlib_header *z_hdr)
+{
+ uint32_t cmf, flg, dict_flag = 0, hdr_size = ZLIB_HDR_BASE;
+ uint8_t *out_buf = stream->next_out;
+
+ if (z_hdr->dict_flag) {
+ dict_flag = ZLIB_DICT_FLAG;
+ hdr_size = ZLIB_HDR_BASE + ZLIB_DICT_LEN;
+ }
+
+ if (stream->avail_out < hdr_size)
+ return hdr_size;
+
+ cmf = DEFLATE_METHOD | (z_hdr->info << 4);
+ flg = (z_hdr->level << 6) | dict_flag;
+
+ flg += 31 - ((256 * cmf + flg) % 31);
+
+ out_buf[0] = cmf;
+ out_buf[1] = flg;
+
+ if (dict_flag)
+ store_u32(out_buf + 2, z_hdr->dict_id);
+
+ stream->next_out += hdr_size;
+ stream->total_out += hdr_size;
+ stream->avail_out -= hdr_size;
+
+ return ISAL_DECOMP_OK;
+}
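+
+/* Worked example (illustrative only): with DEFLATE_METHOD = 8, info = 7 and
+ * level = 2, cmf = 8 | (7 << 4) = 0x78 and flg starts at 2 << 6 = 0x80. The
+ * FCHECK adjustment 31 - ((256 * 0x78 + 0x80) % 31) adds 0x1C, yielding the
+ * familiar zlib header bytes 0x78 0x9C, for which 256 * cmf + flg is a
+ * multiple of 31 as RFC 1950 requires. */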
+
+int isal_deflate_set_hufftables(struct isal_zstream *stream,
+ struct isal_hufftables *hufftables, int type)
+{
+ if (stream->internal_state.state != ZSTATE_NEW_HDR)
+ return ISAL_INVALID_OPERATION;
+
+ switch (type) {
+ case IGZIP_HUFFTABLE_DEFAULT:
+ stream->hufftables = (struct isal_hufftables *)&hufftables_default;
+ break;
+ case IGZIP_HUFFTABLE_STATIC:
+ stream->hufftables = (struct isal_hufftables *)&hufftables_static;
+ break;
+ case IGZIP_HUFFTABLE_CUSTOM:
+ if (hufftables != NULL) {
+ stream->hufftables = hufftables;
+ break;
+ }
+ default:
+ return ISAL_INVALID_OPERATION;
+ }
+
+ return COMP_OK;
+}
+
+void isal_deflate_stateless_init(struct isal_zstream *stream)
+{
+ stream->total_in = 0;
+ stream->total_out = 0;
+ stream->hufftables = (struct isal_hufftables *)&hufftables_default;
+ stream->level = 0;
+ stream->level_buf = NULL;
+ stream->level_buf_size = 0;
+ stream->end_of_stream = 0;
+ stream->flush = NO_FLUSH;
+ stream->gzip_flag = 0;
+ stream->internal_state.has_wrap_hdr = 0;
+ stream->internal_state.state = ZSTATE_NEW_HDR;
+ return;
+}
+
+void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
+{
+ /* Reset history to prevent out-of-bounds matches. This works because
+ * the dictionary must set at least 1 element in the history */
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint32_t hash_mask = stream->internal_state.hash_mask;
+
+ switch (stream->level) {
+ case 3:
+ memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table));
+ isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, hash_mask,
+ stream->total_in, dict, dict_len);
+ break;
+
+ case 2:
+ memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table));
+ isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, hash_mask,
+ stream->total_in, dict, dict_len);
+ break;
+ case 1:
+ memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table));
+ isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, hash_mask,
+ stream->total_in, dict, dict_len);
+ break;
+ default:
+ memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head));
+ isal_deflate_hash_lvl0(stream->internal_state.head, hash_mask,
+ stream->total_in, dict, dict_len);
+ }
+
+ stream->internal_state.has_hist = IGZIP_HIST;
+}
+
+int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict,
+ uint8_t * dict_data, uint32_t dict_len)
+{
+ if ((dict == NULL)
+ || (dict_len == 0)
+ || (dict->level > ISAL_DEF_MAX_LEVEL))
+ return ISAL_INVALID_STATE;
+
+ if (dict_len > IGZIP_HIST_SIZE) {
+ dict_data = dict_data + dict_len - IGZIP_HIST_SIZE;
+ dict_len = IGZIP_HIST_SIZE;
+ }
+
+ dict->level = stream->level;
+ dict->hist_size = dict_len;
+ memcpy(dict->history, dict_data, dict_len);
+ memset(dict->hashtable, -1, sizeof(dict->hashtable));
+
+ switch (stream->level) {
+ case 3:
+ dict->hash_size = IGZIP_LVL3_HASH_SIZE;
+ isal_deflate_hash_lvl3(dict->hashtable, LVL3_HASH_MASK,
+ 0, dict_data, dict_len);
+ break;
+
+ case 2:
+ dict->hash_size = IGZIP_LVL2_HASH_SIZE;
+ isal_deflate_hash_lvl2(dict->hashtable, LVL2_HASH_MASK,
+ 0, dict_data, dict_len);
+ break;
+ case 1:
+ dict->hash_size = IGZIP_LVL1_HASH_SIZE;
+ isal_deflate_hash_lvl1(dict->hashtable, LVL1_HASH_MASK,
+ 0, dict_data, dict_len);
+ break;
+ default:
+ dict->hash_size = IGZIP_LVL0_HASH_SIZE;
+ isal_deflate_hash_lvl0(dict->hashtable, LVL0_HASH_MASK,
+ 0, dict_data, dict_len);
+ }
+ return COMP_OK;
+}
+
+int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ int ret;
+
+ if ((state->state != ZSTATE_NEW_HDR)
+ || (state->b_bytes_processed != state->b_bytes_valid)
+ || (dict->level != stream->level)
+ || (dict->hist_size == 0)
+ || (dict->hist_size > IGZIP_HIST_SIZE)
+ || (dict->hash_size > IGZIP_LVL3_HASH_SIZE))
+ return ISAL_INVALID_STATE;
+
+ ret = check_level_req(stream);
+ if (ret)
+ return ret;
+
+ memcpy(state->buffer, dict->history, dict->hist_size);
+ state->b_bytes_processed = dict->hist_size;
+ state->b_bytes_valid = dict->hist_size;
+ state->has_hist = IGZIP_DICT_HASH_SET;
+
+ switch (stream->level) {
+ case 3:
+ memcpy(level_buf->lvl3.hash_table, dict->hashtable,
+ sizeof(level_buf->lvl3.hash_table));
+ break;
+
+ case 2:
+ memcpy(level_buf->lvl2.hash_table, dict->hashtable,
+ sizeof(level_buf->lvl2.hash_table));
+ break;
+ case 1:
+ memcpy(level_buf->lvl1.hash_table, dict->hashtable,
+ sizeof(level_buf->lvl1.hash_table));
+ break;
+ default:
+ memcpy(stream->internal_state.head, dict->hashtable,
+ sizeof(stream->internal_state.head));
+ }
+
+ return COMP_OK;
+}
+
+int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
+{
+ struct isal_zstate *state = &stream->internal_state;
+
+ if (state->state != ZSTATE_NEW_HDR || state->b_bytes_processed != state->b_bytes_valid)
+ return ISAL_INVALID_STATE;
+
+ if (dict_len <= 0)
+ return COMP_OK;
+
+ if (dict_len > IGZIP_HIST_SIZE) {
+ dict = dict + dict_len - IGZIP_HIST_SIZE;
+ dict_len = IGZIP_HIST_SIZE;
+ }
+
+ memcpy(state->buffer, dict, dict_len);
+ state->b_bytes_processed = dict_len;
+ state->b_bytes_valid = dict_len;
+
+ state->has_hist = IGZIP_DICT_HIST;
+
+ return COMP_OK;
+}
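+
+/* Illustrative dictionary use (not part of the library): the dictionary is
+ * set after init and before the first isal_deflate() call, and the inflate
+ * side must supply the same dictionary. Per the code above, only the last
+ * IGZIP_HIST_SIZE bytes of a longer dictionary are kept.
+ *
+ *   isal_deflate_init(&zs);
+ *   isal_deflate_set_dict(&zs, dict_buf, dict_buf_len);
+ *   // ... set next_in/next_out and call isal_deflate() as usual
+ */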
+
+int isal_deflate_stateless(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ uint8_t *next_in = stream->next_in;
+ const uint32_t avail_in = stream->avail_in;
+ const uint32_t total_in = stream->total_in;
+
+ uint8_t *next_out = stream->next_out;
+ const uint32_t avail_out = stream->avail_out;
+ const uint32_t total_out = stream->total_out;
+ const uint32_t gzip_flag = stream->gzip_flag;
+ const uint32_t has_wrap_hdr = state->has_wrap_hdr;
+
+ int level_check;
+ uint64_t stored_len;
+
+ /* Final block has already been written */
+ state->block_next = stream->total_in;
+ state->block_end = stream->total_in;
+ state->has_eob_hdr = 0;
+ init(&state->bitbuf);
+ state->state = ZSTATE_NEW_HDR;
+ state->crc = 0;
+ state->has_level_buf_init = 0;
+ set_dist_mask(stream);
+
+ if (stream->flush == NO_FLUSH)
+ stream->end_of_stream = 1;
+
+ if (stream->flush != NO_FLUSH && stream->flush != FULL_FLUSH)
+ return INVALID_FLUSH;
+
+ level_check = check_level_req(stream);
+ if (level_check) {
+ if (stream->level == 1 && stream->level_buf == NULL) {
+ /* Default to internal buffer if invalid size is supplied */
+ stream->level_buf = state->buffer;
+ stream->level_buf_size = sizeof(state->buffer) + sizeof(state->head);
+ } else
+ return level_check;
+ }
+
+ set_hash_mask(stream);
+
+ if (state->hash_mask > 2 * avail_in)
+ state->hash_mask = (1 << bsr(avail_in)) - 1;
+
+ if (avail_in == 0)
+ stored_len = TYPE0_BLK_HDR_LEN;
+ else {
+ stored_len = TYPE0_BLK_HDR_LEN * ((avail_in + TYPE0_MAX_BLK_LEN - 1) /
+ TYPE0_MAX_BLK_LEN);
+ stored_len += avail_in;
+ }
+
+ /*
+ At least 1 byte of compressed data is needed in the case of an empty
+ dynamic block, which only contains the EOB
+ */
+ if (stream->gzip_flag == IGZIP_GZIP)
+ stored_len += gzip_hdr_bytes + gzip_trl_bytes;
+ else if (stream->gzip_flag == IGZIP_GZIP_NO_HDR)
+ stored_len += gzip_trl_bytes;
+
+ else if (stream->gzip_flag == IGZIP_ZLIB)
+ stored_len += zlib_hdr_bytes + zlib_trl_bytes;
+
+ else if (stream->gzip_flag == IGZIP_ZLIB_NO_HDR)
+ stored_len += zlib_trl_bytes;
+
+ if (avail_out >= stored_len)
+ stream->avail_out = stored_len;
+
+ if (isal_deflate_int_stateless(stream) == COMP_OK) {
+ if (avail_out >= stored_len)
+ stream->avail_out += avail_out - stored_len;
+ return COMP_OK;
+ } else {
+ if (avail_out >= stored_len)
+ stream->avail_out += avail_out - stored_len;
+ if (stream->flush == FULL_FLUSH) {
+ reset_match_history(stream);
+ }
+ stream->internal_state.has_eob_hdr = 0;
+ }
+
+ if (avail_out < stored_len)
+ return STATELESS_OVERFLOW;
+
+ stream->next_in = next_in + avail_in;
+ stream->avail_in = 0;
+ stream->total_in = avail_in;
+
+ state->block_next = stream->total_in - avail_in;
+ state->block_end = stream->total_in;
+
+ stream->next_out = next_out;
+ stream->avail_out = avail_out;
+ stream->total_out = total_out;
+
+ stream->gzip_flag = gzip_flag;
+ state->has_wrap_hdr = has_wrap_hdr;
+ init(&stream->internal_state.bitbuf);
+ stream->internal_state.count = 0;
+
+ if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB)
+ write_stream_header_stateless(stream);
+
+ stream->internal_state.state = ZSTATE_TYPE0_HDR;
+
+ write_stored_block(stream);
+
+ stream->total_in = total_in + avail_in;
+
+ if (stream->gzip_flag) {
+ stream->internal_state.crc = 0;
+ update_checksum(stream, next_in, avail_in);
+ }
+
+ if (stream->end_of_stream)
+ write_trailer(stream);
+
+ return COMP_OK;
+
+}
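+
+/* Minimal stateless sketch (illustrative only): the whole input is presented
+ * in one call; STATELESS_OVERFLOW is returned only when the output buffer is
+ * too small even for the stored-block fallback sized by stored_len above.
+ *
+ *   struct isal_zstream zs;
+ *   isal_deflate_stateless_init(&zs);
+ *   zs.next_in = inbuf;    zs.avail_in = inbuf_len;
+ *   zs.next_out = outbuf;  zs.avail_out = outbuf_len;
+ *   int rc = isal_deflate_stateless(&zs);  // COMP_OK or STATELESS_OVERFLOW
+ */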
+
+static inline uint32_t get_hist_size(struct isal_zstream *stream, uint8_t * start_in,
+ int32_t buf_hist_start)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ uint32_t history_size;
+ uint32_t buffered_history;
+ uint32_t buffered_size = state->b_bytes_valid - state->b_bytes_processed;
+ uint32_t input_history;
+
+ buffered_history = (state->has_hist) ? state->b_bytes_processed - buf_hist_start : 0;
+ input_history = stream->next_in - start_in;
+
+ /* Calculate history required for deflate window */
+ history_size = (buffered_history >= input_history) ? buffered_history : input_history;
+ if (history_size > IGZIP_HIST_SIZE)
+ history_size = IGZIP_HIST_SIZE;
+
+ /* Calculate history required based on internal state */
+ if (state->state == ZSTATE_TYPE0_HDR
+ || state->state == ZSTATE_TYPE0_BODY
+ || state->state == ZSTATE_TMP_TYPE0_HDR || state->state == ZSTATE_TMP_TYPE0_BODY) {
+ if (stream->total_in - state->block_next > history_size) {
+ history_size = (stream->total_in - state->block_next);
+ }
+ } else if (stream->avail_in + buffered_size == 0
+ && (stream->end_of_stream || stream->flush == FULL_FLUSH)) {
+ history_size = 0;
+ }
+ return history_size;
+}
+
+int isal_deflate(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ int ret = COMP_OK;
+ uint8_t *next_in, *start_in, *buf_start_in, *next_in_pre;
+ uint32_t avail_in, total_start, hist_size, future_size;
+ uint32_t in_size, in_size_initial, out_size, out_size_initial;
+ uint32_t processed, buffered_size = state->b_bytes_valid - state->b_bytes_processed;
+ uint32_t flush_type = stream->flush;
+ uint32_t end_of_stream = stream->end_of_stream;
+ uint32_t size = 0;
+ int32_t buf_hist_start = 0;
+ uint8_t *copy_down_src = NULL;
+ uint64_t copy_down_size = 0, copy_start_offset;
+ int internal;
+
+ if (stream->flush >= 3)
+ return INVALID_FLUSH;
+
+ ret = check_level_req(stream);
+ if (ret)
+ return ret;
+
+ start_in = stream->next_in;
+ total_start = stream->total_in;
+
+ hist_size = get_hist_size(stream, start_in, buf_hist_start);
+
+ if (state->has_hist == IGZIP_NO_HIST) {
+ set_dist_mask(stream);
+ set_hash_mask(stream);
+ if (state->hash_mask > 2 * stream->avail_in
+ && (stream->flush == FULL_FLUSH || stream->end_of_stream))
+ state->hash_mask = (1 << bsr(2 * stream->avail_in)) - 1;
+ stream->total_in -= buffered_size;
+ reset_match_history(stream);
+ stream->total_in += buffered_size;
+ buf_hist_start = state->b_bytes_processed;
+
+ } else if (state->has_hist == IGZIP_DICT_HIST) {
+ set_dist_mask(stream);
+ set_hash_mask(stream);
+ isal_deflate_hash(stream, state->buffer, state->b_bytes_processed);
+ } else if (state->has_hist == IGZIP_DICT_HASH_SET) {
+ set_dist_mask(stream);
+ set_hash_mask(stream);
+ }
+
+ in_size = stream->avail_in + buffered_size;
+ out_size = stream->total_out;
+ do {
+ in_size_initial = in_size;
+ out_size_initial = out_size;
+ buf_start_in = start_in;
+ internal = 0;
+
+ /* Setup to compress from internal buffer if insufficient history */
+ if (stream->total_in - total_start < hist_size + buffered_size) {
+ /* On entry there should always be sufficient history buffered */
+ /* assert(state->b_bytes_processed >= hist_size); */
+
+ internal = 1;
+ /* Shift down internal buffer if it contains more data
+ * than required */
+ if (state->b_bytes_processed > hist_size) {
+ copy_start_offset = state->b_bytes_processed - hist_size;
+
+ copy_down_src = &state->buffer[copy_start_offset];
+ copy_down_size = state->b_bytes_valid - copy_start_offset;
+ memmove(state->buffer, copy_down_src, copy_down_size);
+
+ state->b_bytes_valid -= copy_down_src - state->buffer;
+ state->b_bytes_processed -= copy_down_src - state->buffer;
+ buf_hist_start -= copy_down_src - state->buffer;
+ if (buf_hist_start < 0)
+ buf_hist_start = 0;
+ }
+
+ size = stream->avail_in;
+ if (size > sizeof(state->buffer) - state->b_bytes_valid)
+ size = sizeof(state->buffer) - state->b_bytes_valid;
+
+ memcpy(&state->buffer[state->b_bytes_valid], stream->next_in, size);
+
+ stream->next_in += size;
+ stream->avail_in -= size;
+ stream->total_in += size;
+ state->b_bytes_valid += size;
+ buffered_size += size;
+
+ /* Save off next_in and avail_in when compression is
+ * performed in the internal buffer; total_in can be
+ * recovered from knowledge of the size of the buffered
+ * input */
+ next_in = stream->next_in;
+ avail_in = stream->avail_in;
+
+ /* If not much data is buffered and there is no need to
+ * flush the buffer, just continue rather than attempt
+ * to compress */
+ if (avail_in == 0 && buffered_size <= IGZIP_HIST_SIZE
+ && stream->total_in - buffered_size - state->block_next <=
+ IGZIP_HIST_SIZE && !stream->end_of_stream
+ && stream->flush == NO_FLUSH)
+ continue;
+
+ if (avail_in) {
+ stream->flush = NO_FLUSH;
+ stream->end_of_stream = 0;
+ }
+
+ stream->next_in = &state->buffer[state->b_bytes_processed];
+ stream->avail_in = buffered_size;
+ stream->total_in -= buffered_size;
+
+ buf_start_in = state->buffer;
+
+ } else if (buffered_size) {
+ /* The user-provided buffer has sufficient data; reset
+ * the user-supplied buffer to include any data already
+ * buffered */
+ stream->next_in -= buffered_size;
+ stream->avail_in += buffered_size;
+ stream->total_in -= buffered_size;
+ state->b_bytes_valid = 0;
+ state->b_bytes_processed = 0;
+ buffered_size = 0;
+ }
+
+ next_in_pre = stream->next_in;
+ isal_deflate_int(stream, buf_start_in);
+ processed = stream->next_in - next_in_pre;
+ hist_size = get_hist_size(stream, buf_start_in, buf_hist_start);
+
+ /* Restore the stream to the unbuffered user input after compressing from the internal buffer */
+ if (internal) {
+ state->b_bytes_processed += processed;
+ buffered_size -= processed;
+
+ stream->flush = flush_type;
+ stream->end_of_stream = end_of_stream;
+ stream->total_in += buffered_size;
+
+ stream->next_in = next_in;
+ stream->avail_in = avail_in;
+ }
+
+ in_size = stream->avail_in + buffered_size;
+ out_size = stream->total_out;
+
+ } while (internal && stream->avail_in > 0 && stream->avail_out > 0
+ && (in_size_initial != in_size || out_size_initial != out_size));
+
+ /* Buffer history if data was pulled from the external buffer and future
+ * calls to deflate will be required */
+ if (!internal && (state->state != ZSTATE_END && state->state != ZSTATE_TRL)) {
+ /* If the external buffer was used, sufficient history must
+ * exist in the user input buffer */
+ /* assert(stream->total_in - total_start >= */
+ /* hist_size + buffered_size); */
+
+ stream->next_in -= buffered_size;
+ stream->avail_in += buffered_size;
+ stream->total_in -= buffered_size;
+
+ memmove(state->buffer, stream->next_in - hist_size, hist_size);
+ state->b_bytes_processed = hist_size;
+ state->b_bytes_valid = hist_size;
+ buffered_size = 0;
+ }
+
+ /* Buffer input data if it is necessary for continued execution */
+ if (stream->avail_in > 0 && (stream->avail_out > 0 || stream->level == 3)) {
+ /* Determine how much data to buffer */
+ future_size = sizeof(state->buffer) - state->b_bytes_valid;
+ if (stream->avail_in < future_size)
+ /* Buffer all data if it fits, as it will need to be buffered
+ * on the next call anyway */
+ future_size = stream->avail_in;
+ else if (ISAL_LOOK_AHEAD < future_size)
+ /* Buffer the minimum look-ahead required for level 3 */
+ future_size = ISAL_LOOK_AHEAD;
+
+ memcpy(&state->buffer[state->b_bytes_valid], stream->next_in, future_size);
+
+ state->b_bytes_valid += future_size;
+ buffered_size += future_size;
+ stream->next_in += future_size;
+ stream->total_in += future_size;
+ stream->avail_in -= future_size;
+
+ }
+
+ return ret;
+}
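+
+/* Illustrative streaming loop (not part of the library), assuming
+ * read_chunk()/write_chunk() helpers supplied by the caller; this mirrors
+ * the pattern used in the igzip examples:
+ *
+ *   do {
+ *           n = read_chunk(inbuf, sizeof(inbuf));
+ *           zs.next_in = inbuf;
+ *           zs.avail_in = n;
+ *           zs.end_of_stream = (n < sizeof(inbuf));
+ *           do {
+ *                   zs.next_out = outbuf;
+ *                   zs.avail_out = sizeof(outbuf);
+ *                   isal_deflate(&zs);
+ *                   write_chunk(outbuf, sizeof(outbuf) - zs.avail_out);
+ *           } while (zs.avail_out == 0);
+ *   } while (zs.internal_state.state != ZSTATE_END);
+ */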
+
+static int write_stream_header_stateless(struct isal_zstream *stream)
+{
+ uint32_t hdr_bytes;
+ const uint8_t *hdr;
+ uint32_t next_flag;
+
+ if (stream->internal_state.has_wrap_hdr)
+ return COMP_OK;
+
+ if (stream->gzip_flag == IGZIP_ZLIB) {
+ hdr_bytes = zlib_hdr_bytes;
+ hdr = zlib_hdr;
+ next_flag = IGZIP_ZLIB_NO_HDR;
+
+ } else {
+ hdr_bytes = gzip_hdr_bytes;
+ hdr = gzip_hdr;
+ next_flag = IGZIP_GZIP_NO_HDR;
+ }
+
+ if (hdr_bytes >= stream->avail_out)
+ return STATELESS_OVERFLOW;
+
+ stream->avail_out -= hdr_bytes;
+ stream->total_out += hdr_bytes;
+
+ memcpy(stream->next_out, hdr, hdr_bytes);
+
+ stream->next_out += hdr_bytes;
+ stream->internal_state.has_wrap_hdr = 1;
+ stream->gzip_flag = next_flag;
+
+ return COMP_OK;
+}
+
+static void write_stream_header(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ int bytes_to_write;
+ uint32_t hdr_bytes;
+ const uint8_t *hdr;
+
+ if (stream->internal_state.has_wrap_hdr)
+ return;
+
+ if (stream->gzip_flag == IGZIP_ZLIB) {
+ hdr_bytes = zlib_hdr_bytes;
+ hdr = zlib_hdr;
+ } else {
+ hdr_bytes = gzip_hdr_bytes;
+ hdr = gzip_hdr;
+ }
+
+ bytes_to_write = hdr_bytes;
+ bytes_to_write -= state->count;
+
+ if (bytes_to_write > stream->avail_out)
+ bytes_to_write = stream->avail_out;
+
+ memcpy(stream->next_out, hdr + state->count, bytes_to_write);
+ state->count += bytes_to_write;
+
+ if (state->count == hdr_bytes) {
+ state->count = 0;
+ state->has_wrap_hdr = 1;
+ }
+
+ stream->avail_out -= bytes_to_write;
+ stream->total_out += bytes_to_write;
+ stream->next_out += bytes_to_write;
+
+}
+
+static int write_deflate_header_stateless(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct isal_hufftables *hufftables = stream->hufftables;
+ uint64_t hdr_extra_bits = hufftables->deflate_hdr[hufftables->deflate_hdr_count];
+ uint32_t count;
+
+ if (hufftables->deflate_hdr_count + 8 >= stream->avail_out)
+ return STATELESS_OVERFLOW;
+
+ memcpy(stream->next_out, hufftables->deflate_hdr, hufftables->deflate_hdr_count);
+
+ if (stream->end_of_stream == 0) {
+ if (hufftables->deflate_hdr_count > 0)
+ *stream->next_out -= 1;
+ else
+ hdr_extra_bits -= 1;
+ } else
+ state->has_eob_hdr = 1;
+
+ stream->avail_out -= hufftables->deflate_hdr_count;
+ stream->total_out += hufftables->deflate_hdr_count;
+ stream->next_out += hufftables->deflate_hdr_count;
+
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ write_bits(&state->bitbuf, hdr_extra_bits, hufftables->deflate_hdr_extra_bits);
+
+ count = buffer_used(&state->bitbuf);
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ stream->avail_out -= count;
+ stream->total_out += count;
+
+ state->state = ZSTATE_BODY;
+
+ return COMP_OK;
+}
+
+static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ struct isal_hufftables *hufftables = stream->hufftables;
+ unsigned int count;
+ uint64_t bit_count;
+ uint8_t *header_next;
+ uint8_t *header_end;
+ uint64_t header_bits;
+
+ if (state->bitbuf.m_bit_count == 0)
+ return write_deflate_header_stateless(stream);
+
+ if (hufftables->deflate_hdr_count + 16 >= stream->avail_out)
+ return STATELESS_OVERFLOW;
+
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ header_next = hufftables->deflate_hdr;
+ header_end = header_next +
+ (hufftables->deflate_hdr_count / sizeof(header_bits)) * sizeof(header_bits);
+
+ header_bits = load_u64(header_next);
+
+ if (stream->end_of_stream == 0)
+ header_bits--;
+ else
+ state->has_eob_hdr = 1;
+
+ header_next += sizeof(header_bits);
+
+ /* Write out complete header bits */
+ for (; header_next <= header_end; header_next += sizeof(header_bits)) {
+ write_bits(&state->bitbuf, header_bits, 32);
+ header_bits >>= 32;
+ write_bits(&state->bitbuf, header_bits, 32);
+ header_bits = load_u64(header_next);
+ }
+ bit_count =
+ (hufftables->deflate_hdr_count & 0x7) * 8 + hufftables->deflate_hdr_extra_bits;
+
+ if (bit_count > MAX_BITBUF_BIT_WRITE) {
+ write_bits(&state->bitbuf, header_bits, MAX_BITBUF_BIT_WRITE);
+ header_bits >>= MAX_BITBUF_BIT_WRITE;
+ bit_count -= MAX_BITBUF_BIT_WRITE;
+
+ }
+
+ write_bits(&state->bitbuf, header_bits, bit_count);
+
+ /* check_space flushes extra bytes in bitbuf. Required because
+ * write_bits_always fails when the next commit makes the buffer
+ * length exceed 64 bits */
+ check_space(&state->bitbuf, FORCE_FLUSH);
+
+ count = buffer_used(&state->bitbuf);
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ stream->avail_out -= count;
+ stream->total_out += count;
+
+ state->state = ZSTATE_BODY;
+
+ return COMP_OK;
+}
+
+/* Toggling end of stream only works when the deflate header is byte-aligned */
+static void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr,
+ uint32_t deflate_hdr_count, uint32_t extra_bits_count,
+ uint32_t next_state, uint32_t toggle_end_of_stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ uint32_t hdr_extra_bits = deflate_hdr[deflate_hdr_count];
+ uint32_t count;
+ state->state = ZSTATE_HDR;
+
+ if (state->bitbuf.m_bit_count != 0) {
+ if (stream->avail_out < 8)
+ return;
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+ flush(&state->bitbuf);
+ count = buffer_used(&state->bitbuf);
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ stream->avail_out -= count;
+ stream->total_out += count;
+ }
+
+ if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB)
+ write_stream_header(stream);
+
+ count = deflate_hdr_count - state->count;
+
+ if (count != 0) {
+ if (count > stream->avail_out)
+ count = stream->avail_out;
+
+ memcpy(stream->next_out, deflate_hdr + state->count, count);
+
+ if (toggle_end_of_stream && state->count == 0 && count > 0) {
+ /* Assumes the final block bit is the first bit */
+ *stream->next_out ^= 1;
+ state->has_eob_hdr = !state->has_eob_hdr;
+ }
+
+ stream->next_out += count;
+ stream->avail_out -= count;
+ stream->total_out += count;
+ state->count += count;
+
+ count = deflate_hdr_count - state->count;
+ } else if (toggle_end_of_stream && deflate_hdr_count == 0) {
+ /* Assumes the final block bit is the first bit */
+ hdr_extra_bits ^= 1;
+ state->has_eob_hdr = !state->has_eob_hdr;
+ }
+
+ if ((count == 0) && (stream->avail_out >= 8)) {
+
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ write_bits(&state->bitbuf, hdr_extra_bits, extra_bits_count);
+
+ state->state = next_state;
+ state->count = 0;
+
+ count = buffer_used(&state->bitbuf);
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ stream->avail_out -= count;
+ stream->total_out += count;
+ }
+
+}
+
+static void write_trailer(struct isal_zstream *stream)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ unsigned int bytes = 0;
+ uint32_t crc = state->crc;
+
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ if (!state->has_eob_hdr) {
+ /* If the final header has not been written, write a
+ * final block. This block is a static Huffman block
+ * which only contains the end-of-block symbol. The code
+ * for this is the first 10 bits of
+ * 0x003 */
+ if (stream->avail_out < 8)
+ return;
+
+ state->has_eob_hdr = 1;
+ write_bits(&state->bitbuf, 0x003, 10);
+ if (is_full(&state->bitbuf)) {
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ bytes = buffer_used(&state->bitbuf);
+ stream->avail_out -= bytes;
+ stream->total_out += bytes;
+ return;
+ }
+ }
+
+ if (state->bitbuf.m_bit_count) {
+ /* the flush() will pad to the next byte and write up to 8 bytes
+ * to the output stream/buffer.
+ */
+ if (stream->avail_out < 8)
+ return;
+
+ flush(&state->bitbuf);
+ }
+
+ stream->next_out = buffer_ptr(&state->bitbuf);
+ bytes = buffer_used(&state->bitbuf);
+
+ switch (stream->gzip_flag) {
+ case IGZIP_GZIP:
+ case IGZIP_GZIP_NO_HDR:
+ if (stream->avail_out - bytes >= gzip_trl_bytes) {
+ store_u64(stream->next_out, ((uint64_t) stream->total_in << 32) | crc);
+ stream->next_out += gzip_trl_bytes;
+ bytes += gzip_trl_bytes;
+ state->state = ZSTATE_END;
+ }
+ break;
+
+ case IGZIP_ZLIB:
+ case IGZIP_ZLIB_NO_HDR:
+ if (stream->avail_out - bytes >= zlib_trl_bytes) {
+ store_u32(stream->next_out,
+ to_be32((crc & 0xFFFF0000) | ((crc & 0xFFFF) + 1) %
+ ADLER_MOD));
+ stream->next_out += zlib_trl_bytes;
+ bytes += zlib_trl_bytes;
+ state->state = ZSTATE_END;
+ }
+ break;
+
+ default:
+ state->state = ZSTATE_END;
+ }
+
+ stream->avail_out -= bytes;
+ stream->total_out += bytes;
+}
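+
+/* Illustrative trailer layout (not part of the library): on a little-endian
+ * store the single store_u64 above emits the RFC 1952 gzip trailer, CRC32
+ * followed by ISIZE; e.g. crc = 0x11223344 and total_in = 0x100 produce
+ *
+ *   44 33 22 11 00 01 00 00
+ */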
diff --git a/src/isa-l/igzip/igzip_base.c b/src/isa-l/igzip/igzip_base.c
new file mode 100644
index 000000000..bcc965f6d
--- /dev/null
+++ b/src/isa-l/igzip/igzip_base.c
@@ -0,0 +1,236 @@
+#include <stdint.h>
+#include "igzip_lib.h"
+#include "huffman.h"
+#include "huff_codes.h"
+#include "bitbuf2.h"
+
+extern const struct isal_hufftables hufftables_default;
+
+static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
+ uint8_t * next_in, uint8_t * end_in)
+{
+ struct isal_zstate *state = &stream->internal_state;
+ uint32_t bytes_written;
+
+ if (next_in - start_in > 0)
+ state->has_hist = IGZIP_HIST;
+
+ stream->next_in = next_in;
+ stream->total_in += next_in - start_in;
+ stream->avail_in = end_in - next_in;
+
+ bytes_written = buffer_used(&state->bitbuf);
+ stream->total_out += bytes_written;
+ stream->next_out += bytes_written;
+ stream->avail_out -= bytes_written;
+
+}
+
+void isal_deflate_body_base(struct isal_zstream *stream)
+{
+ uint32_t literal, hash;
+ uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
+ uint16_t match_length;
+ uint32_t dist;
+ uint64_t code, code_len, code2, code_len2;
+ struct isal_zstate *state = &stream->internal_state;
+ uint16_t *last_seen = state->head;
+ uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ uint32_t hist_size = state->dist_mask;
+ uint32_t hash_mask = state->hash_mask;
+
+ if (stream->avail_in == 0) {
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_FLUSH_READ_BUFFER;
+ return;
+ }
+
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ start_in = stream->next_in;
+ end_in = start_in + stream->avail_in;
+ next_in = start_in;
+
+ while (next_in + ISAL_LOOK_AHEAD < end_in) {
+
+ if (is_full(&state->bitbuf)) {
+ update_state(stream, start_in, next_in, end_in);
+ return;
+ }
+
+ literal = load_u32(next_in);
+ hash = compute_hash(literal) & hash_mask;
+ dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ last_seen[hash] = (uint64_t) (next_in - file_start);
+
+ /* The -1 is to handle the case when dist = 0 */
+ if (dist - 1 < hist_size) {
+ assert(dist != 0);
+
+ match_length = compare258(next_in - dist, next_in, 258);
+
+ if (match_length >= SHORTEST_MATCH) {
+ next_hash = next_in;
+#ifdef ISAL_LIMIT_HASH_UPDATE
+ end = next_hash + 3;
+#else
+ end = next_hash + match_length;
+#endif
+ next_hash++;
+
+ for (; next_hash < end; next_hash++) {
+ literal = load_u32(next_hash);
+ hash = compute_hash(literal) & hash_mask;
+ last_seen[hash] = (uint64_t) (next_hash - file_start);
+ }
+
+ get_len_code(stream->hufftables, match_length, &code,
+ &code_len);
+ get_dist_code(stream->hufftables, dist, &code2, &code_len2);
+
+ code |= code2 << code_len;
+ code_len += code_len2;
+
+ write_bits(&state->bitbuf, code, code_len);
+
+ next_in += match_length;
+
+ continue;
+ }
+ }
+
+ get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+ write_bits(&state->bitbuf, code, code_len);
+ next_in++;
+ }
+
+ update_state(stream, start_in, next_in, end_in);
+
+ assert(stream->avail_in <= ISAL_LOOK_AHEAD);
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_FLUSH_READ_BUFFER;
+
+ return;
+
+}
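+
+/* Illustrative note (not part of the library): last_seen is a single-entry
+ * hash table mapping compute_hash() of 4 input bytes to the last position
+ * that hashed there. The candidate distance is recovered modulo 64K:
+ *
+ *   dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ *
+ * so a stale entry simply fails the (dist - 1 < hist_size) window check or
+ * the compare258() verification instead of needing explicit invalidation. */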
+
+void isal_deflate_finish_base(struct isal_zstream *stream)
+{
+ uint32_t literal = 0, hash;
+ uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
+ uint16_t match_length;
+ uint32_t dist;
+ uint64_t code, code_len, code2, code_len2;
+ struct isal_zstate *state = &stream->internal_state;
+ uint16_t *last_seen = state->head;
+ uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ uint32_t hist_size = state->dist_mask;
+ uint32_t hash_mask = state->hash_mask;
+
+ set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+
+ start_in = stream->next_in;
+ end_in = start_in + stream->avail_in;
+ next_in = start_in;
+
+ if (stream->avail_in != 0) {
+ while (next_in + 3 < end_in) {
+ if (is_full(&state->bitbuf)) {
+ update_state(stream, start_in, next_in, end_in);
+ return;
+ }
+
+ literal = load_u32(next_in);
+ hash = compute_hash(literal) & hash_mask;
+ dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ last_seen[hash] = (uint64_t) (next_in - file_start);
+
+ if (dist - 1 < hist_size) { /* The -1 is to handle the case when dist = 0 */
+ match_length =
+ compare258(next_in - dist, next_in, end_in - next_in);
+
+ if (match_length >= SHORTEST_MATCH) {
+ next_hash = next_in;
+#ifdef ISAL_LIMIT_HASH_UPDATE
+ end = next_hash + 3;
+#else
+ end = next_hash + match_length;
+#endif
+ next_hash++;
+
+ for (; next_hash < end - 3; next_hash++) {
+ literal = load_u32(next_hash);
+ hash = compute_hash(literal) & hash_mask;
+ last_seen[hash] =
+ (uint64_t) (next_hash - file_start);
+ }
+
+ get_len_code(stream->hufftables, match_length, &code,
+ &code_len);
+ get_dist_code(stream->hufftables, dist, &code2,
+ &code_len2);
+
+ code |= code2 << code_len;
+ code_len += code_len2;
+
+ write_bits(&state->bitbuf, code, code_len);
+
+ next_in += match_length;
+
+ continue;
+ }
+ }
+
+ get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+ write_bits(&state->bitbuf, code, code_len);
+ next_in++;
+
+ }
+
+ while (next_in < end_in) {
+ if (is_full(&state->bitbuf)) {
+ update_state(stream, start_in, next_in, end_in);
+ return;
+ }
+
+ literal = *next_in;
+ get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+ write_bits(&state->bitbuf, code, code_len);
+ next_in++;
+
+ }
+ }
+
+ if (!is_full(&state->bitbuf)) {
+ get_lit_code(stream->hufftables, 256, &code, &code_len);
+ write_bits(&state->bitbuf, code, code_len);
+ state->has_eob = 1;
+
+ if (stream->end_of_stream == 1)
+ state->state = ZSTATE_TRL;
+ else
+ state->state = ZSTATE_SYNC_FLUSH;
+ }
+
+ update_state(stream, start_in, next_in, end_in);
+
+ return;
+}
+
+void isal_deflate_hash_base(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+{
+ uint8_t *next_in = dict;
+ uint8_t *end_in = dict + dict_len - SHORTEST_MATCH;
+ uint32_t literal;
+ uint32_t hash;
+ uint16_t index = current_index - dict_len;
+
+ while (next_in <= end_in) {
+ literal = load_u32(next_in);
+ hash = compute_hash(literal) & hash_mask;
+ hash_table[hash] = index;
+ index++;
+ next_in++;
+ }
+}
diff --git a/src/isa-l/igzip/igzip_base_aliases.c b/src/isa-l/igzip/igzip_base_aliases.c
new file mode 100644
index 000000000..486ed8e3e
--- /dev/null
+++ b/src/isa-l/igzip/igzip_base_aliases.c
@@ -0,0 +1,153 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include "igzip_lib.h"
+#include "encode_df.h"
+#include "igzip_level_buf_structs.h"
+
+void isal_deflate_body_base(struct isal_zstream *stream);
+void isal_deflate_finish_base(struct isal_zstream *stream);
+void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream);
+void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream);
+void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream);
+void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream);
+void isal_update_histogram_base(uint8_t * start_stream, int length,
+ struct isal_huff_histogram *histogram);
+struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
+ struct deflate_icf *end_in, struct BitBuf2 *bb,
+ struct hufftables_icf *hufftables);
+uint32_t adler32_base(uint32_t init, const unsigned char *buf, uint64_t len);
+int decode_huffman_code_block_stateless_base(struct inflate_state *s, uint8_t * start_out);
+
+extern void isal_deflate_hash_base(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t);
+
+void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in,
+ struct deflate_icf *match_lookup, struct level_buf *level_buf);
+void gen_icf_map_h1_base(struct isal_zstream *stream,
+ struct deflate_icf *matches_icf_lookup, uint64_t input_size);
+
+void isal_deflate_body(struct isal_zstream *stream)
+{
+ isal_deflate_body_base(stream);
+}
+
+void isal_deflate_finish(struct isal_zstream *stream)
+{
+ isal_deflate_finish_base(stream);
+}
+
+void isal_deflate_icf_body_lvl1(struct isal_zstream *stream)
+{
+ isal_deflate_icf_body_hash_hist_base(stream);
+}
+
+void isal_deflate_icf_body_lvl2(struct isal_zstream *stream)
+{
+ isal_deflate_icf_body_hash_hist_base(stream);
+}
+
+void isal_deflate_icf_body_lvl3(struct isal_zstream *stream)
+{
+ icf_body_hash1_fillgreedy_lazy(stream);
+}
+
+void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream)
+{
+ isal_deflate_icf_finish_hash_hist_base(stream);
+}
+
+void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream)
+{
+ isal_deflate_icf_finish_hash_hist_base(stream);
+}
+
+void isal_deflate_icf_finish_lvl3(struct isal_zstream *stream)
+{
+ isal_deflate_icf_finish_hash_map_base(stream);
+}
+
+void isal_update_histogram(uint8_t * start_stream, int length,
+ struct isal_huff_histogram *histogram)
+{
+ isal_update_histogram_base(start_stream, length, histogram);
+}
+
+struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in,
+ struct deflate_icf *end_in, struct BitBuf2 *bb,
+ struct hufftables_icf *hufftables)
+{
+ return encode_deflate_icf_base(next_in, end_in, bb, hufftables);
+}
+
+uint32_t isal_adler32(uint32_t init, const unsigned char *buf, uint64_t len)
+{
+ return adler32_base(init, buf, len);
+}
+
+int decode_huffman_code_block_stateless(struct inflate_state *s, uint8_t * start_out)
+{
+ return decode_huffman_code_block_stateless_base(s, start_out);
+}
+
+void isal_deflate_hash_lvl0(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+{
+ isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len);
+}
+
+void isal_deflate_hash_lvl1(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+{
+ isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len);
+}
+
+void isal_deflate_hash_lvl2(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+{
+ isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len);
+}
+
+void isal_deflate_hash_lvl3(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+{
+ isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len);
+}
+
+void set_long_icf_fg(uint8_t * next_in, uint8_t * end_in,
+ struct deflate_icf *match_lookup, struct level_buf *level_buf)
+{
+ set_long_icf_fg_base(next_in, end_in, match_lookup, level_buf);
+}
+
+void gen_icf_map_lh1(struct isal_zstream *stream,
+ struct deflate_icf *matches_icf_lookup, uint64_t input_size)
+{
+ gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);
+}
diff --git a/src/isa-l/igzip/igzip_body.asm b/src/isa-l/igzip/igzip_body.asm
new file mode 100644
index 000000000..b9620d6d7
--- /dev/null
+++ b/src/isa-l/igzip/igzip_body.asm
@@ -0,0 +1,792 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "bitbuf2.asm"
+%include "huffman.asm"
+%include "igzip_compare_types.asm"
+%include "reg_sizes.asm"
+
+%include "stdmac.asm"
+
+%define LARGE_MATCH_HASH_REP 1 ; Hash 4 * LARGE_MATCH_HASH_REP elements
+%define LARGE_MATCH_MIN 264 ; Minimum match size to enter large match emit loop
+%define MIN_INBUF_PADDING 16
+%define MAX_EMIT_SIZE 258 * 16
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define tmp2 rcx
+%define hash2 rcx
+
+%define curr_data rax
+%define code rax
+%define tmp5 rax
+
+%define tmp4 rbx
+%define dist rbx
+%define code2 rbx
+%define hmask1 rbx
+
+%define hash rdx
+%define len rdx
+%define code_len3 rdx
+%define tmp8 rdx
+
+%define tmp1 rsi
+%define code_len2 rsi
+
+%define file_start rdi
+
+%define m_bit_count rbp
+
+%define curr_data2 r8
+%define len2 r8
+%define tmp6 r8
+%define f_end_i r8
+
+%define m_bits r9
+
+%define f_i r10
+
+%define m_out_buf r11
+
+%define dist2 r12
+%define tmp7 r12
+%define code4 r12
+
+%define tmp3 r13
+%define code3 r13
+
+%define stream r14
+
+%define hufftables r15
+
+;; GPR r8 & r15 can be used
+
+%define xtmp0 xmm0 ; tmp
+%define xtmp1 xmm1 ; tmp
+%define xhash xmm2
+%define xmask xmm3
+%define xdata xmm4
+
+%define ytmp0 ymm0 ; tmp
+%define ytmp1 ymm1 ; tmp
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+blen_mem_offset equ 0 ; local variable (8 bytes)
+f_end_i_mem_offset equ 8
+inbuf_slop_offset equ 16
+gpr_save_mem_offset equ 32 ; gpr save area (8*8 bytes)
+xmm_save_mem_offset equ 32 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
+stack_size equ 4*8 + 8*8 + 4*16 + 8
+;;; the extra 8 is because the stack address is an odd multiple of 8 after
+;;; a function call (the return address was pushed) and we want rsp
+;;; aligned to 16 bytes
+
+;; Defines to generate functions for different architectures
+%xdefine ARCH 01
+%xdefine ARCH1 02
+%xdefine ARCH2 04
+
+%ifndef COMPARE_TYPE
+%xdefine COMPARE_TYPE_NOT_DEF
+%xdefine COMPARE_TYPE 1
+%xdefine COMPARE_TYPE1 2
+%xdefine COMPARE_TYPE2 3
+%endif
+
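+;; The %rep 3 block below is expanded three times; after each pass the
+;; ARCH and COMPARE_TYPE defines are rotated (01 -> 02 -> 04 and
+;; 1 -> 2 -> 3), producing isal_deflate_body_01/_02/_04 built around
+;; compare_r, compare_x and compare_y respectively (see
+;; igzip_compare_types.asm), with USE_HSWNI enabled for the 04 version.
+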
+%rep 3
+%if ARCH == 04
+%define USE_HSWNI
+%endif
+
+[bits 64]
+default rel
+section .text
+
+; void isal_deflate_body ( isal_zstream *stream )
+; arg 1: rcx: addr of stream
+global isal_deflate_body_ %+ ARCH
+isal_deflate_body_ %+ ARCH %+ :
+ endbranch
+%ifidn __OUTPUT_FORMAT__, elf64
+ mov rcx, rdi
+%endif
+
+ ;; do nothing if (avail_in == 0)
+ cmp dword [rcx + _avail_in], 0
+ jne .skip1
+
+ ;; Set stream's next state
+ mov rdx, ZSTATE_FLUSH_READ_BUFFER
+ mov rax, ZSTATE_BODY
+ cmp word [rcx + _end_of_stream], 0
+ cmovne rax, rdx
+ cmp word [rcx + _flush], _NO_FLUSH
+ cmovne rax, rdx
+ mov dword [rcx + _internal_state_state], eax
+ ret
+.skip1:
+
+%ifdef ALIGN_STACK
+ push rbp
+ mov rbp, rsp
+ sub rsp, stack_size
+ and rsp, ~15
+%else
+ sub rsp, stack_size
+%endif
+
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rsi
+ mov [rsp + gpr_save_mem_offset + 2*8], rdi
+ mov [rsp + gpr_save_mem_offset + 3*8], rbp
+ mov [rsp + gpr_save_mem_offset + 4*8], r12
+ mov [rsp + gpr_save_mem_offset + 5*8], r13
+ mov [rsp + gpr_save_mem_offset + 6*8], r14
+ mov [rsp + gpr_save_mem_offset + 7*8], r15
+
+ mov stream, rcx
+ mov byte [stream + _internal_state_has_eob], 0
+
+ MOVD xmask, [stream + _internal_state_hash_mask]
+ PSHUFD xmask, xmask, 0
+
+ ; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
+ mov m_out_buf, [stream + _next_out]
+ mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
+ mov tmp1 %+ d, [stream + _avail_out]
+ add tmp1, m_out_buf
+ sub tmp1, SLOP
+
+ mov [stream + _internal_state_bitbuf_m_out_end], tmp1
+
+ mov m_bits, [stream + _internal_state_bitbuf_m_bits]
+ mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
+ mov hufftables, [stream + _hufftables]
+
+ mov file_start, [stream + _next_in]
+
+ mov f_i %+ d, dword [stream + _total_in]
+ sub file_start, f_i
+
+ mov f_end_i %+ d, [stream + _avail_in]
+ add f_end_i, f_i
+
+ mov qword [rsp + inbuf_slop_offset], MIN_INBUF_PADDING
+ cmp byte [stream + _end_of_stream], 0
+ jnz .default_inbuf_padding
+ cmp byte [stream + _flush], 0
+ jnz .default_inbuf_padding
+ mov qword [rsp + inbuf_slop_offset], LA
+.default_inbuf_padding:
+
+ ; f_end_i -= INBUF_PADDING;
+ sub f_end_i, [rsp + inbuf_slop_offset]
+ mov [rsp + f_end_i_mem_offset], f_end_i
+ ; if (f_end_i <= 0) continue;
+
+ cmp f_end_i, f_i
+ jle .input_end
+
+ MOVD hmask1 %+ d, xmask
+ ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ mov tmp3, curr_data
+ mov tmp6, curr_data
+
+ compute_hash hash, curr_data
+
+ shr tmp3, 8
+ compute_hash hash2, tmp3
+
+ and hash %+ d, hmask1 %+ d
+ and hash2 %+ d, hmask1 %+ d
+
+ cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
+ je .write_first_byte
+
+ jmp .loop2
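+
+	;; Very schematic C view of .loop2 (hypothetical helper names; the
+	;; real code pipelines two hash probes and speculative code loads):
+	;;
+	;;	dist = f_i - head[hash];  head[hash] = f_i;
+	;;	if (within_window(dist) && first_bytes_match(f_i, dist))
+	;;		emit_len_dist(match_length(f_i, dist), dist);
+	;;	else
+	;;		emit_literal(file_start[f_i]);
+	;;	f_i++;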
+ align 16
+
+.loop2:
+ mov tmp3 %+ d, dword [stream + _internal_state_dist_mask]
+
+ ; if (state->bitbuf.is_full()) {
+ cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+ ja .output_end
+
+ xor dist, dist
+ xor dist2, dist2
+
+ lea tmp1, [file_start + f_i]
+
+ mov dist %+ w, f_i %+ w
+ dec dist
+ sub dist %+ w, word [stream + _internal_state_head + 2 * hash]
+ mov [stream + _internal_state_head + 2 * hash], f_i %+ w
+
+ inc f_i
+
+ MOVQ tmp6, xdata
+ shr tmp5, 16
+ mov tmp8, tmp5
+ compute_hash tmp6, tmp5
+
+ mov dist2 %+ w, f_i %+ w
+ dec dist2
+ sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2]
+ mov [stream + _internal_state_head + 2 * hash2], f_i %+ w
+
+ ; if ((dist-1) < (D-1)) {
+ and dist, tmp3
+ neg dist
+
+ shr tmp8, 8
+ compute_hash tmp2, tmp8
+
+ and dist2, tmp3
+ neg dist2
+
+ ;; Check for long len/dist match (>7) with first literal
+ MOVQ len, xdata
+ mov curr_data, len
+ PSRLDQ xdata, 1
+ xor len, [tmp1 + dist - 1]
+ jz .compare_loop
+
+ MOVD xhash, tmp6 %+ d
+ PINSRD xhash, tmp2 %+ d, 1
+ PAND xhash, xhash, xmask
+
+ ;; Check for len/dist match (>7) with second literal
+ MOVQ len2, xdata
+ xor len2, [tmp1 + dist2]
+ jz .compare_loop2
+
+	;; Speculatively load the code for the first literal
+ movzx tmp1, curr_data %+ b
+ get_lit_code tmp1, code3, rcx, hufftables
+
+ ;; Check for len/dist match for first literal
+ test len %+ d, 0xFFFFFFFF
+ jz .len_dist_huffman_pre
+
+	;; Speculatively load the code for the second literal
+ shr curr_data, 8
+ and curr_data, 0xff
+ get_lit_code curr_data, code2, code_len2, hufftables
+
+ SHLX code2, code2, rcx
+ or code2, code3
+ add code_len2, rcx
+
+ ;; Check for len/dist match for second literal
+ test len2 %+ d, 0xFFFFFFFF
+ jnz .write_lit_bits
+
+.len_dist_lit_huffman_pre:
+ mov code_len3, rcx
+ bsf len2, len2
+ shr len2, 3
+
+.len_dist_lit_huffman:
+ neg dist2
+
+%ifndef LONGER_HUFFTABLE
+ mov tmp4, dist2
+ get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx
+%else
+ get_dist_code dist2, code4, code_len2, hufftables
+%endif
+ get_len_code len2, code, rcx, hufftables ;; rcx is code_len
+
+ MOVD hmask1 %+ d, xmask
+
+ SHLX code4, code4, rcx
+ or code4, code
+ add code_len2, rcx
+
+ add f_i, len2
+ neg len2
+
+ SHLX code4, code4, code_len3
+
+ MOVQ tmp5, xdata
+ shr tmp5, 24
+ compute_hash hash2, tmp5
+ and hash2 %+ d, hmask1 %+ d
+
+ or code4, code3
+ add code_len2, code_len3
+
+ ;; Setup for updating hash
+ lea tmp3, [f_i + len2 + 1] ; tmp3 <= k
+
+ mov tmp6, [rsp + f_end_i_mem_offset]
+ cmp f_i, tmp6
+ jge .len_dist_lit_huffman_finish
+
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+
+ MOVD hash %+ d, xhash
+ PEXTRD tmp6 %+ d, xhash, 1
+ mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+
+ compute_hash hash, curr_data
+
+ add tmp3,1
+ mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w
+
+ add tmp3, 1
+ mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
+
+ write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
+
+ mov curr_data2, curr_data
+ shr curr_data2, 8
+ compute_hash hash2, curr_data2
+
+%ifdef NO_LIMIT_HASH_UPDATE
+.loop3:
+ add tmp3,1
+ cmp tmp3, f_i
+ jae .loop3_done
+ mov tmp6, [file_start + tmp3]
+ compute_hash tmp1, tmp6
+ and tmp1 %+ d, hmask1 %+ d
+ ; state->head[hash] = k;
+ mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
+ jmp .loop3
+.loop3_done:
+%endif
+ ; hash = compute_hash(state->file_start + f_i) & hash_mask;
+ and hash %+ d, hmask1 %+ d
+ and hash2 %+ d, hmask1 %+ d
+
+ ; continue
+ jmp .loop2
+ ;; encode as dist/len
+.len_dist_lit_huffman_finish:
+ MOVD hash %+ d, xhash
+ PEXTRD tmp6 %+ d, xhash, 1
+ mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+ add tmp3,1
+ mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w
+ add tmp3, 1
+ mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
+
+ write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
+ jmp .input_end
+
+align 16
+.len_dist_huffman_pre:
+ bsf len, len
+ shr len, 3
+
+.len_dist_huffman:
+ dec f_i
+ neg dist
+
+ ; get_dist_code(dist, &code2, &code_len2);
+%ifndef LONGER_HUFFTABLE
+ mov tmp3, dist ; since code2 and dist are rbx
+ get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
+%else
+ get_dist_code dist, code2, code_len2, hufftables
+%endif
+ ; get_len_code(len, &code, &code_len);
+ get_len_code len, code, rcx, hufftables ;; rcx is code_len
+
+ ; code2 <<= code_len
+ ; code2 |= code
+ ; code_len2 += code_len
+ SHLX code4, code2, rcx
+ or code4, code
+ add code_len2, rcx
+
+	;; Setup for updating hash
+ lea tmp3, [f_i + 2] ; tmp3 <= k
+ add f_i, len
+
+ MOVD hash %+ d, xhash
+ PEXTRD hash2 %+ d, xhash, 1
+ mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+ add tmp3,1
+ mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w
+
+ MOVD hmask1 %+ d, xmask
+
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jge .len_dist_huffman_finish
+
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ compute_hash hash, curr_data
+
+ write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
+
+ mov curr_data2, curr_data
+ shr curr_data2, 8
+ compute_hash hash2, curr_data2
+
+%ifdef NO_LIMIT_HASH_UPDATE
+.loop4:
+ add tmp3,1
+ cmp tmp3, f_i
+ jae .loop4_done
+ mov tmp6, [file_start + tmp3]
+ compute_hash tmp1, tmp6
+ and tmp1 %+ d, hmask1 %+ d
+ mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w
+ jmp .loop4
+.loop4_done:
+%endif
+
+ ; hash = compute_hash(state->file_start + f_i) & hash_mask;
+ and hash %+ d, hmask1 %+ d
+ and hash2 %+ d, hmask1 %+ d
+
+ ; continue
+ jmp .loop2
+
+.len_dist_huffman_finish:
+ write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
+ jmp .input_end
+
+align 16
+.write_lit_bits:
+ PSRLDQ xdata, 1
+
+ add f_i, 1
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jge .write_lit_bits_finish
+
+ MOVQ curr_data, xdata
+ MOVDQU xdata, [file_start + f_i]
+
+ MOVD hash %+ d, xhash
+
+ write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
+
+ PEXTRD hash2 %+ d, xhash, 1
+ jmp .loop2
+
+.write_lit_bits_finish:
+ write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
+
+.input_end:
+ mov tmp1, ZSTATE_FLUSH_READ_BUFFER
+ mov tmp5, ZSTATE_BODY
+ cmp word [stream + _end_of_stream], 0
+ cmovne tmp5, tmp1
+ cmp word [stream + _flush], _NO_FLUSH
+ cmovne tmp5, tmp1
+ mov dword [stream + _internal_state_state], tmp5 %+ d
+
+.output_end:
+ ;; update input buffer
+ mov f_end_i, [rsp + f_end_i_mem_offset]
+ add f_end_i, [rsp + inbuf_slop_offset]
+ mov [stream + _total_in], f_i %+ d
+ add file_start, f_i
+ mov [stream + _next_in], file_start
+ sub f_end_i, f_i
+ mov [stream + _avail_in], f_end_i %+ d
+
+ ;; update output buffer
+ mov [stream + _next_out], m_out_buf
+ sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
+ sub [stream + _avail_out], m_out_buf %+ d
+ add [stream + _total_out], m_out_buf %+ d
+
+ mov [stream + _internal_state_bitbuf_m_bits], m_bits
+ mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
+
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rsi, [rsp + gpr_save_mem_offset + 1*8]
+ mov rdi, [rsp + gpr_save_mem_offset + 2*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 3*8]
+ mov r12, [rsp + gpr_save_mem_offset + 4*8]
+ mov r13, [rsp + gpr_save_mem_offset + 5*8]
+ mov r14, [rsp + gpr_save_mem_offset + 6*8]
+ mov r15, [rsp + gpr_save_mem_offset + 7*8]
+
+%ifndef ALIGN_STACK
+ add rsp, stack_size
+%else
+ mov rsp, rbp
+ pop rbp
+%endif
+ ret
+
+align 16
+.compare_loop:
+ MOVD xhash, tmp6 %+ d
+ PINSRD xhash, tmp2 %+ d, 1
+ PAND xhash, xhash, xmask
+ lea tmp2, [tmp1 + dist - 1]
+
+ mov len2, [rsp + f_end_i_mem_offset]
+ sub len2, f_i
+ add len2, [rsp + inbuf_slop_offset]
+ add len2, 1
+ mov tmp3, MAX_EMIT_SIZE
+ cmp len2, tmp3
+ cmovg len2, tmp3
+
+ mov len, 8
+ compare_large tmp1, tmp2, len, len2, tmp3, ytmp0, ytmp1
+
+ cmp len, 258
+ jle .len_dist_huffman
+ cmp len, LARGE_MATCH_MIN
+ jge .do_emit
+ mov len, 258
+ jmp .len_dist_huffman
+
+align 16
+.compare_loop2:
+ lea tmp2, [tmp1 + dist2]
+ add tmp1, 1
+
+ mov len, [rsp + f_end_i_mem_offset]
+ sub len, f_i
+ add len, [rsp + inbuf_slop_offset]
+ mov tmp3, MAX_EMIT_SIZE
+ cmp len, tmp3
+ cmovg len, tmp3
+
+ mov len2, 8
+ compare_large tmp1, tmp2, len2, len, tmp3, ytmp0, ytmp1
+
+ and curr_data, 0xff
+ get_lit_code curr_data, code3, code_len3, hufftables
+ cmp len2, 258
+ jle .len_dist_lit_huffman
+ cmp len2, LARGE_MATCH_MIN
+ jge .do_emit2
+ mov len2, 258
+ jmp .len_dist_lit_huffman
+
+align 16
+.do_emit2:
+ neg dist2
+
+ ; get_dist_code(dist2, &code2, &code_len2);
+ get_dist_code dist2, code2, code_len2, hufftables
+
+ ; get_len_code(len, &code, &code_len);
+ get_len_code 258, code, rcx, hufftables ;; rcx is code_len
+
+ ; code2 <<= code_len
+ ; code2 |= code
+ ; code_len2 += code_len
+ SHLX code4, code2, rcx
+ or code4, code
+ add code_len2, rcx
+ mov tmp5, rcx
+
+ mov rcx, code_len3
+ SHLX tmp8, code4, rcx
+ or code3, tmp8
+ add rcx, code_len2
+ mov code_len3, rcx
+
+ write_bits m_bits, m_bit_count, code3, code_len3, m_out_buf
+
+ lea tmp3, [f_i + 2] ; tmp3 <= k
+ MOVD tmp2 %+ d, xhash
+ mov [stream + _internal_state_head + 2 * tmp2], tmp3 %+ w
+ add tmp3,1
+ PEXTRD tmp2 %+ d, xhash, 1
+ mov [stream + _internal_state_head + 2 * tmp2], tmp3 %+ w
+
+ add f_i, 258
+ lea len, [len2 - 258]
+
+ jmp .emit_loop
+
+.do_emit:
+ dec f_i
+ neg dist
+
+ ; get_dist_code(dist, &code2, &code_len2);
+%ifndef LONGER_HUFFTABLE
+ mov tmp3, dist ; since code2 and dist are rbx
+ get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx
+%else
+ get_dist_code dist, code2, code_len2, hufftables
+%endif
+ ; get_len_code(len, &code, &code_len);
+ get_len_code 258, code, rcx, hufftables ;; rcx is code_len
+
+ ; code2 <<= code_len
+ ; code2 |= code
+ ; code_len2 += code_len
+ SHLX code4, code2, rcx
+ or code4, code
+ add code_len2, rcx
+
+ lea tmp3, [f_i + 2] ; tmp3 <= k
+ MOVD tmp6 %+ d, xhash
+ PEXTRD tmp5 %+ d, xhash, 1
+ mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w
+ add tmp3,1
+ mov [stream + _internal_state_head + 2 * tmp5], tmp3 %+ w
+ mov tmp5, rcx
+
+.emit:
+ add f_i, 258
+ sub len, 258
+ mov code3, code4
+
+ write_bits m_bits, m_bit_count, code3, code_len2, m_out_buf
+
+.emit_loop:
+ cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+ ja .output_end
+ cmp len, LARGE_MATCH_MIN
+ jge .emit
+
+ mov len2, 258
+ cmp len, len2
+ cmovg len, len2
+
+ add f_i, len
+
+ sub code_len2, tmp5
+ get_len_code len, code, rcx, hufftables
+ SHLX code4, code2, rcx
+ or code4, code
+ add code_len2, rcx
+
+ write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf
+
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jge .input_end
+
+ lea tmp7, [f_i - 4 * LARGE_MATCH_HASH_REP]
+ MOVD hmask1 %+ d, xmask
+%rep LARGE_MATCH_HASH_REP
+ mov curr_data %+ d, dword [file_start + tmp7]
+ mov curr_data2 %+ d, dword [file_start + tmp7 + 1]
+
+ compute_hash hash, curr_data
+ compute_hash hash2, curr_data2
+
+ and hash %+ d, hmask1 %+ d
+ and hash2 %+ d, hmask1 %+ d
+
+ mov [stream + _internal_state_head + 2 * hash], tmp7 %+ w
+ add tmp7, 1
+ mov [stream + _internal_state_head + 2 * hash2], tmp7 %+ w
+ add tmp7, 1
+
+ mov curr_data %+ d, dword [file_start + tmp7]
+ mov curr_data2 %+ d, dword [file_start + tmp7 + 1]
+
+ compute_hash hash, curr_data
+ compute_hash hash2, curr_data2
+
+ and hash %+ d, hmask1 %+ d
+ and hash2 %+ d, hmask1 %+ d
+
+ mov [stream + _internal_state_head + 2 * hash], tmp7 %+ w
+ add tmp7, 1
+ mov [stream + _internal_state_head + 2 * hash2], tmp7 %+ w
+%if (LARGE_MATCH_HASH_REP > 1)
+ add tmp7, 1
+%endif
+%endrep
+
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ compute_hash hash, curr_data
+
+
+ mov curr_data2, curr_data
+ shr curr_data2, 8
+ compute_hash hash2, curr_data2
+
+ ; hash = compute_hash(state->file_start + f_i) & hash_mask;
+ and hash %+ d, hmask1 %+ d
+ and hash2 %+ d, hmask1 %+ d
+
+ ; continue
+ jmp .loop2
+
+.write_first_byte:
+ cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+ ja .output_end
+
+ mov byte [stream + _internal_state_has_hist], IGZIP_HIST
+
+ mov [stream + _internal_state_head + 2 * hash], f_i %+ w
+
+ mov hash, hash2
+ shr tmp6, 16
+ compute_hash hash2, tmp6
+
+ MOVD xhash, hash %+ d
+ PINSRD xhash, hash2 %+ d, 1
+ PAND xhash, xhash, xmask
+
+ and curr_data, 0xff
+ get_lit_code curr_data, code2, code_len2, hufftables
+ jmp .write_lit_bits
+
+%ifdef USE_HSWNI
+%undef USE_HSWNI
+%endif
+
+;; Shift defines over in order to iterate over all versions
+%undef ARCH
+%xdefine ARCH ARCH1
+%undef ARCH1
+%xdefine ARCH1 ARCH2
+
+%ifdef COMPARE_TYPE_NOT_DEF
+%undef COMPARE_TYPE
+%xdefine COMPARE_TYPE COMPARE_TYPE1
+%undef COMPARE_TYPE1
+%xdefine COMPARE_TYPE1 COMPARE_TYPE2
+%endif
+%endrep
diff --git a/src/isa-l/igzip/igzip_build_hash_table_perf.c b/src/isa-l/igzip/igzip_build_hash_table_perf.c
new file mode 100644
index 000000000..1c80dc8b0
--- /dev/null
+++ b/src/isa-l/igzip/igzip_build_hash_table_perf.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <getopt.h>
+#include "igzip_lib.h"
+#include "test.h"
+
+#define DICT_LEN 32*1024
+
+extern void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, int dict_len);
+
+void create_rand_data(uint8_t * data, uint32_t size)
+{
+	uint32_t i;
+ for (i = 0; i < size; i++) {
+ data[i] = rand() % 256;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int time = BENCHMARK_TIME;
+ struct isal_zstream stream;
+ uint8_t dict[DICT_LEN];
+ uint32_t dict_len = DICT_LEN;
+
+ stream.level = 0;
+ create_rand_data(dict, dict_len);
+
+ struct perf start;
+ BENCHMARK(&start, time, isal_deflate_hash(&stream, dict, dict_len));
+
+ printf("igzip_build_hash_table_perf: in_size=%u ", dict_len);
+ perf_print(start, (long long)dict_len);
+
+ return 0;
+}
diff --git a/src/isa-l/igzip/igzip_checksums.h b/src/isa-l/igzip/igzip_checksums.h
new file mode 100644
index 000000000..e09a1f161
--- /dev/null
+++ b/src/isa-l/igzip/igzip_checksums.h
@@ -0,0 +1,12 @@
+#ifndef IGZIP_CHECKSUMS_H
+#define IGZIP_CHECKSUMS_H
+
+#include <stdint.h>
+
+#define MAX_ADLER_BUF (1 << 28)
+#define ADLER_MOD 65521
+
+uint32_t isal_adler32(uint32_t init_crc, const unsigned char *buf, uint64_t len);
+uint32_t isal_adler32_bam1(uint32_t init_crc, const unsigned char *buf, uint64_t len);
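+
+/* For reference, a minimal scalar Adler-32 sketch using the modulus
+ * above; optimized implementations typically defer the modulo by
+ * batching (adler32_ref is a hypothetical name, not part of the API):
+ *
+ *	uint32_t adler32_ref(uint32_t init, const unsigned char *buf, uint64_t len)
+ *	{
+ *		uint32_t a = init & 0xffff, b = init >> 16;
+ *		while (len--) {
+ *			a = (a + *buf++) % ADLER_MOD;
+ *			b = (b + a) % ADLER_MOD;
+ *		}
+ *		return (b << 16) | a;
+ *	}
+ */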
+
+#endif
diff --git a/src/isa-l/igzip/igzip_compare_types.asm b/src/isa-l/igzip/igzip_compare_types.asm
new file mode 100644
index 000000000..c5ab3169f
--- /dev/null
+++ b/src/isa-l/igzip/igzip_compare_types.asm
@@ -0,0 +1,452 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "stdmac.asm"
+
+%ifndef UTILS_ASM
+%define UTILS_ASM
+; compare macro
+
+;; sttni2 is faster, but it can't be debugged
+;; so the following code is based on "mine5"
+
+;; compares 8 bytes at a time, using xor
+;; assumes the input buffer has size at least 8
+;; compare_r src1, src2, result, result_max, tmp
+%macro compare_r 5
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4
+%define %%tmp %5
+%define %%tmp16 %5w ; tmp as a 16-bit register
+
+ sub %%result_max, 16
+ cmp %%result, %%result_max
+ jg %%_by_8
+
+%%loop1:
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_8:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ jg %%_cmp_last
+
+	; compare next 8 bytes
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+%%_cmp_last:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ je %%end
+
+ lea %%result, [%%result_max - 8]
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ jmp %%end
+
+%%miscompare_reg:
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+%%end:
+%endm
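+
+;; The 8-byte step above is the XOR/bit-scan idiom; in C terms (little
+;; endian assumed, load64() a hypothetical unaligned 64-bit load):
+;;
+;;	uint64_t x = load64(src1 + result) ^ load64(src2 + result);
+;;	if (x)	/* bsf finds the first differing bit; >>3 gives the byte */
+;;		return result + (__builtin_ctzll(x) >> 3);
+;;	result += 8;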
+
+;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
+;; assumes the input buffer has size at least 8
+;; compare_x src1, src2, result, result_max, tmp, xtmp1, xtmp2
+%macro compare_x 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4
+%define %%tmp %5
+%define %%tmp16 %5w ; tmp as a 16-bit register
+%define %%tmp32 %5d ; tmp as a 32-bit register
+%define %%xtmp %6
+%define %%xtmp2 %7
+
+ sub %%result_max, 32
+ cmp %%result, %%result_max
+ jg %%_by_16
+
+%%loop1:
+ MOVDQU %%xtmp, [%%src1 + %%result]
+ MOVDQU %%xtmp2, [%%src2 + %%result]
+ PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+ PMOVMSKB %%tmp32, %%xtmp
+ xor %%tmp, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+ MOVDQU %%xtmp, [%%src1 + %%result]
+ MOVDQU %%xtmp2, [%%src2 + %%result]
+ PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+ PMOVMSKB %%tmp32, %%xtmp
+ xor %%tmp, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_16:
+ add %%result_max, 16
+ cmp %%result, %%result_max
+ jg %%_by_8
+
+ MOVDQU %%xtmp, [%%src1 + %%result]
+ MOVDQU %%xtmp2, [%%src2 + %%result]
+ PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+ PMOVMSKB %%tmp32, %%xtmp
+ xor %%tmp, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+%%_by_8:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ jg %%_cmp_last
+
+	; compare next 8 bytes
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+%%_cmp_last:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ je %%end
+
+ lea %%result, [%%result_max - 8]
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ jmp %%end
+
+%%miscompare_reg:
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+ jmp %%end
+
+%%miscompare_vect:
+ bsf %%tmp, %%tmp
+ add %%result, %%tmp
+%%end:
+%endm
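+
+;; The 16-byte step uses pcmpeqb/pmovmskb: equal byte lanes set mask
+;; bits, so mask ^ 0xFFFF marks the mismatches and bsf of that value is
+;; the byte offset directly (no >>3 as in the scalar path).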
+
+;; compares 32 bytes at a time, using pcmpeqb/pmovmskb
+;; assumes the input buffer has size at least 8
+;; compare_y src1, src2, result, result_max, tmp, xtmp1, xtmp2
+%macro compare_y 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4
+%define %%tmp %5
+%define %%tmp16 %5w ; tmp as a 16-bit register
+%define %%tmp32 %5d ; tmp as a 32-bit register
+%define %%ytmp %6
+%define %%ytmp2 %7
+
+ sub %%result_max, 64
+ cmp %%result, %%result_max
+ jg %%_by_32
+
+%%loop1:
+ vmovdqu %%ytmp, [%%src1 + %%result]
+ vmovdqu %%ytmp2, [%%src2 + %%result]
+ vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+ vpmovmskb %%tmp, %%ytmp
+ xor %%tmp32, 0xFFFFFFFF
+ jnz %%miscompare_vect
+ add %%result, 32
+
+ vmovdqu %%ytmp, [%%src1 + %%result]
+ vmovdqu %%ytmp2, [%%src2 + %%result]
+ vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+ vpmovmskb %%tmp, %%ytmp
+ xor %%tmp32, 0xFFFFFFFF
+ jnz %%miscompare_vect
+ add %%result, 32
+
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_32:
+ add %%result_max, 32
+ cmp %%result, %%result_max
+ jg %%_by_16
+
+ vmovdqu %%ytmp, [%%src1 + %%result]
+ vmovdqu %%ytmp2, [%%src2 + %%result]
+ vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+ vpmovmskb %%tmp, %%ytmp
+ xor %%tmp32, 0xFFFFFFFF
+ jnz %%miscompare_vect
+ add %%result, 32
+
+%%_by_16:
+ add %%result_max, 16
+ cmp %%result, %%result_max
+ jg %%_by_8
+
+ vmovdqu %%ytmp %+ x, [%%src1 + %%result]
+ vmovdqu %%ytmp2 %+ x, [%%src2 + %%result]
+ vpcmpeqb %%ytmp %+ x, %%ytmp %+ x, %%ytmp2 %+ x
+ vpmovmskb %%tmp, %%ytmp %+ x
+ xor %%tmp32, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+%%_by_8:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ jg %%_cmp_last
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+%%_cmp_last:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ je %%end
+
+ lea %%result, [%%result_max - 8]
+
+	; compare the final (possibly overlapping) 8 bytes
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ jmp %%end
+
+%%miscompare_reg:
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+ jmp %%end
+
+%%miscompare_vect:
+ tzcnt %%tmp, %%tmp
+ add %%result, %%tmp
+%%end:
+%endm
+
+;; compares 64 bytes at a time
+;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
+;; Clobbers result_max
+%macro compare_z 8
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4
+%define %%tmp %5	; Temp register (used at full 64-bit width)
+%define %%ktmp %6
+%define %%ztmp %7
+%define %%ztmp2 %8
+
+ sub %%result_max, 128
+ cmp %%result, %%result_max
+ jg %%_by_64
+
+%%loop1:
+ vmovdqu8 %%ztmp, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, 64
+
+ vmovdqu8 %%ztmp, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, 64
+
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_64:
+ add %%result_max, 64
+ cmp %%result, %%result_max
+ jg %%_less_than_64
+
+ vmovdqu8 %%ztmp, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, 64
+
+%%_less_than_64:
+ add %%result_max, 64
+ sub %%result_max, %%result
+ jle %%end
+
+ mov %%tmp, -1
+ bzhi %%tmp, %%tmp, %%result_max
+ kmovq %%ktmp, %%tmp
+
+ vmovdqu8 %%ztmp {%%ktmp}{z}, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, %%result_max
+
+ jmp %%end
+%%miscompare:
+ kmovq %%tmp, %%ktmp
+ tzcnt %%tmp, %%tmp
+ add %%result, %%tmp
+%%end:
+%endm
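+
+;; The same flow expressed with AVX-512BW intrinsics, as a sketch (not
+;; the project's actual C fallback):
+;;
+;;	__m512i a = _mm512_loadu_si512(src1 + result);
+;;	__m512i b = _mm512_loadu_si512(src2 + result);
+;;	__mmask64 neq = _mm512_cmpneq_epi8_mask(a, b);
+;;	if (neq)	/* tzcnt of the mask is the first mismatching byte */
+;;		return result + (uint32_t)_tzcnt_u64(neq);
+;;	result += 64;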
+
+%macro compare250 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4
+%define %%tmp %5
+%define %%xtmp0 %6x
+%define %%xtmp1 %7x
+%define %%ytmp0 %6
+%define %%ytmp1 %7
+
+ mov %%tmp, 250
+ cmp %%result_max, 250
+ cmovg %%result_max, %%tmp
+
+%if (COMPARE_TYPE == 1)
+ compare_r %%src1, %%src2, %%result, %%result_max, %%tmp
+%elif (COMPARE_TYPE == 2)
+ compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
+%elif (COMPARE_TYPE == 3)
+ compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
+%else
+%error Unknown Compare type COMPARE_TYPE
+%endif
+%endmacro
+
+; Assumes the buffer has at least 8 bytes
+; Accumulates match length onto result
+%macro compare_large 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4
+%define %%tmp %5
+%define %%xtmp0 %6x
+%define %%xtmp1 %7x
+%define %%ytmp0 %6
+%define %%ytmp1 %7
+
+%if (COMPARE_TYPE == 1)
+ compare_r %%src1, %%src2, %%result, %%result_max, %%tmp
+%elif (COMPARE_TYPE == 2)
+ compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
+%elif (COMPARE_TYPE == 3)
+ compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
+%else
+%error Unknown Compare type COMPARE_TYPE
+%endif
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; compare size, src1, src2, result, tmp
+%macro compare 5
+%define %%size %1
+%define %%src1 %2
+%define %%src2 %3
+%define %%result %4
+%define %%tmp %5
+%define %%tmp8 %5b ; tmp as a 8-bit register
+
+ xor %%result, %%result
+ sub %%size, 7
+ jle %%lab2
+%%loop1:
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare
+ add %%result, 8
+ sub %%size, 8
+ jg %%loop1
+%%lab2:
+ ;; if we fall through from above, we have found no mismatches,
+ ;; %%size+7 is the number of bytes left to look at, and %%result is the
+ ;; number of bytes that have matched
+ add %%size, 7
+ jle %%end
+%%loop3:
+ mov %%tmp8, [%%src1 + %%result]
+ cmp %%tmp8, [%%src2 + %%result]
+ jne %%end
+ inc %%result
+ dec %%size
+ jg %%loop3
+ jmp %%end
+%%miscompare:
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+%%end:
+%endm
+
+%endif ;UTILS_ASM
diff --git a/src/isa-l/igzip/igzip_decode_block_stateless.asm b/src/isa-l/igzip/igzip_decode_block_stateless.asm
new file mode 100644
index 000000000..22f3bf229
--- /dev/null
+++ b/src/isa-l/igzip/igzip_decode_block_stateless.asm
@@ -0,0 +1,800 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+default rel
+
+%include "reg_sizes.asm"
+
+%define DECOMP_OK 0
+%define END_INPUT 1
+%define OUT_OVERFLOW 2
+%define INVALID_BLOCK -1
+%define INVALID_SYMBOL -2
+%define INVALID_LOOKBACK -3
+
+%define ISAL_DECODE_LONG_BITS 12
+%define ISAL_DECODE_SHORT_BITS 10
+
+%define COPY_SIZE 16
+%define COPY_LEN_MAX 258
+
+%define IN_BUFFER_SLOP 8
+%define OUT_BUFFER_SLOP COPY_SIZE + COPY_LEN_MAX
+
+%include "inflate_data_structs.asm"
+%include "stdmac.asm"
+
+extern rfc1951_lookup_table
+
+
+
+%define LARGE_SHORT_SYM_LEN 25
+%define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
+%define LARGE_LONG_SYM_LEN 10
+%define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
+%define LARGE_SHORT_CODE_LEN_OFFSET 28
+%define LARGE_LONG_CODE_LEN_OFFSET 10
+%define LARGE_FLAG_BIT_OFFSET 25
+%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
+%define LARGE_SYM_COUNT_OFFSET 26
+%define LARGE_SYM_COUNT_LEN 2
+%define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
+%define LARGE_SHORT_MAX_LEN_OFFSET 26
+
+%define SMALL_SHORT_SYM_LEN 9
+%define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
+%define SMALL_LONG_SYM_LEN 9
+%define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
+%define SMALL_SHORT_CODE_LEN_OFFSET 11
+%define SMALL_LONG_CODE_LEN_OFFSET 10
+%define SMALL_FLAG_BIT_OFFSET 10
+%define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
+
+%define DIST_SYM_OFFSET 0
+%define DIST_SYM_LEN 5
+%define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
+%define DIST_SYM_EXTRA_OFFSET 5
+%define DIST_SYM_EXTRA_LEN 4
+%define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
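+
+;; A large short-table entry packs the decoded symbol(s), a symbol
+;; count, a long-code flag and the code length into 32 bits; field
+;; extraction in C terms, with e one dword _lit_huff_code entry:
+;;
+;;	uint32_t code_len = e >> LARGE_SHORT_CODE_LEN_OFFSET;
+;;	uint32_t sym_cnt  = (e >> LARGE_SYM_COUNT_OFFSET) & LARGE_SYM_COUNT_MASK;
+;;	int      is_hint  = (e & LARGE_FLAG_BIT) != 0;	/* needs long lookup */
+;;	uint32_t syms     = e & LARGE_SHORT_SYM_MASK;	/* packed literals   */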
+
+;; rax
+%define tmp3 rax
+%define read_in_2 rax
+%define look_back_dist rax
+
+;; rcx
+;; rdx arg3
+%define next_sym2 rdx
+%define copy_start rdx
+%define tmp4 rdx
+
+;; rdi arg1
+%define tmp1 rdi
+%define look_back_dist2 rdi
+%define next_bits2 rdi
+%define next_sym3 rdi
+
+;; rsi arg2
+%define tmp2 rsi
+%define next_sym_num rsi
+%define next_bits rsi
+
+;; rbx ; Saved
+%define next_in rbx
+
+;; rbp ; Saved
+%define end_in rbp
+
+;; r8
+%define repeat_length r8
+
+;; r9
+%define read_in r9
+
+;; r10
+%define read_in_length r10
+
+;; r11
+%define state r11
+
+;; r12 ; Saved
+%define next_out r12
+
+;; r13 ; Saved
+%define end_out r13
+
+;; r14 ; Saved
+%define next_sym r14
+
+;; r15 ; Saved
+%define rfc_lookup r15
+
+start_out_mem_offset equ 0
+read_in_mem_offset equ 8
+read_in_length_mem_offset equ 16
+next_out_mem_offset equ 24
+gpr_save_mem_offset equ 32
+stack_size equ 4 * 8 + 8 * 8
+
+%define _dist_extra_bit_count 264
+%define _dist_start _dist_extra_bit_count + 1*32
+%define _len_extra_bit_count _dist_start + 4*32
+%define _len_start _len_extra_bit_count + 1*32
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define arg0 rdi
+%define arg1 rsi
+
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+ push rbp
+ mov rbp, rsp
+ sub rsp, stack_size
+ and rsp, ~15
+%else
+ sub rsp, stack_size
+%endif
+
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rbp
+ mov [rsp + gpr_save_mem_offset + 2*8], r12
+ mov [rsp + gpr_save_mem_offset + 3*8], r13
+ mov [rsp + gpr_save_mem_offset + 4*8], r14
+ mov [rsp + gpr_save_mem_offset + 5*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 1*8]
+ mov r12, [rsp + gpr_save_mem_offset + 2*8]
+ mov r13, [rsp + gpr_save_mem_offset + 3*8]
+ mov r14, [rsp + gpr_save_mem_offset + 4*8]
+ mov r15, [rsp + gpr_save_mem_offset + 5*8]
+
+%ifndef ALIGN_STACK
+ add rsp, stack_size
+%else
+ mov rsp, rbp
+ pop rbp
+%endif
+%endm
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg0 rcx
+%define arg1 rdx
+
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+ push rbp
+ mov rbp, rsp
+ sub rsp, stack_size
+ and rsp, ~15
+%else
+ sub rsp, stack_size
+%endif
+
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rsi
+ mov [rsp + gpr_save_mem_offset + 2*8], rdi
+ mov [rsp + gpr_save_mem_offset + 3*8], rbp
+ mov [rsp + gpr_save_mem_offset + 4*8], r12
+ mov [rsp + gpr_save_mem_offset + 5*8], r13
+ mov [rsp + gpr_save_mem_offset + 6*8], r14
+ mov [rsp + gpr_save_mem_offset + 7*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rsi, [rsp + gpr_save_mem_offset + 1*8]
+ mov rdi, [rsp + gpr_save_mem_offset + 2*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 3*8]
+ mov r12, [rsp + gpr_save_mem_offset + 4*8]
+ mov r13, [rsp + gpr_save_mem_offset + 5*8]
+ mov r14, [rsp + gpr_save_mem_offset + 6*8]
+ mov r15, [rsp + gpr_save_mem_offset + 7*8]
+
+%ifndef ALIGN_STACK
+ add rsp, stack_size
+%else
+ mov rsp, rbp
+ pop rbp
+%endif
+%endm
+%endif
+
+;; Load read_in and update the in buffer accordingly
+;; when there are at least 8 bytes in the in buffer
+;; Clobbers rcx, unless rcx is %%read_in_length
+%macro inflate_in_load 6
+%define %%next_in %1
+%define %%end_in %2
+%define %%read_in %3
+%define %%read_in_length %4
+%define %%tmp1 %5 ; Tmp registers
+%define %%tmp2 %6
+
+ SHLX %%tmp1, [%%next_in], %%read_in_length
+ or %%read_in, %%tmp1
+
+ mov %%tmp1, 64
+ sub %%tmp1, %%read_in_length
+ shr %%tmp1, 3
+
+ add %%next_in, %%tmp1
+ lea %%read_in_length, [%%read_in_length + 8 * %%tmp1]
+%%end:
+%endm
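+
+;; In C terms the bulk refill above is roughly (load64() a hypothetical
+;; unaligned little-endian 64-bit load; it always reads 8 bytes, which
+;; the IN_BUFFER_SLOP guard in the caller makes safe):
+;;
+;;	read_in |= load64(next_in) << read_in_length;
+;;	bytes = (64 - read_in_length) >> 3;
+;;	next_in += bytes;
+;;	read_in_length += 8 * bytes;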
+
+;; Load read_in and update the in buffer accordingly
+;; Clobbers rcx, unless rcx is %%read_in_length
+%macro inflate_in_small_load 6
+%define %%next_in %1
+%define %%end_in %2
+%define %%read_in %3
+%define %%read_in_length %4
+%define %%avail_in %5 ; Tmp registers
+%define %%tmp1 %5
+%define %%loop_count %6
+
+ mov %%avail_in, %%end_in
+ sub %%avail_in, %%next_in
+
+%ifnidn %%read_in_length, rcx
+ mov rcx, %%read_in_length
+%endif
+
+ mov %%loop_count, 64
+ sub %%loop_count, %%read_in_length
+ shr %%loop_count, 3
+
+ cmp %%loop_count, %%avail_in
+ cmovg %%loop_count, %%avail_in
+ cmp %%loop_count, 0
+ je %%end
+
+%%load_byte:
+ xor %%tmp1, %%tmp1
+ mov %%tmp1 %+ b, byte [%%next_in]
+ SHLX %%tmp1, %%tmp1, rcx
+ or %%read_in, %%tmp1
+ add rcx, 8
+ add %%next_in, 1
+ sub %%loop_count, 1
+ jg %%load_byte
+%ifnidn %%read_in_length, rcx
+ mov %%read_in_length, rcx
+%endif
+%%end:
+%endm
+
+;; Clears all bits at index %%bit_count and above in %%next_bits
+;; May clobber rcx and %%bit_count
+%macro CLEAR_HIGH_BITS 3
+%define %%next_bits %1
+%define %%bit_count %2
+%define %%lookup_size %3
+
+ sub %%bit_count, 0x40 + %%lookup_size
+;; Extract the (15 - DECODE_LOOKUP_SIZE) bits beyond the first DECODE_LOOKUP_SIZE bits.
+%ifdef USE_HSWNI
+ and %%bit_count, 0x1F
+ bzhi %%next_bits, %%next_bits, %%bit_count
+%else
+%ifnidn %%bit_count, rcx
+ mov rcx, %%bit_count
+%endif
+ neg rcx
+ shl %%next_bits, cl
+ shr %%next_bits, cl
+%endif
+
+%endm
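+
+;; Net effect: keep only the low (bit_count - lookup_size) bits of
+;; next_bits, using bzhi when HSW instructions are available and a
+;; shl/shr pair by the complementary count otherwise.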
+
+;; Decode next symbol
+;; Clobber rcx
+%macro decode_next_lit_len 8
+%define %%state %1 ; State structure associated with compressed stream
+%define %%lookup_size %2 ; Number of bits used for small lookup
+%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
+%define %%read_in %4 ; Bits read in from compressed stream
+%define %%read_in_length %5 ; Number of valid bits in read_in
+%define %%next_sym %6 ; Returned symbols
+%define %%next_sym_num %7 ; Returned symbols count
+%define %%next_bits %8
+
+ mov %%next_sym_num, %%next_sym
+ mov rcx, %%next_sym
+ shr rcx, LARGE_SHORT_CODE_LEN_OFFSET
+ jz invalid_symbol
+
+ and %%next_sym_num, LARGE_SYM_COUNT_MASK << LARGE_SYM_COUNT_OFFSET
+ shr %%next_sym_num, LARGE_SYM_COUNT_OFFSET
+
+ ;; Check if symbol or hint was looked up
+ and %%next_sym, LARGE_FLAG_BIT | LARGE_SHORT_SYM_MASK
+ test %%next_sym, LARGE_FLAG_BIT
+ jz %%end
+
+ shl rcx, LARGE_SYM_COUNT_LEN
+ or rcx, %%next_sym_num
+
+ ;; Save length associated with symbol
+ mov %%next_bits, %%read_in
+ shr %%next_bits, %%lookup_size
+
+ ;; Extract the bits beyond the first %%lookup_size bits.
+ CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
+
+ and %%next_sym, LARGE_SHORT_SYM_MASK
+ add %%next_sym, %%next_bits
+
+ ;; Lookup actual next symbol
+ movzx %%next_sym, word [%%state + LARGE_LONG_CODE_SIZE * %%next_sym + %%state_offset + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size)]
+ mov %%next_sym_num, 1
+
+ ;; Save length associated with symbol
+ mov rcx, %%next_sym
+ shr rcx, LARGE_LONG_CODE_LEN_OFFSET
+ jz invalid_symbol
+ and %%next_sym, LARGE_LONG_SYM_MASK
+
+%%end:
+;; Update read_in to reflect the bits which were decoded
+ SHRX %%read_in, %%read_in, rcx
+ sub %%read_in_length, rcx
+%endm
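+
+;; Schematic C view of the two-level lookup above (hypothetical helper
+;; names; the long table sits right after the (1 << lookup_size)-entry
+;; short table in the state structure):
+;;
+;;	e = short_table[read_in & ((1 << lookup_size) - 1)];
+;;	if (e & LARGE_FLAG_BIT) {	/* hint, not a final symbol */
+;;		idx = (e & LARGE_SHORT_SYM_MASK)
+;;		      + extra_index_bits(read_in, lookup_size, e);
+;;		e = long_table[idx];
+;;	}
+;;	read_in >>= code_len(e);  read_in_length -= code_len(e);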
+
+;; Decode next symbol
+;; Clobber rcx
+%macro decode_next_lit_len_with_load 8
+%define %%state %1 ; State structure associated with compressed stream
+%define %%lookup_size %2 ; Number of bits used for small lookup
+%define %%state_offset %3
+%define %%read_in %4 ; Bits read in from compressed stream
+%define %%read_in_length %5 ; Number of valid bits in read_in
+%define %%next_sym %6 ; Returned symbols
+%define %%next_sym_num %7 ; Returned symbols count
+%define %%next_bits %8
+
+ ;; Lookup possible next symbol
+ mov %%next_bits, %%read_in
+ and %%next_bits, (1 << %%lookup_size) - 1
+ mov %%next_sym %+ d, dword [%%state + %%state_offset + LARGE_SHORT_CODE_SIZE * %%next_bits]
+
+ decode_next_lit_len %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_sym_num, %%next_bits
+%endm
+
+;; Decode next symbol
+;; Clobber rcx
+%macro decode_next_dist 8
+%define %%state %1 ; State structure associated with compressed stream
+%define %%lookup_size %2 ; Number of bits used for small lookup
+%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
+%define %%read_in %4 ; Bits read in from compressed stream
+%define %%read_in_length %5 ; Number of valid bits in read_in
+%define %%next_sym		%6 ; Returned symbol
+%define %%next_extra_bits %7
+%define %%next_bits %8
+
+ mov rcx, %%next_sym
+ shr rcx, SMALL_SHORT_CODE_LEN_OFFSET
+ jz invalid_dist_symbol_ %+ %%next_sym
+
+ ;; Check if symbol or hint was looked up
+ and %%next_sym, SMALL_FLAG_BIT | SMALL_SHORT_SYM_MASK
+ test %%next_sym, SMALL_FLAG_BIT
+ jz %%end
+
+ ;; Save length associated with symbol
+ mov %%next_bits, %%read_in
+ shr %%next_bits, %%lookup_size
+
+	;; Extract the (15 - DECODE_LOOKUP_SIZE) bits beyond the first %%lookup_size bits.
+ lea %%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym]
+
+ CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size
+
+ ;; Lookup actual next symbol
+ movzx %%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT]
+
+ ;; Save length associated with symbol
+ mov rcx, %%next_sym
+ shr rcx, SMALL_LONG_CODE_LEN_OFFSET
+ jz invalid_dist_symbol_ %+ %%next_sym
+ and %%next_sym, SMALL_SHORT_SYM_MASK
+
+%%end:
+	;; Update read_in to reflect the bits which were decoded
+ SHRX %%read_in, %%read_in, rcx
+ sub %%read_in_length, rcx
+ mov rcx, %%next_sym
+ shr rcx, DIST_SYM_EXTRA_OFFSET
+ and %%next_sym, DIST_SYM_MASK
+%endm
+
+;; Decode next symbol
+;; Clobber rcx
+%macro decode_next_dist_with_load 8
+%define %%state %1 ; State structure associated with compressed stream
+%define %%lookup_size %2 ; Number of bits used for small lookup
+%define %%state_offset %3
+%define %%read_in %4 ; Bits read in from compressed stream
+%define %%read_in_length %5 ; Number of valid bits in read_in
+%define %%next_sym		%6 ; Returned symbol
+%define %%next_extra_bits %7
+%define %%next_bits %8
+
+ ;; Lookup possible next symbol
+ mov %%next_bits, %%read_in
+ and %%next_bits, (1 << %%lookup_size) - 1
+ movzx %%next_sym, word [%%state + %%state_offset + SMALL_SHORT_CODE_SIZE * %%next_bits]
+
+ decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits
+%endm
+
+[bits 64]
+default rel
+section .text
+
+global decode_huffman_code_block_stateless_ %+ ARCH
+decode_huffman_code_block_stateless_ %+ ARCH %+ :
+ endbranch
+
+ FUNC_SAVE
+
+ mov state, arg0
+ mov [rsp + start_out_mem_offset], arg1
+ lea rfc_lookup, [rfc1951_lookup_table]
+
+	mov	read_in, [state + _read_in]
+ mov read_in_length %+ d, dword [state + _read_in_length]
+ mov next_out, [state + _next_out]
+ mov end_out %+ d, dword [state + _avail_out]
+ add end_out, next_out
+ mov next_in, [state + _next_in]
+ mov end_in %+ d, dword [state + _avail_in]
+ add end_in, next_in
+
+ mov dword [state + _copy_overflow_len], 0
+ mov dword [state + _copy_overflow_dist], 0
+
+ sub end_out, OUT_BUFFER_SLOP
+ sub end_in, IN_BUFFER_SLOP
+
+ cmp next_in, end_in
+ jg end_loop_block_pre
+
+ cmp read_in_length, 64
+ je skip_load
+
+ inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
+
+skip_load:
+ mov tmp3, read_in
+ and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
+ mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Main Loop
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+loop_block:
+ ;; Check if near end of in buffer or out buffer
+ cmp next_in, end_in
+ jg end_loop_block_pre
+ cmp next_out, end_out
+ jg end_loop_block_pre
+
+ ;; Decode next symbol and reload the read_in buffer
+ decode_next_lit_len state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1
+
+	;; Speculatively write next_sym if it is a literal
+ mov [next_out], next_sym
+ add next_out, next_sym_num
+ lea next_sym2, [8 * next_sym_num - 8]
+ SHRX next_sym2, next_sym, next_sym2
+
+	;; Find index to speculatively preload next_sym from
+ mov tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
+ and tmp3, read_in
+
+ ;; Start reloading read_in
+ mov tmp1, [next_in]
+ SHLX tmp1, tmp1, read_in_length
+ or read_in, tmp1
+
+	;; Speculatively load data associated with length symbol
+ lea repeat_length, [next_sym2 - 254]
+
+ ;; Test for end of block symbol
+ cmp next_sym2, 256
+ je end_symbol_pre
+
+	;; Speculatively load next_sym for next loop if a literal was decoded
+ mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]
+
+ ;; Finish updating read_in_length for read_in
+ mov tmp1, 64
+ sub tmp1, read_in_length
+ shr tmp1, 3
+ add next_in, tmp1
+ lea read_in_length, [read_in_length + 8 * tmp1]
+
+	;; Speculatively load next dist code
+ mov next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1
+ and next_bits2, read_in
+ movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2]
+
+ ;; Check if next_sym2 is a literal, length, or end of block symbol
+ cmp next_sym2, 256
+ jl loop_block
+
+decode_len_dist:
+ ;; Determine next_out after the copy is finished
+ lea next_out, [next_out + repeat_length - 1]
+
+ ;; Decode distance code
+ decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym3, rcx, tmp2
+
+ mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]
+
+	;; Load distance code extra bits
+ mov next_bits, read_in
+
+ ;; Calculate the look back distance
+ BZHI next_bits, next_bits, rcx, tmp4
+ SHRX read_in, read_in, rcx
+
+ ;; Setup next_sym, read_in, and read_in_length for next loop
+ mov read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1
+ and read_in_2, read_in
+ mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2]
+ sub read_in_length, rcx
+
+ ;; Copy distance in len/dist pair
+ add look_back_dist2, next_bits
+
+ ;; Find beginning of copy
+ mov copy_start, next_out
+ sub copy_start, repeat_length
+ sub copy_start, look_back_dist2
+
+	;; Check if a valid look back distance was decoded
+ cmp copy_start, [rsp + start_out_mem_offset]
+ jl invalid_look_back_distance
+ MOVDQU xmm1, [copy_start]
+
+ ;; Set tmp2 to be the minimum of COPY_SIZE and repeat_length
+ ;; This is to decrease use of small_byte_copy branch
+ mov tmp2, COPY_SIZE
+ cmp tmp2, repeat_length
+ cmovg tmp2, repeat_length
+
+ ;; Check for overlapping memory in the copy
+ cmp look_back_dist2, tmp2
+ jl small_byte_copy_pre
+
+large_byte_copy:
+ ;; Copy length distance pair when memory overlap is not an issue
+ MOVDQU [copy_start + look_back_dist2], xmm1
+
+ sub repeat_length, COPY_SIZE
+ jle loop_block
+
+ add copy_start, COPY_SIZE
+ MOVDQU xmm1, [copy_start]
+ jmp large_byte_copy
+
+small_byte_copy_pre:
+ ;; Copy length distance pair when source and destination overlap
+ add repeat_length, look_back_dist2
+small_byte_copy:
+ MOVDQU [copy_start + look_back_dist2], xmm1
+
+ shl look_back_dist2, 1
+ MOVDQU xmm1, [copy_start]
+ cmp look_back_dist2, COPY_SIZE
+ jl small_byte_copy
+
+ sub repeat_length, look_back_dist2
+ jge large_byte_copy
+ jmp loop_block
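+
+	;; When look_back_dist2 < COPY_SIZE each store overlaps the source
+	;; pattern; doubling the offset after every 16-byte store extends
+	;; the valid run until non-overlapping copies are safe. Roughly
+	;; (store16/load16 hypothetical unaligned helpers, length
+	;; bookkeeping omitted):
+	;;
+	;;	do {	/* bytes [p, p + d) already hold the pattern */
+	;;		store16(p + d, load16(p));
+	;;		d *= 2;
+	;;	} while (d < COPY_SIZE);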
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Finish Main Loop
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+end_loop_block_pre:
+ ;; Fix up in buffer and out buffer to reflect the actual buffer end
+ add end_out, OUT_BUFFER_SLOP
+ add end_in, IN_BUFFER_SLOP
+
+end_loop_block:
+ ;; Load read in buffer and decode next lit/len symbol
+ inflate_in_small_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
+ mov [rsp + read_in_mem_offset], read_in
+ mov [rsp + read_in_length_mem_offset], read_in_length
+ mov [rsp + next_out_mem_offset], next_out
+
+ decode_next_lit_len_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1
+
+ ;; Check that enough input was available to decode symbol
+ cmp read_in_length, 0
+ jl end_of_input
+
+multi_symbol_start:
+ cmp next_sym_num, 1
+ jg decode_literal
+
+ cmp next_sym, 256
+ jl decode_literal
+ je end_symbol
+
+decode_len_dist_2:
+ lea repeat_length, [next_sym - 254]
+ ;; Decode distance code
+ decode_next_dist_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, rcx, tmp1
+
+ ;; Load distance code extra bits
+ mov next_bits, read_in
+ mov look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym]
+
+ ;; Calculate the look back distance and check for enough input
+ BZHI next_bits, next_bits, rcx, tmp1
+ SHRX read_in, read_in, rcx
+ add look_back_dist, next_bits
+ sub read_in_length, rcx
+ jl end_of_input
+
+ ;; Setup code for byte copy using rep movsb
+ mov rsi, next_out
+ mov rdi, rsi
+ mov rcx, repeat_length
+ sub rsi, look_back_dist
+
+ ;; Check if a valid look back distance was decoded
+ cmp rsi, [rsp + start_out_mem_offset]
+ jl invalid_look_back_distance
+
+ ;; Check for out buffer overflow
+ add repeat_length, next_out
+ cmp repeat_length, end_out
+ jg out_buffer_overflow_repeat
+
+ mov next_out, repeat_length
+
+ rep movsb
+ jmp end_loop_block
+
+decode_literal:
+ ;; Store literal decoded from the input stream
+ cmp next_out, end_out
+ jge out_buffer_overflow_lit
+ add next_out, 1
+ mov byte [next_out - 1], next_sym %+ b
+ sub next_sym_num, 1
+ jz end_loop_block
+ shr next_sym, 8
+ jmp multi_symbol_start
+
+;; Set exit codes
+end_of_input:
+ mov read_in, [rsp + read_in_mem_offset]
+ mov read_in_length, [rsp + read_in_length_mem_offset]
+ mov next_out, [rsp + next_out_mem_offset]
+ xor tmp1, tmp1
+ mov dword [state + _write_overflow_lits], tmp1 %+ d
+ mov dword [state + _write_overflow_len], tmp1 %+ d
+ mov rax, END_INPUT
+ jmp end
+
+out_buffer_overflow_repeat:
+ mov rcx, end_out
+ sub rcx, next_out
+ sub repeat_length, rcx
+ sub repeat_length, next_out
+ rep movsb
+
+ mov [state + _copy_overflow_len], repeat_length %+ d
+ mov [state + _copy_overflow_dist], look_back_dist %+ d
+
+ mov next_out, end_out
+
+ mov rax, OUT_OVERFLOW
+ jmp end
+
+out_buffer_overflow_lit:
+ mov dword [state + _write_overflow_lits], next_sym %+ d
+ mov dword [state + _write_overflow_len], next_sym_num %+ d
+ sub next_sym_num, 1
+ shl next_sym_num, 3
+ SHRX next_sym, next_sym, next_sym_num
+ mov rax, OUT_OVERFLOW
+ shr next_sym_num, 3
+ cmp next_sym, 256
+ jl end
+ mov dword [state + _write_overflow_len], next_sym_num %+ d
+ jg decode_len_dist_2
+ jmp end_state
+
+invalid_look_back_distance:
+ mov rax, INVALID_LOOKBACK
+ jmp end
+
+invalid_dist_symbol_ %+ next_sym:
+ cmp read_in_length, next_sym
+ jl end_of_input
+ jmp invalid_symbol
+invalid_dist_symbol_ %+ next_sym3:
+ cmp read_in_length, next_sym3
+ jl end_of_input
+invalid_symbol:
+ mov rax, INVALID_SYMBOL
+ jmp end
+
+end_symbol_pre:
+ ;; Fix up in buffer and out buffer to reflect the actual buffer
+ sub next_out, 1
+ add end_out, OUT_BUFFER_SLOP
+ add end_in, IN_BUFFER_SLOP
+end_symbol:
+ xor rax, rax
+end_state:
+ ;; Set flag identifying a new block is required
+ mov byte [state + _block_state], ISAL_BLOCK_NEW_HDR
+ cmp dword [state + _bfinal], 0
+ je end
+ mov byte [state + _block_state], ISAL_BLOCK_INPUT_DONE
+
+end:
+ ;; Save current buffer states
+ mov [state + _read_in], read_in
+ mov [state + _read_in_length], read_in_length %+ d
+
+ ;; Set avail_out
+ sub end_out, next_out
+ mov dword [state + _avail_out], end_out %+ d
+
+ ;; Set total_out
+ mov tmp1, next_out
+ sub tmp1, [state + _next_out]
+ add [state + _total_out], tmp1 %+ d
+
+ ;; Set next_out
+ mov [state + _next_out], next_out
+
+ ;; Set next_in
+ mov [state + _next_in], next_in
+
+ ;; Set avail_in
+ sub end_in, next_in
+ mov [state + _avail_in], end_in %+ d
+
+ FUNC_RESTORE
+
+ ret
diff --git a/src/isa-l/igzip/igzip_decode_block_stateless_01.asm b/src/isa-l/igzip/igzip_decode_block_stateless_01.asm
new file mode 100644
index 000000000..4aa39fe1c
--- /dev/null
+++ b/src/isa-l/igzip/igzip_decode_block_stateless_01.asm
@@ -0,0 +1,3 @@
+%define ARCH 01
+
+%include "igzip_decode_block_stateless.asm"
diff --git a/src/isa-l/igzip/igzip_decode_block_stateless_04.asm b/src/isa-l/igzip/igzip_decode_block_stateless_04.asm
new file mode 100644
index 000000000..769fca22d
--- /dev/null
+++ b/src/isa-l/igzip/igzip_decode_block_stateless_04.asm
@@ -0,0 +1,4 @@
+%define ARCH 04
+%define USE_HSWNI
+
+%include "igzip_decode_block_stateless.asm"
diff --git a/src/isa-l/igzip/igzip_deflate_hash.asm b/src/isa-l/igzip/igzip_deflate_hash.asm
new file mode 100644
index 000000000..32a148285
--- /dev/null
+++ b/src/isa-l/igzip/igzip_deflate_hash.asm
@@ -0,0 +1,170 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "huffman.asm"
+%include "reg_sizes.asm"
+
+%define DICT_SLOP 8
+%define DICT_END_SLOP 4
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define arg4 r9
+%define arg5 rdi
+%define swap1 rsi
+%define stack_size 3 * 8
+%define PS 8
+%define arg(x) [rsp + stack_size + PS*x]
+%else
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define arg4 rcx
+%define arg5 r8
+%define swap1 r9
+%endif
+
+%define hash_table arg1
+
+%define hash_mask arg2
+
+%define f_i_end arg3
+
+%define dict_offset arg4
+
+%define dict_len arg5
+%define f_i arg5
+
+%define f_i_tmp rax
+
+%define hash swap1
+
+%define hash2 r10
+
+%define hash3 r11
+
+%define hash4 r12
+
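+;; The hash is a CRC32-C of 4 input bytes (the SSE4.2 crc32 instruction)
+;; masked with hash_mask; per dictionary position i the loops below do,
+;; in C terms (_mm_crc32_u32 from SSE4.2, load32() hypothetical):
+;;
+;;	uint32_t h = _mm_crc32_u32(0, load32(dict + i)) & hash_mask;
+;;	hash_table[h] = (uint16_t)i;	/* 16-bit head entries */
+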
+
+%macro FUNC_SAVE 0
+%ifidn __OUTPUT_FORMAT__, win64
+ push rsi
+ push rdi
+ push r12
+ mov arg5 %+ d, arg(5)
+%else
+ push r12
+%endif
+%endm
+
+%macro FUNC_RESTORE 0
+%ifidn __OUTPUT_FORMAT__, win64
+ pop r12
+ pop rdi
+ pop rsi
+%else
+ pop r12
+%endif
+%endm
+
+[bits 64]
+default rel
+section .text
+
+global isal_deflate_hash_crc_01
+isal_deflate_hash_crc_01:
+ endbranch
+ FUNC_SAVE
+
+ neg f_i
+ add f_i, f_i_end
+
+ sub dict_offset, f_i
+
+ sub f_i_end, DICT_SLOP
+ cmp f_i, f_i_end
+ jg end_main
+
+main_loop:
+ lea f_i_tmp, [f_i + 2]
+
+ xor hash, hash
+ crc32 hash %+ d, dword [f_i + dict_offset]
+
+ xor hash2, hash2
+ crc32 hash2 %+ d, dword [f_i + dict_offset + 1]
+
+ xor hash3, hash3
+ crc32 hash3 %+ d, dword [f_i_tmp + dict_offset]
+
+ xor hash4, hash4
+ crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1]
+
+ and hash, hash_mask
+ and hash2, hash_mask
+ and hash3, hash_mask
+ and hash4, hash_mask
+
+ mov [hash_table + 2 * hash], f_i %+ w
+ add f_i, 1
+
+ mov [hash_table + 2 * hash2], f_i %+ w
+ add f_i, 3
+
+ mov [hash_table + 2 * hash3], f_i_tmp %+ w
+ add f_i_tmp, 1
+
+ mov [hash_table + 2 * hash4], f_i_tmp %+ w
+
+ cmp f_i, f_i_end
+ jle main_loop
+
+end_main:
+ add f_i_end, DICT_SLOP - DICT_END_SLOP
+ cmp f_i, f_i_end
+ jg end
+
+end_loop:
+ xor hash, hash
+ crc32 hash %+ d, dword [f_i + dict_offset]
+
+ and hash, hash_mask
+ mov [hash_table + 2 * hash], f_i %+ w
+
+ add f_i, 1
+ cmp f_i, f_i_end
+ jle end_loop
+end:
+ FUNC_RESTORE
+ ret
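
The routine above seeds igzip's level-1 hash table from dictionary bytes:
each position hashes four bytes with the SSE4.2 crc32 instruction, masks
the result with hash_mask, and stores the 16-bit position into the
2-byte-per-entry table (the main loop is unrolled four ways, with a
one-position tail loop). A scalar C model of the same update, useful for
following the unrolled asm, could look like the sketch below;
deflate_hash_crc_ref and its exact argument layout are illustrative, not
a shipped API.

	#include <stdint.h>
	#include <string.h>
	#include <nmmintrin.h>	/* _mm_crc32_u32; needs SSE4.2 */

	static void deflate_hash_crc_ref(uint16_t *hash_table, uint32_t hash_mask,
					 uint32_t f_i_end, const uint8_t *dict,
					 uint32_t dict_len)
	{
		/* Mirror the asm: f_i runs from f_i_end - dict_len up to
		 * f_i_end - DICT_END_SLOP (4), reading dict bytes in step. */
		const uint8_t *base = dict - (f_i_end - dict_len);
		uint32_t f_i;

		for (f_i = f_i_end - dict_len; f_i + 4 <= f_i_end; f_i++) {
			uint32_t data, hash;

			memcpy(&data, base + f_i, sizeof(data));
			hash = _mm_crc32_u32(0, data) & hash_mask;
			hash_table[hash] = (uint16_t) f_i;	/* 2-byte entries */
		}
	}
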
diff --git a/src/isa-l/igzip/igzip_example.c b/src/isa-l/igzip/igzip_example.c
new file mode 100644
index 000000000..5930c717f
--- /dev/null
+++ b/src/isa-l/igzip/igzip_example.c
@@ -0,0 +1,101 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "igzip_lib.h"
+
+#define BUF_SIZE 8192
+#ifndef LEVEL
+# define LEVEL 0
+#else
+/* any externally supplied -DLEVEL selects level-1 compression */
+# undef LEVEL
+# define LEVEL 1
+#endif
+
+struct isal_zstream stream;
+
+int main(int argc, char *argv[])
+{
+ uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
+ FILE *in, *out;
+
+	if (argc != 3) {
+		fprintf(stderr, "Usage: igzip_example infile outfile\n");
+		exit(1);
+	}
+	in = fopen(argv[1], "rb");
+	if (!in) {
+		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+		exit(1);
+	}
+	out = fopen(argv[2], "wb");
+	if (!out) {
+		fprintf(stderr, "Can't open %s for writing\n", argv[2]);
+		exit(1);
+	}
+
+ printf("igzip_example\nWindow Size: %d K\n", IGZIP_HIST_SIZE / 1024);
+ fflush(0);
+
+ isal_deflate_init(&stream);
+ stream.end_of_stream = 0;
+ stream.flush = NO_FLUSH;
+
+ if (LEVEL == 1) {
+ stream.level = 1;
+ stream.level_buf = malloc(ISAL_DEF_LVL1_DEFAULT);
+ stream.level_buf_size = ISAL_DEF_LVL1_DEFAULT;
+		if (stream.level_buf == NULL) {
+			printf("Failed to allocate level compression buffer\n");
+			exit(1);
+		}
+ }
+
+ do {
+ stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in);
+ stream.end_of_stream = feof(in) ? 1 : 0;
+ stream.next_in = inbuf;
+ do {
+ stream.avail_out = BUF_SIZE;
+ stream.next_out = outbuf;
+
+ isal_deflate(&stream);
+
+ fwrite(outbuf, 1, BUF_SIZE - stream.avail_out, out);
+ } while (stream.avail_out == 0);
+
+ assert(stream.avail_in == 0);
+ } while (stream.internal_state.state != ZSTATE_END);
+
+ fclose(out);
+ fclose(in);
+
+ printf("End of igzip_example\n\n");
+ return 0;
+}
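
The example drives isal_deflate as a pure stream: the outer loop feeds
BUF_SIZE chunks and raises end_of_stream at EOF, the inner loop drains
output until isal_deflate leaves space in outbuf, and the run ends when
internal_state.state reaches ZSTATE_END. For reference, a matching
decompression loop over the same public igzip_lib.h API might look like
this (a sketch: error handling is trimmed, and it assumes the same
raw-DEFLATE default wrapper as the compressor above):

	#include <stdio.h>
	#include <stdlib.h>
	#include "igzip_lib.h"

	#define BUF_SIZE 8192

	int main(int argc, char *argv[])
	{
		uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
		struct inflate_state state;
		FILE *in, *out;

		if (argc != 3) {
			fprintf(stderr, "Usage: inflate_example infile outfile\n");
			exit(1);
		}
		in = fopen(argv[1], "rb");
		out = fopen(argv[2], "wb");
		if (!in || !out)
			exit(1);

		isal_inflate_init(&state);
		do {
			state.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in);
			state.next_in = inbuf;
			do {
				state.avail_out = BUF_SIZE;
				state.next_out = outbuf;
				if (isal_inflate(&state) != ISAL_DECOMP_OK)
					exit(1);	/* corrupt stream */
				fwrite(outbuf, 1, BUF_SIZE - state.avail_out, out);
			} while (state.avail_out == 0);
		} while (state.block_state != ISAL_BLOCK_FINISH && !feof(in));

		fclose(out);
		fclose(in);
		return 0;
	}
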
diff --git a/src/isa-l/igzip/igzip_file_perf.c b/src/isa-l/igzip/igzip_file_perf.c
new file mode 100644
index 000000000..05c028028
--- /dev/null
+++ b/src/isa-l/igzip/igzip_file_perf.c
@@ -0,0 +1,348 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define _FILE_OFFSET_BITS 64
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <getopt.h>
+#include "igzip_lib.h"
+#include "test.h"
+
+#define BUF_SIZE 1024
+
+int level_size_buf[10] = {
+#ifdef ISAL_DEF_LVL0_DEFAULT
+ ISAL_DEF_LVL0_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL1_DEFAULT
+ ISAL_DEF_LVL1_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL2_DEFAULT
+ ISAL_DEF_LVL2_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL3_DEFAULT
+ ISAL_DEF_LVL3_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL4_DEFAULT
+ ISAL_DEF_LVL4_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL5_DEFAULT
+ ISAL_DEF_LVL5_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL6_DEFAULT
+ ISAL_DEF_LVL6_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL7_DEFAULT
+ ISAL_DEF_LVL7_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL8_DEFAULT
+ ISAL_DEF_LVL8_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL9_DEFAULT
+ ISAL_DEF_LVL9_DEFAULT,
+#else
+ 0,
+#endif
+};
+
+int usage(void)
+{
+ fprintf(stderr,
+ "Usage: igzip_file_perf [options] <infile>\n"
+ " -h help\n"
+		" -X use compression level X with 0 <= X <= ISAL_DEF_MAX_LEVEL\n"
+		" -b <size> input buffer size, 0 buffers all the input\n"
+		" -i <time> time in seconds to benchmark (at least 0)\n"
+		" -o <file> output file for compressed data\n"
+ " -d <file> dictionary file used by compression\n"
+ " -w <size> log base 2 size of history window, between 8 and 15\n");
+
+ exit(0);
+}
+
+void deflate_perf(struct isal_zstream *stream, uint8_t * inbuf, size_t infile_size,
+ size_t inbuf_size, uint8_t * outbuf, size_t outbuf_size, int level,
+ uint8_t * level_buf, int level_size, uint32_t hist_bits, uint8_t * dictbuf,
+ size_t dictfile_size, struct isal_dict *dict_str,
+ struct isal_hufftables *hufftables_custom)
+{
+ int avail_in;
+ isal_deflate_init(stream);
+ stream->level = level;
+ stream->level_buf = level_buf;
+ stream->level_buf_size = level_size;
+
+ if (COMP_OK != isal_deflate_reset_dict(stream, dict_str))
+ if (dictbuf != NULL)
+ isal_deflate_set_dict(stream, dictbuf, dictfile_size);
+
+ stream->end_of_stream = 0;
+ stream->flush = NO_FLUSH;
+ stream->next_out = outbuf;
+ stream->avail_out = outbuf_size;
+ stream->next_in = inbuf;
+ if (hufftables_custom != NULL)
+ stream->hufftables = hufftables_custom;
+ stream->hist_bits = hist_bits;
+ avail_in = infile_size;
+
+ while (avail_in > 0) {
+ stream->avail_in = avail_in >= inbuf_size ? inbuf_size : avail_in;
+ avail_in -= inbuf_size;
+
+ if (avail_in <= 0)
+ stream->end_of_stream = 1;
+
+ isal_deflate(stream);
+
+ if (stream->avail_in != 0)
+ break;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *in = NULL, *out = NULL, *dict = NULL;
+ unsigned char *inbuf, *outbuf, *level_buf = NULL, *dictbuf = NULL;
+ int c, time = BENCHMARK_TIME, inbuf_size = 0;
+ size_t infile_size, outbuf_size, dictfile_size;
+ struct isal_huff_histogram histogram;
+ struct isal_hufftables hufftables_custom;
+ int level = 0, level_size = 0;
+ char *in_file_name = NULL, *out_file_name = NULL, *dict_file_name = NULL;
+ uint32_t hist_bits = 0;
+ struct isal_zstream stream;
+
+ while ((c = getopt(argc, argv, "h0123456789i:b:o:d:w:")) != -1) {
+ if (c >= '0' && c <= '9') {
+ if (c > '0' + ISAL_DEF_MAX_LEVEL)
+ usage();
+ else {
+ level = c - '0';
+ level_size = level_size_buf[level];
+ }
+ continue;
+ }
+
+ switch (c) {
+ case 'o':
+ out_file_name = optarg;
+ break;
+ case 'd':
+ dict_file_name = optarg;
+ break;
+ case 'i':
+ time = atoi(optarg);
+ if (time < 0)
+ usage();
+ break;
+ case 'b':
+ inbuf_size = atoi(optarg);
+ break;
+ case 'w':
+ hist_bits = atoi(optarg);
+ if (hist_bits > 15 || hist_bits < 8)
+ usage();
+ break;
+ case 'h':
+ default:
+ usage();
+ break;
+ }
+ }
+
+ if (optind < argc) {
+ in_file_name = argv[optind];
+ in = fopen(in_file_name, "rb");
+ } else
+ usage();
+
+ if (!in) {
+ fprintf(stderr, "Can't open %s for reading\n", in_file_name);
+ exit(0);
+ }
+ if (out_file_name != NULL) {
+ out = fopen(out_file_name, "wb");
+ if (!out) {
+ fprintf(stderr, "Can't open %s for writing\n", out_file_name);
+ exit(0);
+ }
+ printf("outfile=%s\n", out_file_name);
+ }
+
+ if (dict_file_name != NULL) {
+ dict = fopen(dict_file_name, "rb");
+ if (!dict) {
+ fprintf(stderr, "Can't open %s for reading\n", dict_file_name);
+ exit(0);
+ }
+		printf("dictfile=%s\n", dict_file_name);
+ }
+
+	if (hist_bits == 0)
+		printf("Window Size: %d K\n", IGZIP_HIST_SIZE / 1024);
+	else if (hist_bits < 10)
+		printf("Window Size: %.2f K\n", 1.0 * (1 << hist_bits) / 1024);
+	else
+		printf("Window Size: %d K\n", (1 << hist_bits) / 1024);
+
+ printf("igzip_file_perf: \n");
+ fflush(0);
+
+ /* Allocate space for entire input file and output
+ * (assuming some possible expansion on output size)
+ */
+ infile_size = get_filesize(in);
+
+ outbuf_size = 2 * infile_size + BUF_SIZE;
+
+ dictfile_size = (dict_file_name != NULL) ? get_filesize(dict) : 0;
+
+ inbuf = malloc(infile_size);
+ if (inbuf == NULL) {
+ fprintf(stderr, "Can't allocate input buffer memory\n");
+ exit(0);
+ }
+ outbuf = malloc(outbuf_size);
+ if (outbuf == NULL) {
+ fprintf(stderr, "Can't allocate output buffer memory\n");
+ exit(0);
+ }
+
+ if (dictfile_size != 0) {
+ dictbuf = malloc(dictfile_size);
+ if (dictbuf == NULL) {
+ fprintf(stderr, "Can't allocate dictionary buffer memory\n");
+ exit(0);
+ }
+ }
+
+ if (level_size != 0) {
+ level_buf = malloc(level_size);
+ if (level_buf == NULL) {
+ fprintf(stderr, "Can't allocate level buffer memory\n");
+ exit(0);
+ }
+ }
+
+ inbuf_size = inbuf_size ? inbuf_size : infile_size;
+
+ printf("igzip_file_perf: %s\n", in_file_name);
+
+ /* Read complete input file into buffer */
+ stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
+ if (stream.avail_in != infile_size) {
+ fprintf(stderr, "Couldn't fit all of input file into buffer\n");
+ exit(0);
+ }
+
+ /* Read complete dictionary into buffer */
+ if ((dictfile_size != 0) && (dictfile_size != fread(dictbuf, 1, dictfile_size, dict))) {
+ fprintf(stderr, "Couldn't fit all of dictionary file into buffer\n");
+ exit(0);
+ }
+
+ struct isal_dict dict_str;
+ stream.level = level;
+ isal_deflate_process_dict(&stream, &dict_str, dictbuf, dictfile_size);
+
+ struct perf start;
+ if (time > 0) {
+ BENCHMARK(&start, time,
+ deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf,
+ outbuf_size, level, level_buf, level_size, hist_bits,
+ dictbuf, dictfile_size, &dict_str, NULL));
+ } else {
+ deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
+ level, level_buf, level_size, hist_bits, dictbuf,
+ dictfile_size, &dict_str, NULL);
+ }
+ if (stream.avail_in != 0) {
+ fprintf(stderr, "Could not compress all of inbuf\n");
+ exit(0);
+ }
+
+	printf("	file %s - in_size=%zu out_size=%u ratio=%3.1f%%",
+ in_file_name, infile_size, stream.total_out,
+ 100.0 * stream.total_out / infile_size);
+
+ if (level == 0) {
+ memset(&histogram, 0, sizeof(histogram));
+
+ isal_update_histogram(inbuf, infile_size, &histogram);
+ isal_create_hufftables(&hufftables_custom, &histogram);
+
+ deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
+ level, level_buf, level_size, hist_bits, dictbuf,
+ dictfile_size, &dict_str, &hufftables_custom);
+
+ printf(" ratio_custom=%3.1f%%", 100.0 * stream.total_out / infile_size);
+ }
+ printf("\n");
+
+ if (stream.avail_in != 0) {
+ fprintf(stderr, "Could not compress all of inbuf\n");
+ exit(0);
+ }
+
+ printf("igzip_file: ");
+ perf_print(start, (long long)infile_size);
+
+	if (out) {
+ printf("writing %s\n", out_file_name);
+ fwrite(outbuf, 1, stream.total_out, out);
+ fclose(out);
+ }
+
+ fclose(in);
+ printf("End of igzip_file_perf\n\n");
+ fflush(0);
+ return 0;
+}
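
Aside from timing, the notable path above is the custom-Huffman rerun at
level 0: isal_update_histogram makes a counting pass over the input,
isal_create_hufftables derives code tables from the counts, and a second
compression pass uses those tables instead of the static defaults, which
is what the extra ratio_custom figure measures. Condensed (same calls as
the file above; buffers assumed already loaded):

	struct isal_huff_histogram histogram;
	struct isal_hufftables hufftables_custom;

	memset(&histogram, 0, sizeof(histogram));
	isal_update_histogram(inbuf, infile_size, &histogram);	/* pass 1: count symbols */
	isal_create_hufftables(&hufftables_custom, &histogram);	/* build the code tables */

	isal_deflate_init(&stream);
	stream.hufftables = &hufftables_custom;	/* pass 2 compresses with them */
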
diff --git a/src/isa-l/igzip/igzip_finish.asm b/src/isa-l/igzip/igzip_finish.asm
new file mode 100644
index 000000000..2b539dd45
--- /dev/null
+++ b/src/isa-l/igzip/igzip_finish.asm
@@ -0,0 +1,330 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "bitbuf2.asm"
+%include "huffman.asm"
+%include "igzip_compare_types.asm"
+
+%include "stdmac.asm"
+%include "reg_sizes.asm"
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define curr_data rax
+%define tmp1 rax
+
+%define f_index rbx
+%define code rbx
+%define tmp4 rbx
+%define tmp5 rbx
+%define tmp6 rbx
+
+%define tmp2 rcx
+%define hash rcx
+
+%define tmp3 rdx
+
+%define stream rsi
+
+%define f_i rdi
+
+%define code_len2 rbp
+%define hmask1 rbp
+
+%define m_out_buf r8
+
+%define m_bits r9
+
+%define dist r10
+%define hmask2 r10
+
+%define m_bit_count r11
+
+%define code2 r12
+%define f_end_i r12
+
+%define file_start r13
+
+%define len r14
+
+%define hufftables r15
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+f_end_i_mem_offset equ 0 ; local variable (8 bytes)
+stack_size equ 8
+
+[bits 64]
+default rel
+section .text
+
+; void isal_deflate_finish ( isal_zstream *stream )
+; arg 1: rcx: addr of stream
+global isal_deflate_finish_01
+isal_deflate_finish_01:
+ endbranch
+ PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ sub rsp, stack_size
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ mov rcx, rdi
+%endif
+
+ mov stream, rcx
+
+ ; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
+ mov m_out_buf, [stream + _next_out]
+ mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf
+ mov tmp1 %+ d, [stream + _avail_out]
+ add tmp1, m_out_buf
+ sub tmp1, SLOP
+skip_SLOP:
+ mov [stream + _internal_state_bitbuf_m_out_end], tmp1
+
+ mov m_bits, [stream + _internal_state_bitbuf_m_bits]
+ mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
+
+ mov hufftables, [stream + _hufftables]
+
+ mov file_start, [stream + _next_in]
+
+ mov f_i %+ d, dword [stream + _total_in]
+ sub file_start, f_i
+
+ mov f_end_i %+ d, dword [stream + _avail_in]
+ add f_end_i, f_i
+
+ sub f_end_i, LAST_BYTES_COUNT
+ mov [rsp + f_end_i_mem_offset], f_end_i
+ ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
+ cmp f_i, f_end_i
+ jge end_loop_2
+
+ mov curr_data %+ d, [file_start + f_i]
+
+ cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
+ jne skip_write_first_byte
+
+ cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+ ja end_loop_2
+ mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
+ compute_hash hash, curr_data
+ and hash %+ d, hmask1 %+ d
+ mov [stream + _internal_state_head + 2 * hash], f_i %+ w
+ mov byte [stream + _internal_state_has_hist], IGZIP_HIST
+ jmp encode_literal
+
+skip_write_first_byte:
+
+loop2:
+ mov tmp3 %+ d, dword [stream + _internal_state_dist_mask]
+ mov hmask1 %+ d, dword [stream + _internal_state_hash_mask]
+ ; if (state->bitbuf.is_full()) {
+ cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+ ja end_loop_2
+
+ ; hash = compute_hash(state->file_start + f_i) & hash_mask;
+ mov curr_data %+ d, [file_start + f_i]
+ compute_hash hash, curr_data
+ and hash %+ d, hmask1 %+ d
+
+ ; f_index = state->head[hash];
+ movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash]
+
+ ; state->head[hash] = (uint16_t) f_i;
+ mov [stream + _internal_state_head + 2 * hash], f_i %+ w
+
+ ; dist = f_i - f_index; // mod 64k
+ mov dist %+ d, f_i %+ d
+ sub dist %+ d, f_index %+ d
+ and dist %+ d, 0xFFFF
+
+ ; if ((dist-1) <= (D-1)) {
+ mov tmp1 %+ d, dist %+ d
+ sub tmp1 %+ d, 1
+ cmp tmp1 %+ d, tmp3 %+ d
+ jae encode_literal
+
+ ; len = f_end_i - f_i;
+ mov tmp4, [rsp + f_end_i_mem_offset]
+ sub tmp4, f_i
+ add tmp4, LAST_BYTES_COUNT
+
+ ; if (len > 258) len = 258;
+ cmp tmp4, 258
+ cmovg tmp4, [c258]
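+	; (cmov has no immediate-operand form, so the 258 limit is loaded
+	;  from the c258 constant in the .data section below)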
+
+ ; len = compare(state->file_start + f_i,
+ ; state->file_start + f_i - dist, len);
+ lea tmp1, [file_start + f_i]
+ mov tmp2, tmp1
+ sub tmp2, dist
+ compare tmp4, tmp1, tmp2, len, tmp3
+
+ ; if (len >= SHORTEST_MATCH) {
+ cmp len, SHORTEST_MATCH
+ jb encode_literal
+
+ ;; encode as dist/len
+
+ ; get_dist_code(dist, &code2, &code_len2);
+ dec dist
+ get_dist_code dist, code2, code_len2, hufftables ;; clobbers dist, rcx
+
+ ; get_len_code(len, &code, &code_len);
+ get_len_code len, code, rcx, hufftables ;; rcx is code_len
+
+ mov hmask2 %+ d, dword [stream + _internal_state_hash_mask]
+ ; code2 <<= code_len
+ ; code2 |= code
+ ; code_len2 += code_len
+ SHLX code2, code2, rcx
+ or code2, code
+ add code_len2, rcx
+
+ ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
+ lea tmp3, [f_i + 1] ; tmp3 <= k
+ add f_i, len
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jae skip_hash_update
+
+ ; only update hash twice
+
+ ; hash = compute_hash(state->file_start + k) & hash_mask;
+ mov tmp6 %+ d, dword [file_start + tmp3]
+ compute_hash hash, tmp6
+ and hash %+ d, hmask2 %+ d
+ ; state->head[hash] = k;
+ mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+
+ add tmp3, 1
+
+ ; hash = compute_hash(state->file_start + k) & hash_mask;
+ mov tmp6 %+ d, dword [file_start + tmp3]
+ compute_hash hash, tmp6
+ and hash %+ d, hmask2 %+ d
+ ; state->head[hash] = k;
+ mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w
+
+skip_hash_update:
+ write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
+
+ ; continue
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jl loop2
+ jmp end_loop_2
+
+encode_literal:
+ ; get_lit_code(state->file_start[f_i], &code2, &code_len2);
+ movzx tmp5, byte [file_start + f_i]
+ get_lit_code tmp5, code2, code_len2, hufftables
+
+ write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
+
+ ; continue
+ add f_i, 1
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jl loop2
+
+end_loop_2:
+ mov f_end_i, [rsp + f_end_i_mem_offset]
+ add f_end_i, LAST_BYTES_COUNT
+ mov [rsp + f_end_i_mem_offset], f_end_i
+ ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
+ cmp f_i, f_end_i
+ jge write_eob
+
+ xor tmp5, tmp5
+final_bytes:
+ cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+ ja not_end
+ movzx tmp5, byte [file_start + f_i]
+ get_lit_code tmp5, code2, code_len2, hufftables
+ write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
+
+ inc f_i
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jl final_bytes
+
+write_eob:
+ cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
+ ja not_end
+
+ ; get_lit_code(256, &code2, &code_len2);
+ get_lit_code 256, code2, code_len2, hufftables
+
+ write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf
+
+ mov byte [stream + _internal_state_has_eob], 1
+ cmp word [stream + _end_of_stream], 1
+ jne sync_flush
+ ; state->state = ZSTATE_TRL;
+ mov dword [stream + _internal_state_state], ZSTATE_TRL
+ jmp not_end
+
+sync_flush:
+ ; state->state = ZSTATE_SYNC_FLUSH;
+ mov dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
+ ; }
+not_end:
+
+
+ ;; Update input buffer
+ mov f_end_i, [rsp + f_end_i_mem_offset]
+ mov [stream + _total_in], f_i %+ d
+ add file_start, f_i
+ mov [stream + _next_in], file_start
+ sub f_end_i, f_i
+ mov [stream + _avail_in], f_end_i %+ d
+
+ ;; Update output buffer
+ mov [stream + _next_out], m_out_buf
+ ; len = state->bitbuf.buffer_used();
+ sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
+
+ ; stream->avail_out -= len;
+ sub [stream + _avail_out], m_out_buf %+ d
+ ; stream->total_out += len;
+ add [stream + _total_out], m_out_buf %+ d
+
+ mov [stream + _internal_state_bitbuf_m_bits], m_bits
+ mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
+ add rsp, stack_size
+ POP_ALL
+ ret
+
+section .data
+ align 4
+c258: dq 258
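
isal_deflate_finish_01 is the scalar tail of the compressor: a greedy
matcher over a single-entry hash head table that flushes the remaining
input, then the end-of-block code. Reassembled from its inline comments,
the loop shape is roughly the following (an illustrative sketch:
compute_hash, compare and the bit writes are asm macros, the emit_*
helpers are stand-ins, SHORTEST_MATCH = 4 is assumed from igzip's minimum
match length, and the asm's output-buffer-full bailout is omitted):

	#include <stdint.h>

	#define SHORTEST_MATCH 4	/* assumed igzip minimum match */

	/* Stand-ins for the asm macros above (not a real API): */
	extern uint32_t compute_hash(const uint8_t *p);
	extern uint32_t compare(const uint8_t *a, const uint8_t *b, uint32_t max_len);
	extern void emit_len_dist(uint32_t len, uint32_t dist);
	extern void emit_literal(uint32_t lit);

	static void deflate_finish_ref(const uint8_t *file_start, uint32_t f_i,
				       uint32_t f_end_i, uint16_t *head,
				       uint32_t hash_mask, uint32_t dist_mask)
	{
		while (f_i < f_end_i) {
			uint32_t hash = compute_hash(file_start + f_i) & hash_mask;
			uint32_t f_index = head[hash];
			uint32_t dist, len;

			head[hash] = (uint16_t) f_i;

			dist = (f_i - f_index) & 0xFFFF;	/* mod 64k */
			if (dist - 1 < dist_mask) {	/* lookback inside window */
				len = f_end_i - f_i;
				if (len > 258)
					len = 258;	/* DEFLATE maximum match */
				len = compare(file_start + f_i,
					      file_start + f_i - dist, len);
				if (len >= SHORTEST_MATCH) {
					emit_len_dist(len, dist);
					/* the asm refreshes head[] for only
					 * two positions inside the match */
					f_i += len;
					continue;
				}
			}
			emit_literal(file_start[f_i++]);
		}
		emit_literal(256);	/* end-of-block symbol */
	}
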
diff --git a/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm b/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm
new file mode 100644
index 000000000..d1888467d
--- /dev/null
+++ b/src/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm
@@ -0,0 +1,746 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "huffman.asm"
+
+
+%define USE_HSWNI
+%define ARCH 04
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define hash rsi
+%define next_in rdi
+%else
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define hash r8
+%define next_in rcx
+%endif
+
+%define stream arg1
+%define level_buf arg1
+%define matches_next arg2
+%define f_i_end arg3
+
+%define f_i rax
+%define file_start rbp
+%define tmp r9
+%define tmp2 r10
+%define prev_len r11
+%define prev_dist r12
+%define f_i_orig r13
+
+%define hash_table level_buf + _hash_map_hash_table
+
+%define datas ymm0
+%define datas_lookup ymm1
+%define yhashes ymm2
+%define ydists ymm3
+%define ydists_lookup ymm4
+
+%define ydownconvert_qd ymm5
+%define ydists2 ymm5
+%define yscatter ymm5
+%define ytmp2 ymm5
+%define ynull_syms ymm5
+
+%define ylens1 ymm6
+%define ylens2 ymm7
+%define ylookup ymm8
+%define ylookup2 ymm9
+%define yindex ymm10
+
+%define yrot_left ymm11
+%define yshift_finish ymm11
+%define yqword_shuf ymm11
+%define yhash_prod ymm11
+%define ycode ymm11
+%define ytmp3 ymm11
+
+%define yones ymm12
+%define ydatas_perm2 ymm13
+%define yincrement ymm14
+
+%define ytmp ymm15
+%define ydist_extra ymm15
+%define yhash_mask ymm15
+%define ydist_mask ymm15
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define stack_size 10*16 + 6 * 8 + 3 * 8
+%define local_storage_offset (stack_size - 16)
+%define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+	vmovdqa	[rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg rsi, 10*16 + 0*8
+ save_reg rdi, 10*16 + 1*8
+ save_reg rbp, 10*16 + 2*8
+ save_reg r12, 10*16 + 3*8
+ save_reg r13, 10*16 + 4*8
+ end_prolog
+%endm
+
+%macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+
+ mov rsi, [rsp + 10*16 + 0*8]
+ mov rdi, [rsp + 10*16 + 1*8]
+ mov rbp, [rsp + 10*16 + 2*8]
+ mov r12, [rsp + 10*16 + 3*8]
+ mov r13, [rsp + 10*16 + 4*8]
+ add rsp, stack_size
+%endm
+%else
+%define stack_size 16
+%define local_storage_offset 0
+
+%define func(x) x: endbranch
+%macro FUNC_SAVE 0
+ push rbp
+ push r12
+ push r13
+ sub rsp, stack_size
+%endm
+
+%macro FUNC_RESTORE 0
+ add rsp, stack_size
+ pop r13
+ pop r12
+ pop rbp
+%endm
+%endif
+
+%define dist_mask_offset local_storage_offset
+%define hash_mask_offset local_storage_offset + 8
+
+%define VECT_SIZE 8
+%define HASH_BYTES 2
+
+[bits 64]
+default rel
+section .text
+
+global gen_icf_map_lh1_04
+func(gen_icf_map_lh1_04)
+ endbranch
+ FUNC_SAVE
+
+ mov file_start, [stream + _next_in]
+ mov f_i %+ d, dword [stream + _total_in]
+ mov f_i_orig, f_i
+
+ sub file_start, f_i
+ add f_i_end, f_i
+ cmp f_i, f_i_end
+ jge end_main
+
+;; Prep for main loop
+ mov tmp %+ d, dword [stream + _internal_state_dist_mask]
+ mov [rsp + dist_mask_offset], tmp
+ mov tmp %+ d, dword [stream + _internal_state_hash_mask]
+ mov [rsp + hash_mask_offset], tmp
+ mov tmp, stream
+ mov level_buf, [stream + _level_buf]
+ sub f_i_end, LA
+ vmovdqu yincrement, [increment]
+ vpbroadcastd yones, [ones]
+ vmovdqu ydatas_perm2, [datas_perm2]
+
+;; Process first byte
+ vpbroadcastd yhash_prod, [hash_prod]
+ vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
+ vmovd yhashes %+ x, dword [f_i + file_start]
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpand yhashes, yhashes, yhash_mask
+ vmovd hash %+ d, yhashes %+ x
+ cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST
+ jne .has_hist
+ ;; No history, the byte is a literal
+ xor prev_len, prev_len
+ xor prev_dist, prev_dist
+ mov byte [tmp + _internal_state_has_hist], IGZIP_HIST
+ jmp .byte_processed
+
+.has_hist:
+ ;; History exists, need to set prev_len and prev_dist accordingly
+ lea next_in, [f_i + file_start]
+
+ ;; Determine match lookback distance
+ xor tmp, tmp
+ mov tmp %+ w, f_i %+ w
+ dec tmp
+ sub tmp %+ w, word [hash_table + HASH_BYTES * hash]
+
+ and tmp %+ d, [rsp + dist_mask_offset]
+ neg tmp
+
+ ;; Check first 8 bytes of match
+ mov prev_len, [next_in]
+ xor prev_len, [next_in + tmp - 1]
+ neg tmp
+
+ ;; Set prev_dist
+%ifidn arg1, rcx
+ mov tmp2, rcx
+%endif
+	;; The third register is unused on Haswell and later;
+	;; this line will not work on earlier architectures
+ get_dist_icf_code tmp, prev_dist, tmp
+
+%ifidn arg1, rcx
+ mov rcx, tmp2
+%endif
+
+ ;; Set prev_len
+ xor tmp2, tmp2
+ tzcnt prev_len, prev_len
+ shr prev_len, 3
+ cmp prev_len, MIN_DEF_MATCH
+ cmovl prev_len, tmp2
+
+.byte_processed:
+ mov word [hash_table + HASH_BYTES * hash], f_i %+ w
+
+ add f_i, 1
+
+;;hash
+ vmovdqu datas, [f_i + file_start]
+ vpermq yhashes, datas, 0x44
+ vpshufb yhashes, yhashes, [datas_shuf]
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpand yhashes, yhashes, yhash_mask
+
+ vpermq ylookup, datas, 0x44
+ vmovdqu yqword_shuf, [qword_shuf]
+ vpshufb ylookup, ylookup, yqword_shuf
+ vpermd ylookup2, ydatas_perm2, datas
+ vpshufb ylookup2, ylookup2, yqword_shuf
+
+;;gather/scatter hashes
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpgatherdd ydists_lookup, [hash_table + HASH_BYTES * yhashes], ytmp
+
+ vpbroadcastd ytmp2, [upper_word]
+ vpbroadcastd ytmp, [low_word]
+ vmovd yindex %+ x, f_i %+ d
+ vpbroadcastd yindex, yindex %+ x
+ vpaddd yindex, yindex, yincrement
+ vpand yscatter, ydists_lookup, ytmp2
+ vpand ytmp, yindex, ytmp
+ vpor yscatter, yscatter, ytmp
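+	;; (each dword lane now holds the new 16-bit index in its low word
+	;;  and the just-gathered neighboring entry in its high word, so the
+	;;  32-bit stores below update 2-byte hash entries in place)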
+
+ vmovd tmp %+ d, yhashes %+ x
+ vmovd [hash_table + HASH_BYTES * tmp], yscatter %+ x
+ vpextrd tmp %+ d, yhashes %+ x, 1
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 1
+ vpextrd tmp %+ d, yhashes %+ x, 2
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 2
+	vpextrd	tmp %+ d, yhashes %+ x, 3
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 3
+
+ vextracti128 yscatter %+ x, yscatter, 1
+ vextracti128 yhashes %+ x, yhashes, 1
+
+ vmovd tmp %+ d, yhashes %+ x
+ vmovd [hash_table + HASH_BYTES * tmp], yscatter %+ x
+ vpextrd tmp %+ d, yhashes %+ x, 1
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 1
+ vpextrd tmp %+ d, yhashes %+ x, 2
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 2
+	vpextrd	tmp %+ d, yhashes %+ x, 3
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 3
+
+;; Compute hash for next loop
+ vpbroadcastd yhash_prod, [hash_prod]
+ vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
+ vmovdqu datas, [f_i + file_start + VECT_SIZE]
+ vpermq yhashes, datas, 0x44
+ vpshufb yhashes, yhashes, [datas_shuf]
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpand yhashes, yhashes, yhash_mask
+
+ vmovdqu datas_lookup, [f_i + file_start + 2 * VECT_SIZE]
+
+ sub f_i_end, VECT_SIZE
+ cmp f_i, f_i_end
+ jg .loop1_end
+
+.loop1:
+ lea next_in, [f_i + file_start]
+
+;; Calculate look back dists
+ vpbroadcastd ydist_mask, [rsp + dist_mask_offset]
+ vpaddd ydists, ydists_lookup, yones
+ vpsubd ydists, yindex, ydists
+ vpand ydists, ydists, ydist_mask
+ vpaddd ydists, ydists, yones
+ vpsubd ydists, yincrement, ydists
+
+;;gather/scatter hashes
+ add f_i, VECT_SIZE
+
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpgatherdd ydists_lookup, [hash_table + HASH_BYTES * yhashes], ytmp
+
+ vpbroadcastd ytmp2, [upper_word]
+ vpbroadcastd ytmp, [low_word]
+ vmovd yindex %+ x, f_i %+ d
+ vpbroadcastd yindex, yindex %+ x
+ vpaddd yindex, yindex, yincrement
+ vpand yscatter, ydists_lookup, ytmp2
+ vpand ytmp, yindex, ytmp
+ vpor yscatter, yscatter, ytmp
+
+ vmovd tmp %+ d, yhashes %+ x
+ vmovd [hash_table + HASH_BYTES * tmp], yscatter %+ x
+ vpextrd tmp %+ d, yhashes %+ x, 1
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 1
+ vpextrd tmp %+ d, yhashes %+ x, 2
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 2
+	vpextrd	tmp %+ d, yhashes %+ x, 3
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 3
+
+ vextracti128 yscatter %+ x, yscatter, 1
+ vextracti128 yhashes %+ x, yhashes, 1
+
+ vmovd tmp %+ d, yhashes %+ x
+ vmovd [hash_table + HASH_BYTES * tmp], yscatter %+ x
+ vpextrd tmp %+ d, yhashes %+ x, 1
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 1
+ vpextrd tmp %+ d, yhashes %+ x, 2
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 2
+	vpextrd	tmp %+ d, yhashes %+ x, 3
+ vpextrd [hash_table + HASH_BYTES * tmp], yscatter %+ x, 3
+
+;; Compute hash for next loop
+ vpbroadcastd yhash_prod, [hash_prod]
+ vpbroadcastd yhash_mask, [rsp + hash_mask_offset]
+ vpermq yhashes, datas_lookup, 0x44
+ vpshufb yhashes, yhashes, [datas_shuf]
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpmaddwd yhashes, yhashes, yhash_prod
+ vpand yhashes, yhashes, yhash_mask
+
+;;lookup old codes
+ vextracti128 ydists2 %+ x, ydists, 1
+
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpgatherdq ylens1, [next_in + ydists %+ x], ytmp
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpgatherdq ylens2, [next_in + ydists2 %+ x], ytmp
+
+;; Calculate dist_icf_code
+ vpaddd ydists, ydists, yones
+ vpsubd ydists, yincrement, ydists
+
+ vpbroadcastd ytmp2, [low_nibble]
+ vbroadcasti128 ytmp3, [nibble_order]
+ vpslld ydist_extra, ydists, 12
+ vpor ydist_extra, ydists, ydist_extra
+ vpand ydist_extra, ydist_extra, ytmp2
+ vpshufb ydist_extra, ydist_extra, ytmp3
+ vbroadcasti128 ytmp2, [bit_index]
+ vpshufb ydist_extra, ytmp2, ydist_extra
+ vpxor ytmp2, ytmp2, ytmp2
+ vpcmpgtb ytmp2, ydist_extra, ytmp2
+ vpsrld ytmp3, ytmp2, 8
+ vpandn ytmp2, ytmp3, ytmp2
+ vpsrld ytmp3, ytmp2, 16
+ vpandn ytmp2, ytmp3, ytmp2
+ vpsrld ytmp3, ytmp2, 24
+ vpandn ytmp2, ytmp3, ytmp2
+ vpbroadcastd ytmp3, [base_offset]
+ vpaddb ydist_extra, ytmp3
+ vpand ydist_extra, ydist_extra, ytmp2
+ vpsrlq ytmp2, ydist_extra, 32
+ vpxor ytmp3, ytmp3, ytmp3
+ vpsadbw ydist_extra, ydist_extra, ytmp3
+ vpsadbw ytmp2, ytmp2, ytmp3
+ vpsubd ydist_extra, ydist_extra, ytmp2
+ vpsllq ytmp2, ytmp2, 32
+ vpor ydist_extra, ydist_extra, ytmp2
+ vpcmpgtb ytmp3, ydist_extra, ytmp3
+ vpand ydist_extra, ydist_extra, ytmp3
+
+ vpsllvd ycode, yones, ydist_extra
+ vpsubd ycode, ycode, yones
+ vpcmpgtd ytmp2, ydists, yones
+ vpand ycode, ydists, ycode
+ vpand ycode, ycode, ytmp2
+ vpsrlvd ydists, ydists, ydist_extra
+ vpslld ydist_extra, ydist_extra, 1
+ vpaddd ydists, ydists, ydist_extra
+ vpslld ycode, ycode, EXTRA_BITS_OFFSET - DIST_OFFSET
+ vpaddd ydists, ydists, ycode
+
+;; Setup ydists for combining with ylens
+ vpslld ydists, ydists, DIST_OFFSET
+
+;; xor current data with lookback dist
+ vpxor ylens1, ylens1, ylookup
+ vpxor ylens2, ylens2, ylookup2
+
+;; Setup registers for next loop
+ vpermq ylookup, datas, 0x44
+ vmovdqu yqword_shuf, [qword_shuf]
+ vpshufb ylookup, ylookup, yqword_shuf
+ vpermd ylookup2, ydatas_perm2, datas
+ vpshufb ylookup2, ylookup2, yqword_shuf
+
+;; Compute match length
+ vpxor ytmp, ytmp, ytmp
+ vpcmpeqb ylens1, ylens1, ytmp
+ vpcmpeqb ylens2, ylens2, ytmp
+ vpbroadcastq yshift_finish, [shift_finish]
+ vpand ylens1, ylens1, yshift_finish
+ vpand ylens2, ylens2, yshift_finish
+ vpsadbw ylens1, ylens1, ytmp
+ vpsadbw ylens2, ylens2, ytmp
+ vmovdqu ydownconvert_qd, [downconvert_qd]
+ vpshufb ylens1, ylens1, ydownconvert_qd
+ vextracti128 ytmp %+ x, ylens1, 1
+ vpor ylens1, ylens1, ytmp
+ vpshufb ylens2, ylens2, ydownconvert_qd
+ vextracti128 ytmp %+ x, ylens2, 1
+ vpor ylens2, ylens2, ytmp
+ vinserti128 ylens1, ylens1, ylens2 %+ x, 1
+ vpbroadcastd ytmp, [low_nibble]
+ vpsrld ylens2, ylens1, 4
+ vpand ylens1, ylens1, ytmp
+ vbroadcasti128 ytmp, [match_cnt_perm]
+ vpbroadcastd ytmp2, [match_cnt_low_max]
+ vpshufb ylens1, ytmp, ylens1
+ vpshufb ylens2, ytmp, ylens2
+ vpcmpeqb ytmp, ylens1, ytmp2
+ vpand ylens2, ylens2, ytmp
+ vpaddd ylens1, ylens1, ylens2
+
+;; Preload for next loops
+ vmovdqu datas, datas_lookup
+ vmovdqu datas_lookup, [f_i + file_start + 2 * VECT_SIZE]
+
+;; Zero out matches which should not be taken
+ vmovdqu yrot_left, [drot_left]
+ vpermd ylens2, yrot_left, ylens1
+ vpermd ydists, yrot_left, ydists
+
+ vpinsrd ytmp %+ x, ylens2 %+ x, prev_len %+ d, 0
+ vmovd prev_len %+ d, ylens2 %+ x
+ vinserti128 ylens2, ylens2, ytmp %+ x, 0
+
+ vpinsrd ytmp %+ x, ydists %+ x, prev_dist %+ d, 0
+ vmovd prev_dist %+ d, ydists %+ x
+ vinserti128 ydists, ydists, ytmp %+ x, 0
+
+ vpbroadcastd ytmp, [shortest_matches]
+ vpcmpgtd ytmp, ylens2, ytmp
+ vpcmpgtd ytmp2, ylens1, ylens2
+
+ vpcmpeqd ytmp3, ytmp3, ytmp3
+ vpxor ytmp, ytmp, ytmp3
+ vpor ytmp, ytmp, ytmp2
+
+ vpandn ylens1, ytmp, ylens2
+
+;; Update ydists to match ylens1
+ vpbroadcastd ytmp2, [twofiftyfour]
+ vpaddd ydists, ydists, ylens1
+ vpaddd ydists, ydists, ytmp2
+
+ vpbroadcastd ynull_syms, [null_dist_syms]
+ vpmovzxbd ytmp3, [f_i + file_start - VECT_SIZE - 1]
+ vpaddd ytmp3, ynull_syms
+ vpand ytmp3, ytmp3, ytmp
+ vpandn ydists, ytmp, ydists
+ vpor ydists, ydists, ytmp3
+
+;;Store ydists
+ vmovdqu [matches_next], ydists
+ add matches_next, ICF_CODE_BYTES * VECT_SIZE
+
+ cmp f_i, f_i_end
+ jle .loop1
+
+.loop1_end:
+ lea next_in, [f_i + file_start]
+
+;; Calculate look back dists
+ vpbroadcastd ydist_mask, [rsp + dist_mask_offset]
+ vpaddd ydists, ydists_lookup, yones
+ vpsubd ydists, yindex, ydists
+ vpand ydists, ydists, ydist_mask
+ vpaddd ydists, ydists, yones
+ vpsubd ydists, yincrement, ydists
+
+;;lookup old codes
+ vextracti128 ydists2 %+ x, ydists, 1
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpgatherdq ylens1, [next_in + ydists %+ x], ytmp
+ vpcmpeqq ytmp, ytmp, ytmp
+ vpgatherdq ylens2, [next_in + ydists2 %+ x], ytmp
+
+;; Restore the last updated hash value
+ vpextrd tmp %+ d, ydists2 %+ x, 3
+ add tmp %+ d, f_i %+ d
+
+ vpbroadcastd yhash_prod %+ x, [hash_prod]
+ vpbroadcastd yhash_mask %+ x, [rsp + hash_mask_offset]
+
+ vmovd yhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
+ vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
+ vpmaddwd yhashes %+ x, yhashes %+ x, yhash_prod %+ x
+ vpand yhashes %+ x, yhashes %+ x, yhash_mask %+ x
+ vmovd hash %+ d, yhashes %+ x
+
+ mov word [hash_table + HASH_BYTES * hash], tmp %+ w
+
+;; Calculate dist_icf_code
+ vpaddd ydists, ydists, yones
+ vpsubd ydists, yincrement, ydists
+
+ vpbroadcastd ytmp2, [low_nibble]
+ vbroadcasti128 ytmp3, [nibble_order]
+ vpslld ydist_extra, ydists, 12
+ vpor ydist_extra, ydists, ydist_extra
+ vpand ydist_extra, ydist_extra, ytmp2
+ vpshufb ydist_extra, ydist_extra, ytmp3
+ vbroadcasti128 ytmp2, [bit_index]
+ vpshufb ydist_extra, ytmp2, ydist_extra
+ vpxor ytmp2, ytmp2, ytmp2
+ vpcmpgtb ytmp2, ydist_extra, ytmp2
+ vpsrld ytmp3, ytmp2, 8
+ vpandn ytmp2, ytmp3, ytmp2
+ vpsrld ytmp3, ytmp2, 16
+ vpandn ytmp2, ytmp3, ytmp2
+ vpsrld ytmp3, ytmp2, 24
+ vpandn ytmp2, ytmp3, ytmp2
+ vpbroadcastd ytmp3, [base_offset]
+ vpaddb ydist_extra, ytmp3
+ vpand ydist_extra, ydist_extra, ytmp2
+ vpsrlq ytmp2, ydist_extra, 32
+ vpxor ytmp3, ytmp3, ytmp3
+ vpsadbw ydist_extra, ydist_extra, ytmp3
+ vpsadbw ytmp2, ytmp2, ytmp3
+ vpsubd ydist_extra, ydist_extra, ytmp2
+ vpsllq ytmp2, ytmp2, 32
+ vpor ydist_extra, ydist_extra, ytmp2
+ vpcmpgtb ytmp3, ydist_extra, ytmp3
+ vpand ydist_extra, ydist_extra, ytmp3
+
+ vpsllvd ycode, yones, ydist_extra
+ vpsubd ycode, ycode, yones
+ vpcmpgtd ytmp2, ydists, yones
+ vpand ycode, ydists, ycode
+ vpand ycode, ycode, ytmp2
+ vpsrlvd ydists, ydists, ydist_extra
+ vpslld ydist_extra, ydist_extra, 1
+ vpaddd ydists, ydists, ydist_extra
+ vpslld ycode, ycode, EXTRA_BITS_OFFSET - DIST_OFFSET
+ vpaddd ydists, ydists, ycode
+
+;; Setup ydists for combining with ylens
+ vpslld ydists, ydists, DIST_OFFSET
+
+;; xor current data with lookback dist
+ vpxor ylens1, ylens1, ylookup
+ vpxor ylens2, ylens2, ylookup2
+
+;; Compute match length
+ vpxor ytmp, ytmp, ytmp
+ vpcmpeqb ylens1, ylens1, ytmp
+ vpcmpeqb ylens2, ylens2, ytmp
+ vpbroadcastq yshift_finish, [shift_finish]
+ vpand ylens1, ylens1, yshift_finish
+ vpand ylens2, ylens2, yshift_finish
+ vpsadbw ylens1, ylens1, ytmp
+ vpsadbw ylens2, ylens2, ytmp
+ vmovdqu ydownconvert_qd, [downconvert_qd]
+ vpshufb ylens1, ylens1, ydownconvert_qd
+ vextracti128 ytmp %+ x, ylens1, 1
+ vpor ylens1, ylens1, ytmp
+ vpshufb ylens2, ylens2, ydownconvert_qd
+ vextracti128 ytmp %+ x, ylens2, 1
+ vpor ylens2, ylens2, ytmp
+ vinserti128 ylens1, ylens1, ylens2 %+ x, 1
+ vpbroadcastd ytmp, [low_nibble]
+ vpsrld ylens2, ylens1, 4
+ vpand ylens1, ylens1, ytmp
+ vbroadcasti128 ytmp, [match_cnt_perm]
+ vpbroadcastd ytmp2, [match_cnt_low_max]
+ vpshufb ylens1, ytmp, ylens1
+ vpshufb ylens2, ytmp, ylens2
+ vpcmpeqb ytmp, ylens1, ytmp2
+ vpand ylens2, ylens2, ytmp
+ vpaddd ylens1, ylens1, ylens2
+
+;; Zero out matches which should not be taken
+ vmovdqu yrot_left, [drot_left]
+ vpermd ylens2, yrot_left, ylens1
+ vpermd ydists, yrot_left, ydists
+
+ vpinsrd ytmp %+ x, ylens2 %+ x, prev_len %+ d, 0
+ vinserti128 ylens2, ylens2, ytmp %+ x, 0
+
+ vpinsrd ytmp %+ x, ydists %+ x, prev_dist %+ d, 0
+ vinserti128 ydists, ydists, ytmp %+ x, 0
+
+ vpbroadcastd ytmp, [shortest_matches]
+ vpcmpgtd ytmp, ylens2, ytmp
+ vpcmpgtd ytmp2, ylens1, ylens2
+
+ vpcmpeqd ytmp3, ytmp3, ytmp3
+ vpxor ytmp, ytmp, ytmp3
+ vpor ytmp, ytmp, ytmp2
+
+ vpandn ylens1, ytmp, ylens2
+
+;; Update ydists to match ylens1
+ vpbroadcastd ytmp2, [twofiftyfour]
+ vpaddd ydists, ydists, ylens1
+ vpaddd ydists, ydists, ytmp2
+
+ vpbroadcastd ynull_syms, [null_dist_syms]
+ vpmovzxbd ytmp3, [f_i + file_start - 1]
+ vpaddd ytmp3, ynull_syms
+ vpand ytmp3, ytmp3, ytmp
+ vpandn ydists, ytmp, ydists
+ vpor ydists, ydists, ytmp3
+
+;;Store ydists
+ vmovdqu [matches_next], ydists
+ add f_i, VECT_SIZE
+
+end_main:
+ sub f_i, f_i_orig
+ sub f_i, 1
+
+%ifnidn f_i, rax
+ mov rax, f_i
+%endif
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+align 32
+;; 32 byte data
+datas_perm2:
+ dd 0x1, 0x2, 0x3, 0x4, 0x1, 0x2, 0x3, 0x4
+drot_left:
+ dd 0x7, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6
+datas_shuf:
+ db 0x0, 0x1, 0x2, 0x3
+ db 0x1, 0x2, 0x3, 0x4
+ db 0x2, 0x3, 0x4, 0x5
+ db 0x3, 0x4, 0x5, 0x6
+ db 0x4, 0x5, 0x6, 0x7
+ db 0x5, 0x6, 0x7, 0x8
+ db 0x6, 0x7, 0x8, 0x9
+ db 0x7, 0x8, 0x9, 0xa
+qword_shuf:
+ db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8
+ db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9
+ db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa
+increment:
+ dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+downconvert_qd:
+ db 0x00, 0xff, 0xff, 0xff, 0x08, 0xff, 0xff, 0xff
+ db 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ db 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ db 0x00, 0xff, 0xff, 0xff, 0x08, 0xff, 0xff, 0xff
+
+;; 16 byte data
+match_cnt_perm:
+ db 0x0, 0x1, 0x0, 0x2, 0x0, 0x1, 0x0, 0x3, 0x0, 0x1, 0x0, 0x2, 0x0, 0x1, 0x0, 0x4
+bit_index:
+ db 0x0, 0x1, 0x2, 0x2, 0x3, 0x3, 0x3, 0x3
+ db 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4
+nibble_order:
+ db 0x0, 0x2, 0x1, 0x3, 0x4, 0x6, 0x5, 0x7
+ db 0x8, 0xa, 0x9, 0xb, 0xc, 0xe, 0xd, 0xf
+
+;; 8 byte data
+shift_finish:
+ db 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
+
+;; 4 byte data
+ones:
+ dd 0x1
+%define PROD1 0xE84B
+%define PROD2 0x97B1
+hash_prod:
+ dw PROD1, PROD2
+null_dist_syms:
+ dd LIT
+twofiftyfour:
+ dd 0xfe
+shortest_matches:
+ dd MIN_DEF_MATCH
+upper_word:
+ dw 0x0000, 0xffff
+low_word:
+ dw 0xffff, 0x0000
+low_nibble:
+ db 0x0f, 0x0f, 0x0f, 0x0f
+match_cnt_low_max:
+ dd 0x4
+base_offset:
+ db -0x2, 0x2, 0x6, 0xa
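
Both gen_icf_map_lh1 variants (this AVX2 one and the AVX-512 one that
follows) share the same hash: two vpmaddwd rounds against the PROD1/PROD2
word constants, then a mask to the table size. A scalar model of that
hash (a sketch; the casts mirror vpmaddwd's signed 16x16-bit multiplies
summed into a wrapping 32-bit lane):

	#include <stdint.h>

	static uint32_t hash_lh1(uint32_t data, uint32_t hash_mask)
	{
		const int32_t prod1 = (int16_t) 0xE84B;	/* hash_prod words */
		const int32_t prod2 = (int16_t) 0x97B1;
		int round;

		for (round = 0; round < 2; round++) {
			int32_t w0 = (int16_t) (data & 0xFFFF);
			int32_t w1 = (int16_t) (data >> 16);

			/* one vpmaddwd: w0*prod1 + w1*prod2, truncated to 32 bits */
			data = (uint32_t) ((int64_t) w0 * prod1 + (int64_t) w1 * prod2);
		}
		return data & hash_mask;
	}
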
diff --git a/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm b/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm
new file mode 100644
index 000000000..7985ab51e
--- /dev/null
+++ b/src/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm
@@ -0,0 +1,581 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "huffman.asm"
+
+
+%define USE_HSWNI
+%define ARCH 06
+
+%ifdef HAVE_AS_KNOWS_AVX512
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define hash rsi
+%define next_in rdi
+%else
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define hash r8
+%define next_in rcx
+%endif
+
+%define stream arg1
+%define level_buf arg1
+%define matches_next arg2
+%define f_i_end arg3
+
+%define f_i rax
+%define file_start rbp
+%define tmp r9
+%define tmp2 r10
+%define prev_len r11
+%define prev_dist r12
+%define f_i_orig r13
+
+%define hash_table level_buf + _hash_map_hash_table
+
+%define datas zmm0
+%define datas_lookup zmm1
+%define zhashes zmm2
+%define zdists zmm3
+%define zdists_lookup zmm4
+%define zscatter zmm5
+%define zdists2 zmm6
+%define zlens1 zmm7
+%define zlens2 zmm8
+%define zlookup zmm9
+%define zlookup2 zmm10
+%define match_lookups zmm11
+%define zindex zmm12
+%define zdist_extra zmm13
+%define zdists_tmp zmm14
+%define znull_dist_syms zmm15
+%define zcode zmm16
+%define zthirty zmm17
+%define zdist_mask zmm18
+%define zshortest_matches zmm19
+%define zrot_left zmm20
+%define zdatas_perm zmm21
+%define zdatas_perm2 zmm22
+%define zdatas_perm3 zmm23
+%define zdatas_shuf zmm24
+%define zhash_prod zmm25
+%define zhash_mask zmm26
+%define zincrement zmm27
+%define zqword_shuf zmm28
+%define zones zmm29
+%define ztwofiftyfour zmm30
+%define zbswap zmm31
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define stack_size 10*16 + 6 * 8 + 8
+%define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+	vmovdqa	[rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg rsi, 10*16 + 0*8
+ save_reg rdi, 10*16 + 1*8
+ save_reg rbp, 10*16 + 2*8
+ save_reg r12, 10*16 + 3*8
+ save_reg r13, 10*16 + 4*8
+ end_prolog
+%endm
+
+%macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+
+ mov rsi, [rsp + 10*16 + 0*8]
+ mov rdi, [rsp + 10*16 + 1*8]
+ mov rbp, [rsp + 10*16 + 2*8]
+ mov r12, [rsp + 10*16 + 3*8]
+ mov r13, [rsp + 10*16 + 4*8]
+ add rsp, stack_size
+%endm
+%else
+%define func(x) x: endbranch
+%macro FUNC_SAVE 0
+ push rbp
+ push r12
+ push r13
+%endm
+
+%macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+ pop rbp
+%endm
+%endif
+
+%define VECT_SIZE 16
+%define HASH_BYTES 2
+
+[bits 64]
+default rel
+section .text
+
+global gen_icf_map_lh1_06
+func(gen_icf_map_lh1_06)
+ endbranch
+ FUNC_SAVE
+
+ mov file_start, [stream + _next_in]
+ mov f_i %+ d, dword [stream + _total_in]
+ mov f_i_orig, f_i
+
+ sub file_start, f_i
+ add f_i_end, f_i
+ cmp f_i, f_i_end
+ jge end_main
+
+;; Prep for main loop
+ vpbroadcastd zdist_mask, dword [stream + _internal_state_dist_mask]
+ vpbroadcastd zhash_mask, dword [stream + _internal_state_hash_mask]
+ mov tmp, stream
+ mov level_buf, [stream + _level_buf]
+ sub f_i_end, LA
+ vmovdqu64 zdatas_perm, [datas_perm]
+ vbroadcasti32x8 zdatas_shuf, [datas_shuf]
+ vpbroadcastd zhash_prod, [hash_prod]
+ vmovdqu64 zincrement, [increment]
+ vmovdqu64 zqword_shuf, [qword_shuf]
+ vbroadcasti64x2 zdatas_perm2, [datas_perm2]
+ vbroadcasti64x2 zdatas_perm3, [datas_perm3]
+ vpbroadcastd zones, [ones]
+ vbroadcasti32x4 zbswap, [bswap_shuf]
+ vpbroadcastd zthirty, [thirty]
+ vmovdqu64 zrot_left, [drot_left]
+ vpbroadcastd zshortest_matches, [shortest_matches]
+ vpbroadcastd ztwofiftyfour, [twofiftyfour]
+ vpbroadcastd znull_dist_syms, [null_dist_syms]
+ kxorq k0, k0, k0
+ kmovq k1, [k_mask_1]
+ kmovq k2, [k_mask_2]
+
+;; Process first byte
+ vmovd zhashes %+ x, dword [f_i + file_start]
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpandd zhashes, zhashes, zhash_mask
+ vmovd hash %+ d, zhashes %+ x
+
+ cmp byte [tmp + _internal_state_has_hist], IGZIP_NO_HIST
+ jne .has_hist
+ ;; No history, the byte is a literal
+ xor prev_len, prev_len
+ xor prev_dist, prev_dist
+ mov byte [tmp + _internal_state_has_hist], IGZIP_HIST
+ jmp .byte_processed
+
+.has_hist:
+ ;; History exists, need to set prev_len and prev_dist accordingly
+ lea next_in, [f_i + file_start]
+
+ ;; Determine match lookback distance
+ xor tmp, tmp
+ mov tmp %+ w, f_i %+ w
+ dec tmp
+ sub tmp %+ w, word [hash_table + HASH_BYTES * hash]
+
+ vmovd tmp2 %+ d, zdist_mask %+ x
+ and tmp %+ d, tmp2 %+ d
+ neg tmp
+
+ ;; Check first 8 bytes of match
+ mov prev_len, [next_in]
+ xor prev_len, [next_in + tmp - 1]
+ neg tmp
+
+ ;; Set prev_dist
+%ifidn arg1, rcx
+ mov tmp2, rcx
+%endif
+	;; The third register is unused on Haswell and later;
+	;; this line will not work on earlier architectures
+ get_dist_icf_code tmp, prev_dist, tmp
+
+%ifidn arg1, rcx
+ mov rcx, tmp2
+%endif
+
+ ;; Set prev_len
+ xor tmp2, tmp2
+ tzcnt prev_len, prev_len
+ shr prev_len, 3
+ cmp prev_len, MIN_DEF_MATCH
+ cmovl prev_len, tmp2
+
+.byte_processed:
+ mov word [hash_table + HASH_BYTES * hash], f_i %+ w
+
+ add f_i, 1
+ cmp f_i, f_i_end
+ jg end_main
+
+;;hash
+ vmovdqu64 datas %+ y, [f_i + file_start]
+ vpermq zhashes, zdatas_perm, datas
+ vpshufb zhashes, zhashes, zdatas_shuf
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpandd zhashes, zhashes, zhash_mask
+
+ vpermq zlookup, zdatas_perm2, datas
+ vpshufb zlookup, zlookup, zqword_shuf
+ vpermq zlookup2, zdatas_perm3, datas
+ vpshufb zlookup2, zlookup2, zqword_shuf
+
+;;gather/scatter hashes
+ knotq k6, k0
+ vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes]
+
+ vpbroadcastd zindex, f_i %+ d
+ vpaddd zindex, zindex, zincrement
+ vpblendmw zscatter {k1}, zindex, zdists_lookup
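+	;; (k1 = 0xaaaa...: the word-granularity blend keeps the new 16-bit
+	;;  index in each low word and rewrites the just-gathered neighbor
+	;;  entry in each high word, so the dword scatter effectively stores
+	;;  2-byte hash entries)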
+
+ knotq k6, k0
+ vpscatterdd [hash_table + HASH_BYTES * zhashes] {k6}, zscatter
+
+;; Compute hash for next loop
+ vmovdqu64 datas %+ y, [f_i + file_start + VECT_SIZE]
+ vpermq zhashes, zdatas_perm, datas
+ vpshufb zhashes, zhashes, zdatas_shuf
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpandd zhashes, zhashes, zhash_mask
+
+ vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE]
+
+ sub f_i_end, VECT_SIZE
+ cmp f_i, f_i_end
+ jg .loop1_end
+
+.loop1:
+ lea next_in, [f_i + file_start]
+
+;; Calculate look back dists
+ vpaddd zdists, zdists_lookup, zones
+ vpsubd zdists, zindex, zdists
+ vpandd zdists, zdists, zdist_mask
+ vpaddd zdists, zdists, zones
+ vpsubd zdists, zincrement, zdists
+
+;;gather/scatter hashes
+ add f_i, VECT_SIZE
+
+ kxnorq k6, k6, k6
+ kxnorq k7, k7, k7
+ vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes]
+
+ vpbroadcastd zindex, f_i %+ d
+ vpaddd zindex, zindex, zincrement
+ vpblendmw zscatter {k1}, zindex, zdists_lookup
+
+ vpscatterdd [hash_table + HASH_BYTES * zhashes] {k7}, zscatter
+
+;; Compute hash for next loop
+ vpermq zhashes, zdatas_perm, datas_lookup
+ vpshufb zhashes, zhashes, zdatas_shuf
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpmaddwd zhashes, zhashes, zhash_prod
+ vpandd zhashes, zhashes, zhash_mask
+
+;;lookup old codes
+ vextracti32x8 zdists2 %+ y, zdists, 1
+ kxnorq k6, k6, k6
+ kxnorq k7, k7, k7
+ vpgatherdq zlens1 {k6}, [next_in + zdists %+ y]
+ vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y]
+
+;; Calculate dist_icf_code
+ vpaddd zdists, zdists, zones
+ vpsubd zdists, zincrement, zdists
+ vpcmpgtd k5, zdists, zones
+ vplzcntd zdist_extra, zdists
+ vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra
+ vpsllvd zcode, zones, zdist_extra
+ vpsubd zcode, zcode, zones
+ vpandd zcode {k5}{z}, zdists, zcode
+ vpsrlvd zdists, zdists, zdist_extra
+ vpslld zdist_extra, zdist_extra, 1
+ vpaddd zdists, zdists, zdist_extra
+ vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET
+ vpaddd zdists, zdists, zcode
+
+;; Setup zdists for combining with zlens
+ vpslld zdists, zdists, DIST_OFFSET
+
+;; xor current data with lookback dist
+ vpxorq zlens1, zlens1, zlookup
+ vpxorq zlens2, zlens2, zlookup2
+
+;; Setup registers for next loop
+ vpermq zlookup, zdatas_perm2, datas
+ vpshufb zlookup, zlookup, zqword_shuf
+ vpermq zlookup2, zdatas_perm3, datas
+ vpshufb zlookup2, zlookup2, zqword_shuf
+
+;; Compute match length
+ vpshufb zlens1, zlens1, zbswap
+ vpshufb zlens2, zlens2, zbswap
+ vplzcntq zlens1, zlens1
+ vplzcntq zlens2, zlens2
+ vpmovqd zlens1 %+ y, zlens1
+ vpmovqd zlens2 %+ y, zlens2
+ vinserti32x8 zlens1, zlens2 %+ y, 1
+ vpsrld zlens1, zlens1, 3
+
+;; Preload for next loops
+ vmovdqu64 datas, datas_lookup
+ vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE]
+
+;; Zero out matches which should not be taken
+ kshiftrw k3, k1, 15
+ vpermd zlens2, zrot_left, zlens1
+ vpermd zdists, zrot_left, zdists
+
+ vmovd zdists_tmp %+ x, prev_len %+ d
+ vmovd prev_len %+ d, zlens2 %+ x
+ vmovdqu32 zlens2 {k3}, zdists_tmp
+
+ vmovd zdists_tmp %+ x, prev_dist %+ d
+ vmovd prev_dist %+ d, zdists %+ x
+ vmovdqu32 zdists {k3}, zdists_tmp
+
+ vpcmpgtd k3, zlens2, zshortest_matches
+ vpcmpgtd k4, zlens1, zlens2
+
+ knotq k3, k3
+ korq k3, k3, k4
+ knotq k4, k3
+ vmovdqu32 zlens1 {k4}{z}, zlens2
+
+;; Update zdists to match zlens1
+ vpaddd zdists, zdists, zlens1
+ vpaddd zdists, zdists, ztwofiftyfour
+ vpmovzxbd zdists {k3}, [f_i + file_start - VECT_SIZE - 1]
+ vpaddd zdists {k3}, zdists, znull_dist_syms
+
+;;Store zdists
+ vmovdqu64 [matches_next], zdists
+ add matches_next, ICF_CODE_BYTES * VECT_SIZE
+
+ cmp f_i, f_i_end
+ jle .loop1
+
+.loop1_end:
+ lea next_in, [f_i + file_start]
+
+;; Calculate look back dists
+ vpaddd zdists, zdists_lookup, zones
+ vpsubd zdists, zindex, zdists
+ vpandd zdists, zdists, zdist_mask
+ vpaddd zdists, zdists, zones
+ vpsubd zdists, zincrement, zdists
+
+;;lookup old codes
+ vextracti32x8 zdists2 %+ y, zdists, 1
+ kxnorq k6, k6, k6
+ kxnorq k7, k7, k7
+ vpgatherdq zlens1 {k6}, [next_in + zdists %+ y]
+ vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y]
+
+;; Restore the last updated hash value
+ vextracti32x4 zdists2 %+ x, zdists, 3
+ vpextrd tmp %+ d, zdists2 %+ x, 3
+ add tmp %+ d, f_i %+ d
+
+ vmovd zhashes %+ x, dword [f_i + file_start + VECT_SIZE - 1]
+ vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
+ vpmaddwd zhashes %+ x, zhashes %+ x, zhash_prod %+ x
+ vpandd zhashes %+ x, zhashes %+ x, zhash_mask %+ x
+ vmovd hash %+ d, zhashes %+ x
+
+ mov word [hash_table + HASH_BYTES * hash], tmp %+ w
+
+;; Calculate dist_icf_code
+ vpaddd zdists, zdists, zones
+ vpsubd zdists, zincrement, zdists
+ vpcmpgtd k5, zdists, zones
+ vplzcntd zdist_extra, zdists
+ vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra
+ vpsllvd zcode, zones, zdist_extra
+ vpsubd zcode, zcode, zones
+ vpandd zcode {k5}{z}, zdists, zcode
+ vpsrlvd zdists, zdists, zdist_extra
+ vpslld zdist_extra, zdist_extra, 1
+ vpaddd zdists, zdists, zdist_extra
+ vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET
+ vpaddd zdists, zdists, zcode
+
+;; Setup zdists for combining with zlens
+ vpslld zdists, zdists, DIST_OFFSET
+
+;; xor current data with lookback dist
+ vpxorq zlens1, zlens1, zlookup
+ vpxorq zlens2, zlens2, zlookup2
+
+;; Compute match length
+ vpshufb zlens1, zlens1, zbswap
+ vpshufb zlens2, zlens2, zbswap
+ vplzcntq zlens1, zlens1
+ vplzcntq zlens2, zlens2
+ vpmovqd zlens1 %+ y, zlens1
+ vpmovqd zlens2 %+ y, zlens2
+ vinserti32x8 zlens1, zlens2 %+ y, 1
+ vpsrld zlens1, zlens1, 3
+
+;; Zero out matches which should not be taken
+ kshiftrw k3, k1, 15
+ vpermd zlens2, zrot_left, zlens1
+ vpermd zdists, zrot_left, zdists
+
+ vmovd zdists_tmp %+ x, prev_len %+ d
+ vmovd prev_len %+ d, zlens2 %+ x
+ vmovdqu32 zlens2 {k3}, zdists_tmp
+
+ vmovd zdists_tmp %+ x, prev_dist %+ d
+ vmovd prev_dist %+ d, zdists %+ x
+ vmovdqu32 zdists {k3}, zdists_tmp
+
+ vpcmpgtd k3, zlens2, zshortest_matches
+ vpcmpgtd k4, zlens1, zlens2
+
+ knotq k3, k3
+ korq k3, k3, k4
+ knotq k4, k3
+ vmovdqu32 zlens1 {k4}{z}, zlens2
+
+;; Update zdists to match zlens1
+ vpaddd zdists, zdists, zlens1
+ vpaddd zdists, zdists, ztwofiftyfour
+ vpmovzxbd zdists {k3}, [f_i + file_start - 1]
+ vpaddd zdists {k3}, zdists, znull_dist_syms
+
+;; Store zdists
+ vmovdqu64 [matches_next], zdists
+ add f_i, VECT_SIZE
+
+end_main:
+ sub f_i, f_i_orig
+ sub f_i, 1
+%ifnidn f_i, rax
+ mov rax, f_i
+%endif
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+align 64
+;; 64 byte data
+datas_perm:
+ dq 0x0, 0x1, 0x0, 0x1, 0x1, 0x2, 0x1, 0x2
+drot_left:
+ dd 0xf, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6
+ dd 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
+qword_shuf:
+ db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8
+ db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9
+ db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa
+ db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb
+ db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc
+ db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd
+ db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
+ db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+datas_shuf:
+ db 0x0, 0x1, 0x2, 0x3
+ db 0x1, 0x2, 0x3, 0x4
+ db 0x2, 0x3, 0x4, 0x5
+ db 0x3, 0x4, 0x5, 0x6
+ db 0x4, 0x5, 0x6, 0x7
+ db 0x5, 0x6, 0x7, 0x8
+ db 0x6, 0x7, 0x8, 0x9
+ db 0x7, 0x8, 0x9, 0xa
+increment:
+ dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+
+;; 16 byte data
+datas_perm2:
+ dq 0x0, 0x1
+datas_perm3:
+ dq 0x1, 0x2
+bswap_shuf:
+ db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+ db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+;; 8 byte data
+k_mask_1:
+ dq 0xaaaaaaaaaaaaaaaa
+k_mask_2:
+ dq 0x7fff
+;; 4 byte data
+null_dist_syms:
+ dd LIT
+%define PROD1 0xE84B
+%define PROD2 0x97B1
+hash_prod:
+ dw PROD1, PROD2
+ones:
+ dd 0x1
+thirty:
+ dd 0x1e
+twofiftyfour:
+ dd 0xfe
+lit_len_mask:
+ dd LIT_LEN_MASK
+shortest_matches:
+ dd MIN_DEF_MATCH
+%endif
diff --git a/src/isa-l/igzip/igzip_hist_perf.c b/src/isa-l/igzip/igzip_hist_perf.c
new file mode 100644
index 000000000..bc8c935aa
--- /dev/null
+++ b/src/isa-l/igzip/igzip_hist_perf.c
@@ -0,0 +1,129 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define _FILE_OFFSET_BITS 64
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "igzip_lib.h"
+#include "test.h"
+
+#define BUF_SIZE 1024
+#define MIN_TEST_LOOPS 8
+#ifndef RUN_MEM_SIZE
+# define RUN_MEM_SIZE 2000000000
+#endif
+
+void print_histogram(struct isal_huff_histogram *histogram)
+{
+ int i;
+ printf("Lit Len histogram");
+ for (i = 0; i < ISAL_DEF_LIT_LEN_SYMBOLS; i++) {
+ if (i % 16 == 0)
+ printf("\n");
+ else
+ printf(", ");
+ printf("%4lu", histogram->lit_len_histogram[i]);
+ }
+ printf("\n");
+
+ printf("Dist histogram");
+ for (i = 0; i < ISAL_DEF_DIST_SYMBOLS; i++) {
+ if (i % 16 == 0)
+ printf("\n");
+ else
+ printf(", ");
+ printf("%4lu", histogram->dist_histogram[i]);
+ }
+ printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *in;
+ unsigned char *inbuf;
+ int iterations, avail_in;
+ uint64_t infile_size;
+ struct isal_huff_histogram histogram1, histogram2;
+
+ memset(&histogram1, 0, sizeof(histogram1));
+ memset(&histogram2, 0, sizeof(histogram2));
+
+ if (argc > 3 || argc < 2) {
+		fprintf(stderr, "Usage: igzip_hist_perf infile\n"
+			"\t - Runs multiple iterations of histogram generation "
+			"on a file to get more accurate time results.\n");
+ exit(0);
+ }
+ in = fopen(argv[1], "rb");
+ if (!in) {
+ fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+ exit(0);
+ }
+
+ /* Allocate space for entire input file and output
+ * (assuming some possible expansion on output size)
+ */
+ infile_size = get_filesize(in);
+
+ if (infile_size != 0)
+ iterations = RUN_MEM_SIZE / infile_size;
+ else
+ iterations = MIN_TEST_LOOPS;
+
+ if (iterations < MIN_TEST_LOOPS)
+ iterations = MIN_TEST_LOOPS;
+
+ inbuf = malloc(infile_size);
+ if (inbuf == NULL) {
+ fprintf(stderr, "Can't allocate input buffer memory\n");
+ exit(0);
+ }
+
+ avail_in = fread(inbuf, 1, infile_size, in);
+ if (avail_in != infile_size) {
+ free(inbuf);
+ fprintf(stderr, "Couldn't fit all of input file into buffer\n");
+ exit(0);
+ }
+
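+	/* Time isal_update_histogram over the whole file; the BENCHMARK
+	 * helper from test.h repeats the call for a fixed interval and
+	 * records the timing in start.
+	 */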
+ struct perf start;
+ BENCHMARK(&start, BENCHMARK_TIME,
+ isal_update_histogram(inbuf, infile_size, &histogram1));
+ printf(" file %s - in_size=%lu\n", argv[1], infile_size);
+ printf("igzip_hist_file: ");
+ perf_print(start, (long long)infile_size);
+
+ fclose(in);
+ fflush(0);
+ free(inbuf);
+
+ return 0;
+}
diff --git a/src/isa-l/igzip/igzip_icf_base.c b/src/isa-l/igzip/igzip_icf_base.c
new file mode 100644
index 000000000..1f031796d
--- /dev/null
+++ b/src/isa-l/igzip/igzip_icf_base.c
@@ -0,0 +1,370 @@
+#include <stdint.h>
+#include <assert.h>
+#include "igzip_lib.h"
+#include "huffman.h"
+#include "huff_codes.h"
+#include "encode_df.h"
+#include "igzip_level_buf_structs.h"
+#include "unaligned.h"
+
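+/* Write one token in the intermediate compression format (ICF): a
+ * literal/length symbol, a distance symbol and the distance extra bits
+ * packed into a single 32-bit struct deflate_icf.  A later encode pass
+ * turns these tokens into deflate codes once the Huffman tables are built.
+ */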
+static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
+ uint32_t lit_dist, uint32_t extra_bits)
+{
+ icf->lit_len = lit_len;
+ icf->lit_dist = lit_dist;
+ icf->dist_extra = extra_bits;
+}
+
+static inline void update_state(struct isal_zstream *stream, uint8_t * start_in,
+ uint8_t * next_in, uint8_t * end_in,
+ struct deflate_icf *start_out, struct deflate_icf *next_out,
+ struct deflate_icf *end_out)
+{
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+
+ if (next_in - start_in > 0)
+ stream->internal_state.has_hist = IGZIP_HIST;
+
+ stream->next_in = next_in;
+ stream->total_in += next_in - start_in;
+ stream->internal_state.block_end = stream->total_in;
+ stream->avail_in = end_in - next_in;
+
+ level_buf->icf_buf_next = next_out;
+ level_buf->icf_buf_avail_out = end_out - next_out;
+}
+
+void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream)
+{
+ uint32_t literal, hash;
+ uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
+ struct deflate_icf *start_out, *next_out, *end_out;
+ uint16_t match_length;
+ uint32_t dist;
+ uint32_t code, code2, extra_bits;
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint16_t *last_seen = level_buf->hash_hist.hash_table;
+ uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ uint32_t hist_size = state->dist_mask;
+ uint32_t hash_mask = state->hash_mask;
+
+ if (stream->avail_in == 0) {
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_FLUSH_READ_BUFFER;
+ return;
+ }
+
+ start_in = stream->next_in;
+ end_in = start_in + stream->avail_in;
+ next_in = start_in;
+
+	start_out = level_buf->icf_buf_next;
+	end_out = start_out + level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);
+ next_out = start_out;
+
+ while (next_in + ISAL_LOOK_AHEAD < end_in) {
+
+ if (next_out >= end_out) {
+ state->state = ZSTATE_CREATE_HDR;
+ update_state(stream, start_in, next_in, end_in, start_out, next_out,
+ end_out);
+ return;
+ }
+
+ literal = load_u32(next_in);
+ hash = compute_hash(literal) & hash_mask;
+ dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ last_seen[hash] = (uint64_t) (next_in - file_start);
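+		/* Distances are computed mod 64k since the hash table holds
+		 * 16-bit positions; stale entries alias to large distances
+		 * and fail the history-window check below.
+		 */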
+
+		/* The -1 is to handle the case when dist = 0 */
+ if (dist - 1 < hist_size) {
+ assert(dist != 0);
+
+ match_length = compare258(next_in - dist, next_in, 258);
+
+ if (match_length >= SHORTEST_MATCH) {
+ next_hash = next_in;
+#ifdef ISAL_LIMIT_HASH_UPDATE
+ end = next_hash + 3;
+#else
+ end = next_hash + match_length;
+#endif
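+				/* With ISAL_LIMIT_HASH_UPDATE only the first
+				 * few bytes of the match are inserted into the
+				 * hash table, trading match quality for speed.
+				 */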
+ next_hash++;
+
+ for (; next_hash < end; next_hash++) {
+ literal = load_u32(next_hash);
+ hash = compute_hash(literal) & hash_mask;
+ last_seen[hash] = (uint64_t) (next_hash - file_start);
+ }
+
+ get_len_icf_code(match_length, &code);
+ get_dist_icf_code(dist, &code2, &extra_bits);
+
+ level_buf->hist.ll_hist[code]++;
+ level_buf->hist.d_hist[code2]++;
+
+ write_deflate_icf(next_out, code, code2, extra_bits);
+ next_out++;
+ next_in += match_length;
+
+ continue;
+ }
+ }
+
+ get_lit_icf_code(literal & 0xFF, &code);
+ level_buf->hist.ll_hist[code]++;
+ write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
+ next_out++;
+ next_in++;
+ }
+
+ update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
+
+ assert(stream->avail_in <= ISAL_LOOK_AHEAD);
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_FLUSH_READ_BUFFER;
+
+ return;
+
+}
+
+void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream)
+{
+ uint32_t literal = 0, hash;
+ uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
+ struct deflate_icf *start_out, *next_out, *end_out;
+ uint16_t match_length;
+ uint32_t dist;
+ uint32_t code, code2, extra_bits;
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint16_t *last_seen = level_buf->hash_hist.hash_table;
+ uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ uint32_t hist_size = state->dist_mask;
+ uint32_t hash_mask = state->hash_mask;
+
+ start_in = stream->next_in;
+ end_in = start_in + stream->avail_in;
+ next_in = start_in;
+
+	start_out = level_buf->icf_buf_next;
+	end_out = start_out + level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);
+ next_out = start_out;
+
+ if (stream->avail_in == 0) {
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_CREATE_HDR;
+ return;
+ }
+
+ while (next_in + 3 < end_in) {
+ if (next_out >= end_out) {
+ state->state = ZSTATE_CREATE_HDR;
+ update_state(stream, start_in, next_in, end_in, start_out, next_out,
+ end_out);
+ return;
+ }
+
+ literal = load_u32(next_in);
+ hash = compute_hash(literal) & hash_mask;
+ dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ last_seen[hash] = (uint64_t) (next_in - file_start);
+
+		if (dist - 1 < hist_size) {	/* The -1 is to handle the case when dist = 0 */
+ match_length = compare258(next_in - dist, next_in, end_in - next_in);
+
+ if (match_length >= SHORTEST_MATCH) {
+ next_hash = next_in;
+#ifdef ISAL_LIMIT_HASH_UPDATE
+ end = next_hash + 3;
+#else
+ end = next_hash + match_length;
+#endif
+ next_hash++;
+
+ for (; next_hash < end - 3; next_hash++) {
+ literal = load_u32(next_hash);
+ hash = compute_hash(literal) & hash_mask;
+ last_seen[hash] = (uint64_t) (next_hash - file_start);
+ }
+
+ get_len_icf_code(match_length, &code);
+ get_dist_icf_code(dist, &code2, &extra_bits);
+
+ level_buf->hist.ll_hist[code]++;
+ level_buf->hist.d_hist[code2]++;
+
+ write_deflate_icf(next_out, code, code2, extra_bits);
+
+ next_out++;
+ next_in += match_length;
+
+ continue;
+ }
+ }
+
+ get_lit_icf_code(literal & 0xFF, &code);
+ level_buf->hist.ll_hist[code]++;
+ write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
+ next_out++;
+ next_in++;
+
+ }
+
+ while (next_in < end_in) {
+ if (next_out >= end_out) {
+ state->state = ZSTATE_CREATE_HDR;
+ update_state(stream, start_in, next_in, end_in, start_out, next_out,
+ end_out);
+ return;
+ }
+
+ literal = *next_in;
+ get_lit_icf_code(literal & 0xFF, &code);
+ level_buf->hist.ll_hist[code]++;
+ write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
+ next_out++;
+ next_in++;
+
+ }
+
+ if (next_in == end_in) {
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_CREATE_HDR;
+ }
+
+ update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
+
+ return;
+}
+
+void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream)
+{
+ uint32_t literal = 0, hash;
+ uint8_t *start_in, *next_in, *end_in, *end, *next_hash;
+ struct deflate_icf *start_out, *next_out, *end_out;
+ uint16_t match_length;
+ uint32_t dist;
+ uint32_t code, code2, extra_bits;
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint16_t *last_seen = level_buf->hash_map.hash_table;
+ uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ uint32_t hist_size = state->dist_mask;
+ uint32_t hash_mask = state->hash_mask;
+
+ start_in = stream->next_in;
+ end_in = start_in + stream->avail_in;
+ next_in = start_in;
+
+ start_out = level_buf->icf_buf_next;
+ end_out = start_out + level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);
+ next_out = start_out;
+
+ if (stream->avail_in == 0) {
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_CREATE_HDR;
+ return;
+ }
+
+ while (next_in + 3 < end_in) {
+ if (next_out >= end_out) {
+ state->state = ZSTATE_CREATE_HDR;
+ update_state(stream, start_in, next_in, end_in, start_out, next_out,
+ end_out);
+ return;
+ }
+
+ literal = load_u32(next_in);
+ hash = compute_hash_mad(literal) & hash_mask;
+ dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ last_seen[hash] = (uint64_t) (next_in - file_start);
+
+		if (dist - 1 < hist_size) {	/* The -1 is to handle the case when dist = 0 */
+ match_length = compare258(next_in - dist, next_in, end_in - next_in);
+
+ if (match_length >= SHORTEST_MATCH) {
+ next_hash = next_in;
+#ifdef ISAL_LIMIT_HASH_UPDATE
+ end = next_hash + 3;
+#else
+ end = next_hash + match_length;
+#endif
+ next_hash++;
+
+ for (; next_hash < end - 3; next_hash++) {
+ literal = load_u32(next_hash);
+ hash = compute_hash_mad(literal) & hash_mask;
+ last_seen[hash] = (uint64_t) (next_hash - file_start);
+ }
+
+ get_len_icf_code(match_length, &code);
+ get_dist_icf_code(dist, &code2, &extra_bits);
+
+ level_buf->hist.ll_hist[code]++;
+ level_buf->hist.d_hist[code2]++;
+
+ write_deflate_icf(next_out, code, code2, extra_bits);
+
+ next_out++;
+ next_in += match_length;
+
+ continue;
+ }
+ }
+
+ get_lit_icf_code(literal & 0xFF, &code);
+ level_buf->hist.ll_hist[code]++;
+ write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
+ next_out++;
+ next_in++;
+
+ }
+
+ while (next_in < end_in) {
+ if (next_out >= end_out) {
+ state->state = ZSTATE_CREATE_HDR;
+ update_state(stream, start_in, next_in, end_in, start_out, next_out,
+ end_out);
+ return;
+ }
+
+ literal = *next_in;
+ get_lit_icf_code(literal & 0xFF, &code);
+ level_buf->hist.ll_hist[code]++;
+ write_deflate_icf(next_out, code, NULL_DIST_SYM, 0);
+ next_out++;
+ next_in++;
+
+ }
+
+ if (next_in == end_in) {
+ if (stream->end_of_stream || stream->flush != NO_FLUSH)
+ state->state = ZSTATE_CREATE_HDR;
+ }
+
+ update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out);
+
+ return;
+}
+
+void isal_deflate_hash_mad_base(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+{
+ uint8_t *next_in = dict;
+ uint8_t *end_in = dict + dict_len - SHORTEST_MATCH;
+ uint32_t literal;
+ uint32_t hash;
+ uint16_t index = current_index - dict_len;
+
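+	/* Seed the hash table from a dictionary: each dictionary position is
+	 * stored at an index offset below current_index so that distances
+	 * computed against future input stay correct mod 64k.
+	 */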
+ while (next_in <= end_in) {
+ literal = load_u32(next_in);
+ hash = compute_hash_mad(literal) & hash_mask;
+ hash_table[hash] = index;
+ index++;
+ next_in++;
+ }
+}
diff --git a/src/isa-l/igzip/igzip_icf_body.c b/src/isa-l/igzip/igzip_icf_body.c
new file mode 100644
index 000000000..5d572323e
--- /dev/null
+++ b/src/isa-l/igzip/igzip_icf_body.c
@@ -0,0 +1,326 @@
+#include "igzip_lib.h"
+#include "huffman.h"
+#include "encode_df.h"
+#include "igzip_level_buf_structs.h"
+
+extern uint64_t gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint32_t);
+extern void set_long_icf_fg(uint8_t *, uint64_t, uint64_t, struct deflate_icf *);
+extern void isal_deflate_icf_body_lvl1(struct isal_zstream *);
+extern void isal_deflate_icf_body_lvl2(struct isal_zstream *);
+extern void isal_deflate_icf_body_lvl3(struct isal_zstream *);
+/*
+ *************************************************************
+ * Helper functions
+ *************************************************************
+ */
+static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len,
+ uint32_t lit_dist, uint32_t extra_bits)
+{
+	/* Equivalent to the field assignments
+	 *      icf->lit_len = lit_len;
+	 *      icf->lit_dist = lit_dist;
+	 *      icf->dist_extra = extra_bits;
+	 * written as a single 32-bit store combining all three bitfields.
+	 */
+
+ store_u32((uint8_t *) icf, lit_len | (lit_dist << LIT_LEN_BIT_COUNT)
+ | (extra_bits << (LIT_LEN_BIT_COUNT + DIST_LIT_BIT_COUNT)));
+}
+
+void set_long_icf_fg_base(uint8_t * next_in, uint64_t processed, uint64_t input_size,
+ struct deflate_icf *match_lookup)
+{
+ uint8_t *end_processed = next_in + processed;
+ uint8_t *end_in = next_in + input_size;
+ uint32_t dist_code, dist_extra, dist, len;
+ uint32_t match_len;
+ uint32_t dist_start[] = {
+ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
+ 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
+ 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
+ 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
+ };
+
+ if (end_in > end_processed + ISAL_LOOK_AHEAD)
+ end_in = end_processed + ISAL_LOOK_AHEAD;
+
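+	/* The match generation pass compares at most 8 bytes per position.
+	 * For any token that hit that limit, redo the comparison past the
+	 * first 8 bytes and write the extended length to this and following
+	 * tokens (one byte shorter each) while it beats what is recorded.
+	 */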
+ while (next_in < end_processed) {
+ dist_code = match_lookup->lit_dist;
+ dist_extra = match_lookup->dist_extra;
+ dist = dist_start[dist_code] + dist_extra;
+ len = match_lookup->lit_len;
+ if (len >= 8 + LEN_OFFSET) {
+ match_len = compare((next_in + 8) - dist, next_in + 8,
+ end_in - (next_in + 8)) + LEN_OFFSET + 8;
+
+ while (match_len > match_lookup->lit_len
+ && match_len >= LEN_OFFSET + SHORTEST_MATCH) {
+ write_deflate_icf(match_lookup,
+ match_len > LEN_MAX ? LEN_MAX : match_len,
+ dist_code, dist_extra);
+ match_lookup++;
+ next_in++;
+ match_len--;
+ }
+ }
+
+ match_lookup++;
+ next_in++;
+ }
+}
+
+/*
+ *************************************************************
+ * Methods for generating one pass match lookup table
+ *************************************************************
+ */
+uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
+ struct deflate_icf *matches_icf_lookup, uint64_t input_size)
+{
+
+ uint32_t dist, len, extra_bits;
+ uint8_t *next_in = stream->next_in, *end_in = stream->next_in + input_size;
+ uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ uint32_t hash;
+ uint64_t next_bytes, match_bytes;
+ uint64_t match;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint16_t *hash_table = level_buf->hash_map.hash_table;
+ uint32_t hist_size = stream->internal_state.dist_mask;
+ uint32_t hash_mask = stream->internal_state.hash_mask;
+
+ if (input_size < ISAL_LOOK_AHEAD)
+ return 0;
+
+ if (stream->internal_state.has_hist == IGZIP_NO_HIST) {
+ matches_icf_lookup->lit_len = *next_in;
+		matches_icf_lookup->lit_dist = 0x1e;	/* NULL_DIST_SYM */
+ matches_icf_lookup->dist_extra = 0;
+
+ hash = compute_hash(load_u32(next_in)) & hash_mask;
+ hash_table[hash] = (uint64_t) (next_in - file_start);
+
+ next_in++;
+ matches_icf_lookup++;
+ stream->internal_state.has_hist = IGZIP_HIST;
+ }
+
+ while (next_in < end_in - ISAL_LOOK_AHEAD) {
+ hash = compute_hash(load_u32(next_in)) & hash_mask;
+ dist = (next_in - file_start - hash_table[hash]);
+ dist = ((dist - 1) & hist_size) + 1;
+ hash_table[hash] = (uint64_t) (next_in - file_start);
+
+ match_bytes = load_u64(next_in - dist);
+ next_bytes = load_u64(next_in);
+ match = next_bytes ^ match_bytes;
+
+ len = tzbytecnt(match);
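+		/* The xor zeroes every matching byte, so counting trailing
+		 * zero bytes gives the number of leading bytes (0..8) that
+		 * match at this distance.
+		 */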
+
+ if (len >= SHORTEST_MATCH) {
+ len += LEN_OFFSET;
+ get_dist_icf_code(dist, &dist, &extra_bits);
+ write_deflate_icf(matches_icf_lookup, len, dist, extra_bits);
+ } else {
+			write_deflate_icf(matches_icf_lookup, *next_in, 0x1e, 0);	/* literal, null dist */
+ }
+
+ next_in++;
+ matches_icf_lookup++;
+ }
+ return next_in - stream->next_in;
+}
+
+/*
+ *************************************************************
+ * One pass methods for parsing provided match lookup table
+ *************************************************************
+ */
+static struct deflate_icf *compress_icf_map_g(struct isal_zstream *stream,
+ struct deflate_icf *matches_next,
+ struct deflate_icf *matches_end)
+{
+ uint32_t lit_len, lit_len2, dist;
+ uint64_t code;
+ struct isal_zstate *state = &stream->internal_state;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ struct deflate_icf *matches_start = matches_next;
+ struct deflate_icf *icf_buf_end =
+ level_buf->icf_buf_next +
+ level_buf->icf_buf_avail_out / sizeof(struct deflate_icf);
+
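+	/* Walk the match table two tokens at a time: a lit_len at or above
+	 * LEN_START is a match, so the parse jumps ahead by its length;
+	 * otherwise the token is a literal and is paired with the following
+	 * token, fusing two literals into one output word when possible.
+	 */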
+ while (matches_next < matches_end - 1 && level_buf->icf_buf_next < icf_buf_end - 1) {
+ code = load_u64((uint8_t *) matches_next);
+ lit_len = code & LIT_LEN_MASK;
+ lit_len2 = (code >> ICF_CODE_LEN) & LIT_LEN_MASK;
+ level_buf->hist.ll_hist[lit_len]++;
+
+ if (lit_len >= LEN_START) {
+ store_u32((uint8_t *) level_buf->icf_buf_next, code);
+ level_buf->icf_buf_next++;
+
+ dist = (code >> ICF_DIST_OFFSET) & DIST_LIT_MASK;
+ level_buf->hist.d_hist[dist]++;
+ lit_len -= LEN_OFFSET;
+ matches_next += lit_len;
+
+ } else if (lit_len2 >= LEN_START) {
+ store_u64((uint8_t *) level_buf->icf_buf_next, code);
+ level_buf->icf_buf_next += 2;
+
+ level_buf->hist.ll_hist[lit_len2]++;
+
+ dist = (code >> (ICF_CODE_LEN + ICF_DIST_OFFSET)) & DIST_LIT_MASK;
+ level_buf->hist.d_hist[dist]++;
+ lit_len2 -= LEN_OFFSET - 1;
+ matches_next += lit_len2;
+
+ } else {
+ code = ((lit_len2 + LIT_START) << ICF_DIST_OFFSET) | lit_len;
+ store_u32((uint8_t *) level_buf->icf_buf_next, code);
+ level_buf->icf_buf_next++;
+
+ level_buf->hist.ll_hist[lit_len2]++;
+
+ matches_next += 2;
+ }
+ }
+
+ while (matches_next < matches_end && level_buf->icf_buf_next < icf_buf_end) {
+ code = load_u32((uint8_t *) matches_next);
+ lit_len = code & LIT_LEN_MASK;
+ store_u32((uint8_t *) level_buf->icf_buf_next, code);
+ level_buf->icf_buf_next++;
+
+ level_buf->hist.ll_hist[lit_len]++;
+ if (lit_len >= LEN_START) {
+			dist = (code >> ICF_DIST_OFFSET) & DIST_LIT_MASK;
+ level_buf->hist.d_hist[dist]++;
+ lit_len -= LEN_OFFSET;
+ matches_next += lit_len;
+ } else {
+ matches_next++;
+ }
+ }
+
+ level_buf->icf_buf_avail_out =
+ (icf_buf_end - level_buf->icf_buf_next) * sizeof(struct deflate_icf);
+
+ state->block_end += matches_next - matches_start;
+ if (matches_next > matches_end && matches_start < matches_end) {
+ stream->next_in += matches_next - matches_end;
+ stream->avail_in -= matches_next - matches_end;
+ stream->total_in += matches_next - matches_end;
+ }
+
+ return matches_next;
+
+}
+
+/*
+ *************************************************************
+ * Compression functions combining different methods
+ *************************************************************
+ */
+static inline void icf_body_next_state(struct isal_zstream *stream)
+{
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ struct isal_zstate *state = &stream->internal_state;
+
+ if (level_buf->icf_buf_avail_out <= 0)
+ state->state = ZSTATE_CREATE_HDR;
+
+ else if (stream->avail_in <= ISAL_LOOK_AHEAD
+ && (stream->end_of_stream || stream->flush != NO_FLUSH))
+ state->state = ZSTATE_FLUSH_READ_BUFFER;
+}
+
+void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream)
+{
+ struct deflate_icf *matches_icf, *matches_next_icf, *matches_end_icf;
+ struct deflate_icf *matches_icf_lookup;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint32_t input_size, processed;
+
+ matches_icf = level_buf->hash_map.matches;
+ matches_icf_lookup = matches_icf;
+ matches_next_icf = level_buf->hash_map.matches_next;
+ matches_end_icf = level_buf->hash_map.matches_end;
+
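+	/* Drain matches left over from the previous call, then alternate
+	 * between building a match lookup table (gen_icf_map), extending
+	 * long matches across entries (set_long_icf_fg) and parsing the
+	 * table into ICF tokens (compress_icf_map_g) until input data or
+	 * output space runs out.
+	 */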
+ matches_next_icf = compress_icf_map_g(stream, matches_next_icf, matches_end_icf);
+
+ while (matches_next_icf >= matches_end_icf) {
+ input_size = MATCH_BUF_SIZE;
+ input_size = (input_size > stream->avail_in) ? stream->avail_in : input_size;
+
+ if (input_size <= ISAL_LOOK_AHEAD)
+ break;
+
+ processed = gen_icf_map_h1_base(stream, matches_icf_lookup, input_size);
+
+ set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);
+
+ stream->next_in += processed;
+ stream->avail_in -= processed;
+ stream->total_in += processed;
+
+ matches_end_icf = matches_icf + processed;
+ matches_next_icf = compress_icf_map_g(stream, matches_icf, matches_end_icf);
+ }
+
+ level_buf->hash_map.matches_next = matches_next_icf;
+ level_buf->hash_map.matches_end = matches_end_icf;
+
+ icf_body_next_state(stream);
+}
+
+void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream)
+{
+ struct deflate_icf *matches_icf, *matches_next_icf, *matches_end_icf;
+ struct deflate_icf *matches_icf_lookup;
+ struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
+ uint32_t input_size, processed;
+
+ matches_icf = level_buf->hash_map.matches;
+ matches_icf_lookup = matches_icf;
+ matches_next_icf = level_buf->hash_map.matches_next;
+ matches_end_icf = level_buf->hash_map.matches_end;
+
+ matches_next_icf = compress_icf_map_g(stream, matches_next_icf, matches_end_icf);
+
+ while (matches_next_icf >= matches_end_icf) {
+ input_size = MATCH_BUF_SIZE;
+ input_size = (input_size > stream->avail_in) ? stream->avail_in : input_size;
+
+ if (input_size <= ISAL_LOOK_AHEAD)
+ break;
+
+ processed = gen_icf_map_lh1(stream, matches_icf_lookup, input_size);
+
+ set_long_icf_fg(stream->next_in, processed, input_size, matches_icf_lookup);
+
+ stream->next_in += processed;
+ stream->avail_in -= processed;
+ stream->total_in += processed;
+
+ matches_end_icf = matches_icf + processed;
+ matches_next_icf = compress_icf_map_g(stream, matches_icf, matches_end_icf);
+ }
+
+ level_buf->hash_map.matches_next = matches_next_icf;
+ level_buf->hash_map.matches_end = matches_end_icf;
+
+ icf_body_next_state(stream);
+}
+
+void isal_deflate_icf_body(struct isal_zstream *stream)
+{
+ switch (stream->level) {
+ case 3:
+ isal_deflate_icf_body_lvl3(stream);
+ break;
+ case 2:
+ isal_deflate_icf_body_lvl2(stream);
+ break;
+ case 1:
+ default:
+ isal_deflate_icf_body_lvl1(stream);
+ }
+}
diff --git a/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm b/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm
new file mode 100644
index 000000000..c74a24d30
--- /dev/null
+++ b/src/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm
@@ -0,0 +1,906 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%include "options.asm"
+
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "bitbuf2.asm"
+%include "huffman.asm"
+%include "igzip_compare_types.asm"
+%include "reg_sizes.asm"
+
+%include "stdmac.asm"
+%ifdef DEBUG
+%macro MARK 1
+global %1
+%1:
+%endm
+%else
+%macro MARK 1
+%endm
+%endif
+
+%define LARGE_MATCH_HASH_REP 1 ; Hash 4 * LARGE_MATCH_HASH_REP elements
+%define LARGE_MATCH_MIN 264 ; Minimum match size to enter large match emit loop
+%define MIN_INBUF_PADDING 16
+%define MAX_EMIT_SIZE 258 * 16
+
+%define SKIP_SIZE_BASE (2 << 10) ; No match length before starting skipping
+%define SKIP_BASE 32 ; Initial skip size
+%define SKIP_START 512 ; Start increasing skip size once level is beyond SKIP_START
+%define SKIP_RATE 2 ; Rate skip size increases after SKIP_START
+%define MAX_SKIP_SIZE 128 ; Maximum skip size
+%define MAX_SKIP_LEVEL (((MAX_SKIP_SIZE - SKIP_BASE) / SKIP_RATE) + SKIP_START) ; Maximum skip level
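+;; Once SKIP_SIZE_BASE bytes go by without a match, the body falls into the
+;; skip paths (.skip_forward_long / .skip_forward_short), which emit blocks
+;; of literals whose size grows with the skip level to move quickly through
+;; incompressible data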
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define file_start rdi
+%define file_length r15
+%define level_buf r14
+%define f_i r10
+%define m_out_buf r11
+
+%define curr_data rax
+
+%define tmp2 rcx
+%define skip_count rcx
+
+%define dist rbx
+%define dist_code2 rbx
+%define lit_code2 rbx
+%define hmask2 rbx
+
+%define dist2 r12
+%define dist_code r12
+%define hmask3 r12
+
+%define tmp1 rsi
+%define lit_code rsi
+
+%define curr_data2 r8
+%define len2 r8
+%define tmp4 r8
+%define hmask1 r8
+%define len_code2 r8
+
+%define len rdx
+%define len_code rdx
+%define hash3 rdx
+
+%define stream r13
+%define tmp3 r13
+
+%define hash rbp
+%define hash2 r9
+
+;; GPR r8 & r15 can be used
+
+%define xtmp0 xmm0 ; tmp
+%define xtmp1 xmm1 ; tmp
+%define xlow_lit_shuf xmm2
+%define xup_lit_shuf xmm3
+%define xdata xmm4
+%define xlit xmm5
+
+%define ytmp0 ymm0 ; tmp
+%define ytmp1 ymm1 ; tmp
+
+%define hash_table level_buf + _hash8k_hash_table
+%define lit_len_hist level_buf + _hist_lit_len
+%define dist_hist level_buf + _hist_dist
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+m_out_end equ 0 ; local variable (8 bytes)
+m_out_start equ 8
+dist_mask_offset equ 16
+hash_mask_offset equ 24
+f_end_i_mem_offset equ 32
+stream_offset equ 40
+inbuf_slop_offset equ 48
+skip_match_offset equ 56
+skip_level_offset equ 64
+gpr_save_mem_offset equ 80 ; gpr save area (8*8 bytes)
+xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
+stack_size equ 11*8 + 8*8 + 4*16
+
+;;; stack_size includes an extra 8 bytes because the stack pointer is an odd
+;;; multiple of 8 after a function call and the xmm save area must be 16-byte
+;;; aligned
+
+;; Defines to generate functions for different architectures
+%xdefine ARCH 01
+%xdefine ARCH1 02
+%xdefine ARCH2 04
+
+%ifndef COMPARE_TYPE
+%xdefine COMPARE_TYPE_NOT_DEF
+%xdefine COMPARE_TYPE 1
+%xdefine COMPARE_TYPE1 2
+%xdefine COMPARE_TYPE2 3
+%endif
+
+;; Defines to generate functions for different levels
+%xdefine METHOD hash_hist
+
+%rep 3
+%if ARCH == 04
+%define USE_HSWNI
+%endif
+
+[bits 64]
+default rel
+section .text
+
+; void isal_deflate_icf_body <hashsize> <arch> ( isal_zstream *stream )
+; a version of this function is generated for each arch define (01, 02, 04)
+; arg 1: rcx: addr of stream
+global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH
+isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ :
+ endbranch
+%ifidn __OUTPUT_FORMAT__, elf64
+ mov rcx, rdi
+%endif
+
+ ;; do nothing if (avail_in == 0)
+ cmp dword [rcx + _avail_in], 0
+ jne .skip1
+
+ ;; Set stream's next state
+ mov rdx, ZSTATE_CREATE_HDR
+ mov eax, [rcx + _internal_state_state]
+ cmp word [rcx + _end_of_stream], 0
+ cmovne rax, rdx
+ cmp word [rcx + _flush], _NO_FLUSH
+ cmovne rax, rdx
+ mov dword [rcx + _internal_state_state], eax
+ ret
+.skip1:
+
+%ifdef ALIGN_STACK
+ push rbp
+ mov rbp, rsp
+ sub rsp, stack_size
+ and rsp, ~15
+%else
+ sub rsp, stack_size
+%endif
+
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rsi
+ mov [rsp + gpr_save_mem_offset + 2*8], rdi
+ mov [rsp + gpr_save_mem_offset + 3*8], rbp
+ mov [rsp + gpr_save_mem_offset + 4*8], r12
+ mov [rsp + gpr_save_mem_offset + 5*8], r13
+ mov [rsp + gpr_save_mem_offset + 6*8], r14
+ mov [rsp + gpr_save_mem_offset + 7*8], r15
+
+ mov stream, rcx
+ mov [rsp + stream_offset], stream
+
+ mov byte [stream + _internal_state_has_eob], 0
+
+	mov tmp1 %+ d, dword [stream + _internal_state_dist_mask]
+	mov [rsp + dist_mask_offset], tmp1
+	mov tmp1 %+ d, dword [stream + _internal_state_hash_mask]
+ mov [rsp + hash_mask_offset], tmp1
+
+	; set up the ICF output buffer: m_out_buf = level_buf->icf_buf_next
+ mov level_buf, [stream + _level_buf]
+ mov m_out_buf, [level_buf + _icf_buf_next]
+
+ mov [rsp + m_out_start], m_out_buf
+ mov tmp1, [level_buf + _icf_buf_avail_out]
+ add tmp1, m_out_buf
+ sub tmp1, SLOP
+
+ mov [rsp + m_out_end], tmp1
+
+ mov file_start, [stream + _next_in]
+
+ mov f_i %+ d, dword [stream + _total_in]
+ sub file_start, f_i
+
+ mov file_length %+ d, [stream + _avail_in]
+ add file_length, f_i
+
+ mov [rsp + skip_match_offset], f_i
+ add qword [rsp + skip_match_offset], SKIP_SIZE_BASE
+ mov qword [rsp + skip_level_offset], 0
+
+ PBROADCASTD xlit, dword [min_lit_dist_syms]
+ MOVDQU xlow_lit_shuf, [low_lit_shuf]
+ MOVDQU xup_lit_shuf, [up_lit_shuf]
+
+ mov qword [rsp + inbuf_slop_offset], MIN_INBUF_PADDING
+ cmp byte [stream + _end_of_stream], 0
+ jnz .default_inbuf_padding
+ cmp byte [stream + _flush], 0
+ jnz .default_inbuf_padding
+ mov qword [rsp + inbuf_slop_offset], LA
+.default_inbuf_padding:
+
+	; file_length -= inbuf_slop;
+	sub file_length, [rsp + inbuf_slop_offset]
+	mov hmask1 %+ d, [rsp + hash_mask_offset]
+
+	; if (file_length <= f_i) goto input_end;
+	cmp file_length, f_i
+	jle .input_end
+
+ ; for (f_i = f_start_i; f_i < file_length; f_i++) {
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ mov tmp1, curr_data
+
+ compute_hash hash, curr_data
+
+ shr tmp1, 8
+ compute_hash hash2, tmp1
+
+ and hash, hmask1
+ and hash2, hmask1
+
+ cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
+ je .write_first_byte
+
+ jmp .loop2
+ align 16
+
+.loop2:
+ mov tmp3 %+ d, [rsp + dist_mask_offset]
+ mov hmask1 %+ d, [rsp + hash_mask_offset]
+	; if (output buffer is full) goto output_end;
+ cmp m_out_buf, [rsp + m_out_end]
+ ja .output_end
+
+ xor dist, dist
+ xor dist2, dist2
+
+ lea tmp1, [file_start + f_i]
+
+ mov dist %+ w, f_i %+ w
+ dec dist
+ sub dist %+ w, word [hash_table + 2 * hash]
+ mov [hash_table + 2 * hash], f_i %+ w
+
+ inc f_i
+
+ mov tmp2, curr_data
+ shr curr_data, 16
+ compute_hash hash, curr_data
+ and hash %+ d, hmask1 %+ d
+
+ mov dist2 %+ w, f_i %+ w
+ dec dist2
+ sub dist2 %+ w, word [hash_table + 2 * hash2]
+ mov [hash_table + 2 * hash2], f_i %+ w
+
+ ; if ((dist-1) < (D-1)) {
+ and dist %+ d, tmp3 %+ d
+ neg dist
+
+ shr tmp2, 24
+ compute_hash hash2, tmp2
+ and hash2 %+ d, hmask1 %+ d
+
+ and dist2 %+ d, tmp3 %+ d
+ neg dist2
+
+ ;; Check for long len/dist match (>7) with first literal
+ MOVQ len, xdata
+ mov curr_data, len
+ PSRLDQ xdata, 1
+ xor len, [tmp1 + dist - 1]
+ jz .compare_loop
+
+ ;; Check for len/dist match (>7) with second literal
+ MOVQ len2, xdata
+ xor len2, [tmp1 + dist2]
+ jz .compare_loop2
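+	;; A zero xor above means at least 8 matching bytes, handled by the
+	;; large-compare paths; otherwise the low 32 bits of each xor decide
+	;; whether a 4-to-7 byte match exists at either candidate position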
+
+ movzx lit_code, curr_data %+ b
+ shr curr_data, 8
+
+ ;; Check for len/dist match for first literal
+ test len %+ d, 0xFFFFFFFF
+ jz .len_dist_huffman_pre
+
+ PSRLDQ xdata, 1
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code]
+ movzx lit_code2, curr_data %+ b
+ ;; Check for len/dist match for second literal
+ test len2 %+ d, 0xFFFFFFFF
+ jnz .write_lit_bits
+
+.len_dist_lit_huffman_pre:
+ bsf len2, len2
+ shr len2, 3
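+	;; bsf finds the lowest set bit of the xor (the first mismatch);
+	;; dividing by 8 converts it to the number of matching bytes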
+
+.len_dist_lit_huffman:
+ or lit_code, LIT
+ mov dword [m_out_buf], lit_code %+ d
+
+ neg dist2
+
+ get_dist_icf_code dist2, dist_code2, tmp1
+
+ mov hmask3 %+ d, dword [rsp + hash_mask_offset]
+
+ ;; Setup for updating hash
+ lea tmp3, [f_i + 1] ; tmp3 <= k
+
+ mov tmp2, f_i
+ add file_start, f_i
+ add f_i, len2
+ cmp f_i, file_length
+ jg .len_dist_lit_huffman_finish
+
+ lea tmp1, [f_i + SKIP_SIZE_BASE]
+ mov qword [rsp + skip_match_offset], tmp1
+ sub qword [rsp + skip_level_offset], len2
+
+ MOVDQU xdata, [file_start + len2]
+ mov tmp1, [file_start + len2]
+ sub file_start, tmp2
+
+ shr curr_data, 24
+ compute_hash hash3, curr_data
+ and hash3 %+ d, hmask3 %+ d
+
+ mov curr_data, tmp1
+ shr tmp1, 8
+
+ mov [hash_table + 2 * hash], tmp3 %+ w
+
+ compute_hash hash, curr_data
+
+	add tmp3, 1
+ mov [hash_table + 2 * hash2], tmp3 %+ w
+
+ compute_hash hash2, tmp1
+
+ add tmp3, 1
+ mov [hash_table + 2 * hash3], tmp3 %+ w
+
+ add dist_code2, 254
+ add dist_code2, len2
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*(len2 + 254)]
+
+ mov dword [m_out_buf + 4], dist_code2 %+ d
+ add m_out_buf, 8
+
+ shr dist_code2, DIST_OFFSET
+ and dist_code2, 0x1F
+ inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2]
+
+ ; hash = compute_hash(state->file_start + f_i) & hash_mask;
+ and hash %+ d, hmask3 %+ d
+ and hash2 %+ d, hmask3 %+ d
+
+ ; continue
+ jmp .loop2
+
+.len_dist_lit_huffman_finish:
+ sub file_start, tmp2
+
+ mov [hash_table + 2 * hash], tmp3 %+ w
+	add tmp3, 1
+ mov [hash_table + 2 * hash2], tmp3 %+ w
+
+ add dist_code2, 254
+ add dist_code2, len2
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*(len2 + 254)]
+
+ mov dword [m_out_buf + 4], dist_code2 %+ d
+ add m_out_buf, 8
+
+ shr dist_code2, DIST_OFFSET
+ and dist_code2, 0x1F
+ inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2]
+
+ jmp .input_end
+
+.len_dist_huffman_pre:
+ bsf len, len
+ shr len, 3
+
+.len_dist_huffman:
+ dec f_i
+	;; Setup for updating hash
+ lea tmp3, [f_i + 2] ; tmp3 <= k
+
+ neg dist
+
+ ; get_dist_code(dist, &code2, &code_len2);
+ get_dist_icf_code dist, dist_code, tmp1
+
+.len_dist_huffman_skip:
+
+ mov hmask2 %+ d, [rsp + hash_mask_offset]
+
+ mov tmp1, f_i
+ add file_start, f_i
+
+ add f_i, len
+ cmp f_i, file_length
+ jg .len_dist_huffman_finish
+
+ lea tmp2, [f_i + SKIP_SIZE_BASE]
+ mov qword [rsp + skip_match_offset], tmp2
+ sub qword [rsp + skip_level_offset], len
+
+ MOVDQU xdata, [file_start + len]
+ mov curr_data2, [file_start + len]
+ mov curr_data, curr_data2
+ sub file_start, tmp1
+ ; get_len_code(len, &code, &code_len);
+ lea len_code, [len + 254]
+ or dist_code, len_code
+
+ mov [hash_table + 2 * hash], tmp3 %+ w
+	add tmp3, 1
+ mov [hash_table + 2 * hash2], tmp3 %+ w
+
+ compute_hash hash, curr_data
+
+ shr curr_data2, 8
+ compute_hash hash2, curr_data2
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*len_code]
+
+ mov dword [m_out_buf], dist_code %+ d
+ add m_out_buf, 4
+
+ shr dist_code, DIST_OFFSET
+ and dist_code, 0x1F
+ inc dword [dist_hist + HIST_ELEM_SIZE*dist_code]
+
+ ; hash = compute_hash(state->file_start + f_i) & hash_mask;
+ and hash %+ d, hmask2 %+ d
+ and hash2 %+ d, hmask2 %+ d
+
+ ; continue
+ jmp .loop2
+
+.len_dist_huffman_finish:
+ sub file_start, tmp1
+
+ ; get_len_code(len, &code, &code_len);
+ lea len_code, [len + 254]
+ or dist_code, len_code
+
+ mov [hash_table + 2 * hash], tmp3 %+ w
+	add tmp3, 1
+ mov [hash_table + 2 * hash2], tmp3 %+ w
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*len_code]
+
+ mov dword [m_out_buf], dist_code %+ d
+ add m_out_buf, 4
+
+ shr dist_code, DIST_OFFSET
+ and dist_code, 0x1F
+ inc dword [dist_hist + HIST_ELEM_SIZE*dist_code]
+
+ jmp .input_end
+
+.write_lit_bits:
+ MOVQ curr_data, xdata
+
+ add f_i, 1
+ cmp f_i, file_length
+ jg .write_lit_bits_finish
+
+ MOVDQU xdata, [file_start + f_i]
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2]
+
+ shl lit_code2, DIST_OFFSET
+ lea lit_code, [lit_code + lit_code2 + (31 << DIST_OFFSET)]
+
+ mov dword [m_out_buf], lit_code %+ d
+ add m_out_buf, 4
+
+ cmp f_i, [rsp + skip_match_offset]
+ jle .loop2
+
+ xor tmp3, tmp3
+ mov rcx, [rsp + skip_level_offset]
+ add rcx, 1
+ cmovl rcx, tmp3
+ mov tmp1, MAX_SKIP_LEVEL
+ cmp rcx, MAX_SKIP_LEVEL
+ cmovg rcx, tmp1
+
+ mov tmp1, SKIP_SIZE_BASE
+ shr tmp1, cl
+
+%if MAX_SKIP_LEVEL > 63
+ cmp rcx, 63
+ cmovg tmp1, tmp3
+%endif
+ mov [rsp + skip_match_offset], tmp1
+ mov [rsp + skip_level_offset], rcx
+
+ sub rcx, SKIP_START
+ cmovl rcx, tmp3
+
+ lea skip_count, [SKIP_RATE * rcx + SKIP_BASE]
+ and skip_count, -SKIP_BASE
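+	;; skip_count grows linearly once the skip level passes SKIP_START and
+	;; is rounded down to a multiple of SKIP_BASE, keeping it divisible by
+	;; the 8-byte and 2-byte steps of the skip loops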
+
+ mov tmp1, [rsp + m_out_end]
+ lea tmp1, [tmp1 + 4]
+ sub tmp1, m_out_buf
+ shr tmp1, 1
+ cmp tmp1, skip_count
+ jl .skip_forward_short
+
+ mov tmp1, [rsp + inbuf_slop_offset]
+ add tmp1, file_length
+ sub tmp1, f_i
+ cmp tmp1, skip_count
+ jl .skip_forward_short
+
+.skip_forward_long:
+ MOVQ xdata, [file_start + f_i]
+
+ movzx lit_code, byte [file_start + f_i]
+ movzx lit_code2, byte [file_start + f_i + 1]
+
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code], 1
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2], 1
+
+ movzx lit_code, byte [file_start + f_i + 2]
+ movzx lit_code2, byte [file_start + f_i + 3]
+
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code], 1
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2], 1
+
+ movzx lit_code, byte [file_start + f_i + 4]
+ movzx lit_code2, byte [file_start + f_i + 5]
+
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code], 1
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2], 1
+
+ movzx lit_code, byte [file_start + f_i + 6]
+ movzx lit_code2, byte [file_start + f_i + 7]
+
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code], 1
+ add dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2], 1
+
+ PSHUFB xtmp0, xdata, xlow_lit_shuf
+ PSHUFB xtmp1, xdata, xup_lit_shuf
+ PSLLD xtmp1, xtmp1, DIST_OFFSET
+ POR xtmp0, xtmp0, xtmp1
+ PADDD xtmp0, xtmp0, xlit
+ MOVDQU [m_out_buf], xtmp0
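+	;; The shuffles above pack the eight literals into four ICF tokens,
+	;; two literals each: even bytes form the lit_len field, odd bytes are
+	;; shifted into the distance field, and xlit adds the literal-pair
+	;; marker to every token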
+
+ add m_out_buf, 16
+ add f_i, 8
+
+ sub skip_count, 8
+ jg .skip_forward_long
+
+ cmp file_length, f_i
+ jle .input_end
+
+ mov curr_data, [file_start + f_i]
+ MOVDQU xdata, [file_start + f_i]
+ add [rsp + skip_match_offset], f_i
+
+ jmp .loop2
+
+.skip_forward_short:
+ movzx lit_code, byte [file_start + f_i]
+ movzx lit_code2, byte [file_start + f_i + 1]
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code]
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2]
+
+ shl lit_code2, DIST_OFFSET
+ lea lit_code, [lit_code + lit_code2 + (31 << DIST_OFFSET)]
+
+ mov dword [m_out_buf], lit_code %+ d
+ add m_out_buf, 4
+ add f_i, 2
+
+ cmp m_out_buf, [rsp + m_out_end]
+ ja .output_end
+
+ cmp file_length, f_i
+ jle .input_end
+
+ jmp .skip_forward_short
+
+.write_lit_bits_finish:
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2]
+
+ shl lit_code2, DIST_OFFSET
+ lea lit_code, [lit_code + lit_code2 + (31 << DIST_OFFSET)]
+
+ mov dword [m_out_buf], lit_code %+ d
+ add m_out_buf, 4
+
+.input_end:
+ mov stream, [rsp + stream_offset]
+ mov tmp1, ZSTATE_FLUSH_READ_BUFFER
+ mov tmp2, ZSTATE_BODY
+ cmp word [stream + _end_of_stream], 0
+ cmovne tmp2, tmp1
+ cmp word [stream + _flush], _NO_FLUSH
+
+ cmovne tmp2, tmp1
+ mov dword [stream + _internal_state_state], tmp2 %+ d
+ jmp .end
+
+.output_end:
+ mov stream, [rsp + stream_offset]
+ mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
+
+.end:
+ ;; update input buffer
+ add file_length, [rsp + inbuf_slop_offset]
+ mov [stream + _total_in], f_i %+ d
+ mov [stream + _internal_state_block_end], f_i %+ d
+ add file_start, f_i
+ mov [stream + _next_in], file_start
+ sub file_length, f_i
+ mov [stream + _avail_in], file_length %+ d
+
+ ;; update output buffer
+ mov [level_buf + _icf_buf_next], m_out_buf
+ sub m_out_buf, [rsp + m_out_start]
+ sub [level_buf + _icf_buf_avail_out], m_out_buf %+ d
+
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rsi, [rsp + gpr_save_mem_offset + 1*8]
+ mov rdi, [rsp + gpr_save_mem_offset + 2*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 3*8]
+ mov r12, [rsp + gpr_save_mem_offset + 4*8]
+ mov r13, [rsp + gpr_save_mem_offset + 5*8]
+ mov r14, [rsp + gpr_save_mem_offset + 6*8]
+ mov r15, [rsp + gpr_save_mem_offset + 7*8]
+
+%ifndef ALIGN_STACK
+ add rsp, stack_size
+%else
+ mov rsp, rbp
+ pop rbp
+%endif
+ ret
+
+align 16
+.compare_loop:
+ lea tmp2, [tmp1 + dist - 1]
+
+ mov len2, file_length
+ sub len2, f_i
+ add len2, [rsp + inbuf_slop_offset]
+ add len2, 1
+ mov tmp3, MAX_EMIT_SIZE
+ cmp len2, tmp3
+ cmovg len2, tmp3
+
+ mov len, 8
+ compare_large tmp1, tmp2, len, len2, tmp3, ytmp0, ytmp1
+
+ cmp len, 258
+ jle .len_dist_huffman
+ cmp len, LARGE_MATCH_MIN
+ jge .do_emit
+ mov len, 258
+ jmp .len_dist_huffman
+
+align 16
+.compare_loop2:
+ lea tmp2, [tmp1 + dist2]
+ add tmp1, 1
+
+ mov len, file_length
+ sub len, f_i
+ add len, [rsp + inbuf_slop_offset]
+ mov tmp3, MAX_EMIT_SIZE
+ cmp len, tmp3
+ cmovg len, tmp3
+
+ mov len2, 8
+ compare_large tmp1, tmp2, len2, len, tmp3, ytmp0, ytmp1
+
+ movzx lit_code, curr_data %+ b
+ shr curr_data, 8
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code]
+ cmp len2, 258
+ jle .len_dist_lit_huffman
+ cmp len2, LARGE_MATCH_MIN
+ jge .do_emit2
+ mov len2, 258
+ jmp .len_dist_lit_huffman
+
+.do_emit2:
+ or lit_code, LIT
+ mov dword [m_out_buf], lit_code %+ d
+ add m_out_buf, 4
+
+ inc f_i
+ mov dist, dist2
+ mov len, len2
+
+.do_emit:
+ neg dist
+ get_dist_icf_code dist, dist_code, tmp1
+
+ mov len_code2, 258 + 254
+ or len_code2, dist_code
+ mov tmp1, dist_code
+ shr tmp1, DIST_OFFSET
+ and tmp1, 0x1F
+ lea tmp3, [f_i + 1]
+ dec f_i
+
+ mov [hash_table + 2 * hash], tmp3 %+ w
+	add tmp3, 1
+ mov [hash_table + 2 * hash2], tmp3 %+ w
+.emit:
+ sub len, 258
+ add f_i, 258
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*(258 + 254)]
+ inc dword [dist_hist + HIST_ELEM_SIZE*tmp1]
+ mov dword [m_out_buf], len_code2 %+ d
+ add m_out_buf, 4
+
+ cmp m_out_buf, [rsp + m_out_end]
+ ja .output_end
+
+ cmp len, LARGE_MATCH_MIN
+ jge .emit
+
+ mov len2, 258
+ cmp len, len2
+ cmovg len, len2
+
+ ; get_len_code(len, &code, &code_len);
+ add f_i, len
+ lea len_code, [len + 254]
+ or dist_code, len_code
+
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*len_code]
+ inc dword [dist_hist + HIST_ELEM_SIZE*tmp1]
+
+ mov dword [m_out_buf], dist_code %+ d
+ add m_out_buf, 4
+
+ cmp file_length, f_i
+ jle .input_end
+
+ lea tmp2, [f_i - 4 * LARGE_MATCH_HASH_REP]
+ mov hmask2 %+ d, [rsp + hash_mask_offset]
+
+%rep LARGE_MATCH_HASH_REP
+ mov curr_data %+ d, dword [file_start + tmp2]
+ mov curr_data2 %+ d, dword [file_start + tmp2 + 1]
+ mov tmp3 %+ d, dword [file_start + tmp2 + 2]
+ mov tmp1 %+ d, dword [file_start + tmp2 + 3]
+
+ compute_hash hash, curr_data
+ compute_hash hash2, curr_data2
+ compute_hash hash3, tmp3
+ compute_hash hmask3, tmp1
+
+ and hash %+ d, hmask2 %+ d
+ and hash2 %+ d, hmask2 %+ d
+ and hash3 %+ d, hmask2 %+ d
+ and hmask3 %+ d, hmask2 %+ d
+
+ mov [hash_table + 2 * hash], tmp2 %+ w
+ add tmp2, 1
+ mov [hash_table + 2 * hash2], tmp2 %+ w
+ add tmp2, 1
+ mov [hash_table + 2 * hash3], tmp2 %+ w
+ add tmp2, 1
+ mov [hash_table + 2 * hmask3], tmp2 %+ w
+%if (LARGE_MATCH_HASH_REP > 1)
+ add tmp2, 1
+%endif
+%endrep
+ ; for (f_i = f_start_i; f_i < file_length; f_i++) {
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ mov tmp1, curr_data
+
+ compute_hash hash, curr_data
+
+ shr tmp1, 8
+ compute_hash hash2, tmp1
+
+ and hash, hmask2
+ and hash2, hmask2
+
+ jmp .loop2
+
+.write_first_byte:
+ mov hmask1 %+ d, [rsp + hash_mask_offset]
+ cmp m_out_buf, [rsp + m_out_end]
+ ja .output_end
+
+ mov byte [stream + _internal_state_has_hist], IGZIP_HIST
+
+ mov [hash_table + 2 * hash], f_i %+ w
+
+ mov hash, hash2
+ shr tmp2, 16
+ compute_hash hash2, tmp2
+
+ and curr_data, 0xff
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*curr_data]
+ or curr_data, LIT
+
+ mov dword [m_out_buf], curr_data %+ d
+ add m_out_buf, 4
+
+ MOVDQU xdata, [file_start + f_i + 1]
+ add f_i, 1
+ mov curr_data, [file_start + f_i]
+ and hash %+ d, hmask1 %+ d
+ and hash2 %+ d, hmask1 %+ d
+
+ cmp f_i, file_length
+ jl .loop2
+ jmp .input_end
+
+%ifdef USE_HSWNI
+%undef USE_HSWNI
+%endif
+
+;; Shift defines over in order to iterate over all versions
+%undef ARCH
+%xdefine ARCH ARCH1
+%undef ARCH1
+%xdefine ARCH1 ARCH2
+
+%ifdef COMPARE_TYPE_NOT_DEF
+%undef COMPARE_TYPE
+%xdefine COMPARE_TYPE COMPARE_TYPE1
+%undef COMPARE_TYPE1
+%xdefine COMPARE_TYPE1 COMPARE_TYPE2
+%endif
+%endrep
+min_lit_dist_syms:
+ dd LIT + (1 << DIST_OFFSET)
+
+low_lit_shuf:
+ db 0x00, 0xff, 0xff, 0xff, 0x02, 0xff, 0xff, 0xff
+ db 0x04, 0xff, 0xff, 0xff, 0x06, 0xff, 0xff, 0xff
+up_lit_shuf:
+ db 0x01, 0xff, 0xff, 0xff, 0x03, 0xff, 0xff, 0xff
+ db 0x05, 0xff, 0xff, 0xff, 0x07, 0xff, 0xff, 0xff
diff --git a/src/isa-l/igzip/igzip_icf_finish.asm b/src/isa-l/igzip/igzip_icf_finish.asm
new file mode 100644
index 000000000..231ac0667
--- /dev/null
+++ b/src/isa-l/igzip/igzip_icf_finish.asm
@@ -0,0 +1,327 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "bitbuf2.asm"
+%include "huffman.asm"
+%include "igzip_compare_types.asm"
+
+%include "stdmac.asm"
+%include "reg_sizes.asm"
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define curr_data rax
+%define tmp1 rax
+
+%define f_index rbx
+%define code rbx
+%define tmp4 rbx
+%define tmp5 rbx
+%define tmp6 rbx
+
+%define tmp2 rcx
+%define hash rcx
+
+%define tmp3 rdx
+
+%define stream rsi
+
+%define f_i rdi
+
+%define code_len2 rbp
+%define hmask1 rbp
+
+%define m_out_buf r8
+
+%define level_buf r9
+
+%define dist r10
+%define hmask2 r10
+
+%define code2 r12
+%define f_end_i r12
+
+%define file_start r13
+
+%define len r14
+
+%define hufftables r15
+
+%define hash_table level_buf + _hash8k_hash_table
+%define lit_len_hist level_buf + _hist_lit_len
+%define dist_hist level_buf + _hist_dist
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+f_end_i_mem_offset equ 0 ; local variable (8 bytes)
+m_out_end equ 8
+m_out_start equ 16
+dist_mask_offset equ 24
+hash_mask_offset equ 32
+stack_size equ 5*8
+
+%xdefine METHOD hash_hist
+
+[bits 64]
+default rel
+section .text
+
+; void isal_deflate_icf_finish ( isal_zstream *stream )
+; arg 1: rcx: addr of stream
+global isal_deflate_icf_finish_ %+ METHOD %+ _01
+isal_deflate_icf_finish_ %+ METHOD %+ _01:
+ endbranch
+ PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15
+ sub rsp, stack_size
+
+%ifidn __OUTPUT_FORMAT__, win64
+ mov stream, rcx
+%else
+ mov stream, rdi
+%endif
+
+	; load masks and set up the ICF output buffer
+ mov tmp2 %+ d, dword [stream + _internal_state_dist_mask]
+ mov tmp3 %+ d, dword [stream + _internal_state_hash_mask]
+ mov level_buf, [stream + _level_buf]
+ mov m_out_buf, [level_buf + _icf_buf_next]
+ mov [rsp + m_out_start], m_out_buf
+ mov tmp1, [level_buf + _icf_buf_avail_out]
+ add tmp1, m_out_buf
+ sub tmp1, 4
+
+ mov [rsp + dist_mask_offset], tmp2
+ mov [rsp + hash_mask_offset], tmp3
+ mov [rsp + m_out_end], tmp1
+
+ mov hufftables, [stream + _hufftables]
+
+ mov file_start, [stream + _next_in]
+
+ mov f_i %+ d, dword [stream + _total_in]
+ sub file_start, f_i
+
+ mov f_end_i %+ d, dword [stream + _avail_in]
+ add f_end_i, f_i
+
+ sub f_end_i, LAST_BYTES_COUNT
+ mov [rsp + f_end_i_mem_offset], f_end_i
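+	;; The main loop stops LAST_BYTES_COUNT bytes early so its unaligned
+	;; dword loads cannot read past the input buffer; the tail is emitted
+	;; as literals in .final_bytes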
+ ; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
+ cmp f_i, f_end_i
+ jge .end_loop_2
+
+ mov curr_data %+ d, [file_start + f_i]
+
+ cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
+ jne .skip_write_first_byte
+
+ cmp m_out_buf, [rsp + m_out_end]
+ ja .end_loop_2
+
+ mov hmask1 %+ d, [rsp + hash_mask_offset]
+ compute_hash hash, curr_data
+ and hash %+ d, hmask1 %+ d
+ mov [hash_table + 2 * hash], f_i %+ w
+ mov byte [stream + _internal_state_has_hist], IGZIP_HIST
+ jmp .encode_literal
+
+.skip_write_first_byte:
+
+.loop2:
+ mov tmp3 %+ d, [rsp + dist_mask_offset]
+ mov hmask1 %+ d, [rsp + hash_mask_offset]
+	; if (output buffer is full) goto end_loop_2;
+ cmp m_out_buf, [rsp + m_out_end]
+ ja .end_loop_2
+
+ ; hash = compute_hash(state->file_start + f_i) & hash_mask;
+ mov curr_data %+ d, [file_start + f_i]
+ compute_hash hash, curr_data
+ and hash %+ d, hmask1 %+ d
+
+ ; f_index = state->head[hash];
+ movzx f_index %+ d, word [hash_table + 2 * hash]
+
+ ; state->head[hash] = (uint16_t) f_i;
+ mov [hash_table + 2 * hash], f_i %+ w
+
+ ; dist = f_i - f_index; // mod 64k
+ mov dist %+ d, f_i %+ d
+ sub dist %+ d, f_index %+ d
+ and dist %+ d, 0xFFFF
+
+ ; if ((dist-1) <= (D-1)) {
+ mov tmp1 %+ d, dist %+ d
+ sub tmp1 %+ d, 1
+ cmp tmp1 %+ d, tmp3 %+ d
+ jae .encode_literal
+
+ ; len = f_end_i - f_i;
+ mov tmp4, [rsp + f_end_i_mem_offset]
+ sub tmp4, f_i
+ add tmp4, LAST_BYTES_COUNT
+
+ ; if (len > 258) len = 258;
+ cmp tmp4, 258
+ cmovg tmp4, [c258]
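+	; (258 is the maximum DEFLATE match length; cmovg cannot take an
+	; immediate operand, hence the c258 constant in the .data section)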
+
+ ; len = compare(state->file_start + f_i,
+ ; state->file_start + f_i - dist, len);
+ lea tmp1, [file_start + f_i]
+ mov tmp2, tmp1
+ sub tmp2, dist
+ compare tmp4, tmp1, tmp2, len, tmp3
+
+ ; if (len >= SHORTEST_MATCH) {
+ cmp len, SHORTEST_MATCH
+ jb .encode_literal
+
+ ;; encode as dist/len
+
+ ; get_dist_code(dist, &code2, &code_len2);
+ dec dist
+ get_dist_icf_code dist, code2, tmp3 ;; clobbers dist, rcx
+
+ ;; get_len_code
+ lea code, [len + 254]
+
+ mov hmask2 %+ d, [rsp + hash_mask_offset]
+
+ or code2, code
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*code]
+
+ ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) {
+ lea tmp3, [f_i + 1] ; tmp3 <= k
+ add f_i, len
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jae .skip_hash_update
+
+	; only update the hash table twice (at the two positions after the match start)
+
+ ; hash = compute_hash(state->file_start + k) & hash_mask;
+ mov tmp6 %+ d, dword [file_start + tmp3]
+ compute_hash hash, tmp6
+ and hash %+ d, hmask2 %+ d
+ ; state->head[hash] = k;
+ mov [hash_table + 2 * hash], tmp3 %+ w
+
+ add tmp3, 1
+
+ ; hash = compute_hash(state->file_start + k) & hash_mask;
+ mov tmp6 %+ d, dword [file_start + tmp3]
+ compute_hash hash, tmp6
+ and hash %+ d, hmask2 %+ d
+ ; state->head[hash] = k;
+ mov [hash_table + 2 * hash], tmp3 %+ w
+
+.skip_hash_update:
+ write_dword code2, m_out_buf
+ shr code2, DIST_OFFSET
+ and code2, 0x1F
+ inc dword [dist_hist + HIST_ELEM_SIZE*code2]
+ ; continue
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jl .loop2
+ jmp .end_loop_2
+
+.encode_literal:
+ ; get_lit_code(state->file_start[f_i], &code2, &code_len2);
+ movzx tmp5, byte [file_start + f_i]
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
+ or tmp5, LIT
+ write_dword tmp5, m_out_buf
+ ; continue
+ add f_i, 1
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jl .loop2
+
+.end_loop_2:
+ mov f_end_i, [rsp + f_end_i_mem_offset]
+ add f_end_i, LAST_BYTES_COUNT
+ mov [rsp + f_end_i_mem_offset], f_end_i
+ ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
+ cmp f_i, f_end_i
+ jge .input_end
+
+ xor tmp5, tmp5
+.final_bytes:
+ cmp m_out_buf, [rsp + m_out_end]
+ ja .out_end
+
+ movzx tmp5, byte [file_start + f_i]
+ inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
+ or tmp5, LIT
+ write_dword tmp5, m_out_buf
+
+ inc f_i
+ cmp f_i, [rsp + f_end_i_mem_offset]
+ jl .final_bytes
+
+.input_end:
+ cmp word [stream + _end_of_stream], 0
+ jne .out_end
+ cmp word [stream + _flush], _NO_FLUSH
+ jne .out_end
+ jmp .end
+
+.out_end:
+ mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
+.end:
+ ;; Update input buffer
+ mov f_end_i, [rsp + f_end_i_mem_offset]
+ mov [stream + _total_in], f_i %+ d
+ mov [stream + _internal_state_block_end], f_i %+ d
+
+ add file_start, f_i
+ mov [stream + _next_in], file_start
+ sub f_end_i, f_i
+ mov [stream + _avail_in], f_end_i %+ d
+
+ ;; Update output buffer
+ mov [level_buf + _icf_buf_next], m_out_buf
+
+ ; len = state->bitbuf.buffer_used();
+ sub m_out_buf, [rsp + m_out_start]
+
+ ; stream->avail_out -= len;
+ sub [level_buf + _icf_buf_avail_out], m_out_buf
+
+ add rsp, stack_size
+ POP_ALL
+ ret
+
+section .data
+ align 4
+c258: dq 258
diff --git a/src/isa-l/igzip/igzip_inflate.c b/src/isa-l/igzip/igzip_inflate.c
new file mode 100644
index 000000000..526c2eaca
--- /dev/null
+++ b/src/isa-l/igzip/igzip_inflate.c
@@ -0,0 +1,2520 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include "igzip_lib.h"
+#include "crc.h"
+#include "huff_codes.h"
+#include "igzip_checksums.h"
+#include "igzip_wrapper.h"
+#include "unaligned.h"
+
+#ifndef NO_STATIC_INFLATE_H
+#include "static_inflate.h"
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/endian.h>
+# define bswap_32(x) bswap32(x)
+#elif defined (__APPLE__)
+#include <libkern/OSByteOrder.h>
+# define bswap_32(x) OSSwapInt32(x)
+#elif defined (__GNUC__) && !defined (__MINGW32__)
+# include <byteswap.h>
+#elif defined _WIN64
+# define bswap_32(x) _byteswap_ulong(x)
+#endif
+
+extern int decode_huffman_code_block_stateless(struct inflate_state *, uint8_t * start_out);
+extern struct isal_hufftables hufftables_default; /* For known header detection */
+
+#define LARGE_SHORT_SYM_LEN 25
+#define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
+#define LARGE_LONG_SYM_LEN 10
+#define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
+#define LARGE_SHORT_CODE_LEN_OFFSET 28
+#define LARGE_LONG_CODE_LEN_OFFSET 10
+#define LARGE_FLAG_BIT_OFFSET 25
+#define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
+#define LARGE_SYM_COUNT_OFFSET 26
+#define LARGE_SYM_COUNT_LEN 2
+#define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
+#define LARGE_SHORT_MAX_LEN_OFFSET 26
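+
+/* Large-table short lookup entries pack up to three decoded symbols in bits
+ * 24:0, a long-code flag at bit 25, the symbol count in bits 27:26 and the
+ * total code length in bits 31:28; long-code hint entries instead hold an
+ * offset into long_code_lookup in the low bits and the maximum code length
+ * starting at bit 26. */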
+
+#define SMALL_SHORT_SYM_LEN 9
+#define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
+#define SMALL_LONG_SYM_LEN 9
+#define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
+#define SMALL_SHORT_CODE_LEN_OFFSET 11
+#define SMALL_LONG_CODE_LEN_OFFSET 10
+#define SMALL_FLAG_BIT_OFFSET 10
+#define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
+
+#define DIST_SYM_OFFSET 0
+#define DIST_SYM_LEN 5
+#define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
+#define DIST_SYM_EXTRA_OFFSET 5
+#define DIST_SYM_EXTRA_LEN 4
+#define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
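+
+/* Small-table entries pack the symbol in the low bits (for distances, the
+ * symbol in bits 4:0 and its extra-bit count in bits 8:5), a long-code flag
+ * at bit 10 and the code length in bits 15:11. */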
+
+#define MAX_LIT_LEN_CODE_LEN 21
+#define MAX_LIT_LEN_COUNT (MAX_LIT_LEN_CODE_LEN + 2)
+#define MAX_LIT_LEN_SYM 512
+#define LIT_LEN_ELEMS 514
+
+#define INVALID_SYMBOL 0x1FFF
+#define INVALID_CODE 0xFFFFFF
+
+#define MIN_DEF_MATCH 3
+
+#define TRIPLE_SYM_FLAG 0
+#define DOUBLE_SYM_FLAG TRIPLE_SYM_FLAG + 1
+#define SINGLE_SYM_FLAG DOUBLE_SYM_FLAG + 1
+#define DEFAULT_SYM_FLAG TRIPLE_SYM_FLAG
+
+#define SINGLE_SYM_THRESH (2 * 1024)
+#define DOUBLE_SYM_THRESH (4 * 1024)
+
+/* Structure containing lookup data based on RFC 1951 */
+struct rfc1951_tables {
+ uint8_t dist_extra_bit_count[32];
+ uint32_t dist_start[32];
+ uint8_t len_extra_bit_count[32];
+ uint16_t len_start[32];
+
+};
+
+/* The following tables are based on the tables in the deflate standard,
+ * RFC 1951 page 11. */
+static struct rfc1951_tables rfc_lookup_table = {
+ .dist_extra_bit_count = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
+ 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,
+ 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a,
+ 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00},
+
+ .dist_start = {
+ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d,
+ 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1,
+ 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01,
+ 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000},
+
+ .len_extra_bit_count = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02,
+ 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04,
+ 0x05, 0x05, 0x05, 0x05, 0x00, 0x00, 0x00, 0x00},
+
+ .len_start = {
+ 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a,
+ 0x000b, 0x000d, 0x000f, 0x0011, 0x0013, 0x0017, 0x001b, 0x001f,
+ 0x0023, 0x002b, 0x0033, 0x003b, 0x0043, 0x0053, 0x0063, 0x0073,
+ 0x0083, 0x00a3, 0x00c3, 0x00e3, 0x0102, 0x0103, 0x0000, 0x0000}
+};
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+/* Version info */
+struct slver isal_inflate_init_slver_00010088;
+struct slver isal_inflate_init_slver = { 0x0088, 0x01, 0x00 };
+
+struct slver isal_inflate_reset_slver_0001008f;
+struct slver isal_inflate_reset_slver = { 0x008f, 0x01, 0x00 };
+
+struct slver isal_inflate_stateless_slver_00010089;
+struct slver isal_inflate_stateless_slver = { 0x0089, 0x01, 0x00 };
+
+struct slver isal_inflate_slver_0001008a;
+struct slver isal_inflate_slver = { 0x008a, 0x01, 0x00 };
+
+struct slver isal_inflate_set_dict_slver_0001008d;
+struct slver isal_inflate_set_dict_slver = { 0x008d, 0x01, 0x00 };
+
+/* Performs a copy of repeat_length bytes starting at dest - lookback_distance
+ * into dest. When the src and dest buffers overlap, the copy re-reads bytes
+ * written earlier by the same copy. */
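+/* (e.g. lookback_distance == 1 replicates the previous byte repeat_length
+ * times, which is why the copy must proceed byte by byte) */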
+static void inline byte_copy(uint8_t * dest, uint64_t lookback_distance, int repeat_length)
+{
+ uint8_t *src = dest - lookback_distance;
+
+ for (; repeat_length > 0; repeat_length--)
+ *dest++ = *src++;
+}
+
+static void update_checksum(struct inflate_state *state, uint8_t * start_in, uint64_t length)
+{
+ switch (state->crc_flag) {
+ case ISAL_GZIP:
+ case ISAL_GZIP_NO_HDR:
+ case ISAL_GZIP_NO_HDR_VER:
+ state->crc = crc32_gzip_refl(state->crc, start_in, length);
+ break;
+ case ISAL_ZLIB:
+ case ISAL_ZLIB_NO_HDR:
+ case ISAL_ZLIB_NO_HDR_VER:
+ state->crc = isal_adler32_bam1(state->crc, start_in, length);
+ break;
+ }
+}
+
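+/* The streaming update above keeps the Adler-32 low word off by one
+ * (isal_adler32_bam1); finalize adds the implicit initial value of 1 back
+ * modulo ADLER_MOD. */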
+static void finalize_adler32(struct inflate_state *state)
+{
+
+ state->crc = (state->crc & 0xffff0000) | (((state->crc & 0xffff) + 1) % ADLER_MOD);
+}
+
+static const uint8_t bitrev_table[] = {
+ 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+ 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+ 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+ 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+ 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+ 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+ 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+ 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+ 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+ 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+ 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+ 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+ 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+ 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+ 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+ 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+ 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+ 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+ 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+ 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+ 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+ 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+ 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+ 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+ 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+ 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+ 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+ 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+ 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+ 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+ 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+ 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
+
+};
+
+/*
+ * Returns an integer with the first 'length' bits reversed and all higher bits zeroed
+ */
+static uint32_t inline bit_reverse2(uint16_t bits, uint8_t length)
+{
+ uint32_t bitrev;
+ bitrev = bitrev_table[bits >> 8];
+ bitrev |= bitrev_table[bits & 0xFF] << 8;
+
+ return bitrev >> (16 - length);
+}
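+
+/* e.g. bit_reverse2(0b110, 3) == 0b011: each byte is reversed through the
+ * table, then the 16-bit result is shifted down so only 'length' bits
+ * remain. */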
+
+/* Load data from the input stream into a buffer to allow handling unaligned data */
+static void inline inflate_in_load(struct inflate_state *state, int min_required)
+{
+ uint64_t temp = 0;
+ uint8_t new_bytes;
+
+ if (state->read_in_length >= 64)
+ return;
+
+ if (state->avail_in >= 8) {
+		/* If at least 8 bytes are available, load 64 bits and use
+		 * them to top up read_in */
+ new_bytes = 8 - (state->read_in_length + 7) / 8;
+ temp = load_u64(state->next_in);
+
+ state->read_in |= temp << state->read_in_length;
+ state->next_in += new_bytes;
+ state->avail_in -= new_bytes;
+ state->read_in_length += new_bytes * 8;
+
+ } else {
+ /* Else fill the read_in buffer 1 byte at a time */
+ while (state->read_in_length < 57 && state->avail_in > 0) {
+ temp = *state->next_in;
+ state->read_in |= temp << state->read_in_length;
+ state->next_in++;
+ state->avail_in--;
+ state->read_in_length += 8;
+
+ }
+ }
+}
+
+static uint64_t inline inflate_in_read_bits_unsafe(struct inflate_state *state,
+ uint8_t bit_count)
+{
+ uint64_t ret;
+
+ ret = (state->read_in) & ((1 << bit_count) - 1);
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+
+ return ret;
+}
+
+/* Returns the next bit_count bits from the input stream and shifts the stream
+ * over by bit_count bits */
+static uint64_t inline inflate_in_read_bits(struct inflate_state *state, uint8_t bit_count)
+{
+ /* Load inflate_in if not enough data is in the read_in buffer */
+ inflate_in_load(state, bit_count);
+ return inflate_in_read_bits_unsafe(state, bit_count);
+}
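+
+/* Note: read_in_length may go negative after a read; callers treat a
+ * negative value as truncated input and roll the stream state back. */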
+
+static void inline write_huff_code(struct huff_code *huff_code, uint32_t code, uint32_t length)
+{
+ huff_code->code_and_length = code | length << 24;
+}
+
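+/* Assigns canonical Huffman codes to the code lengths stored in
+ * huff_code_table, per RFC 1951 section 3.2.2: codes of a given length are
+ * consecutive values starting at next_code[length]. Returns
+ * ISAL_INVALID_BLOCK if the code lengths oversubscribe the tree. */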
+static int inline set_codes(struct huff_code *huff_code_table, int table_length,
+ uint16_t * count)
+{
+ uint32_t max, code, length;
+ uint32_t next_code[MAX_HUFF_TREE_DEPTH + 1];
+ int i;
+ struct huff_code *table_end = huff_code_table + table_length;
+
+ /* Setup for calculating huffman codes */
+ next_code[0] = 0;
+ next_code[1] = 0;
+ for (i = 2; i < MAX_HUFF_TREE_DEPTH + 1; i++)
+ next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;
+
+ max = (next_code[MAX_HUFF_TREE_DEPTH] + count[MAX_HUFF_TREE_DEPTH]);
+
+ if (max > (1 << MAX_HUFF_TREE_DEPTH))
+ return ISAL_INVALID_BLOCK;
+
+ /* Calculate code corresponding to a given symbol */
+ for (; huff_code_table < table_end; huff_code_table++) {
+ length = huff_code_table->length;
+ if (length == 0)
+ continue;
+
+ code = bit_reverse2(next_code[length], length);
+
+ write_huff_code(huff_code_table, code, length);
+ next_code[length] += 1;
+ }
+ return 0;
+}
+
+static int inline set_and_expand_lit_len_huffcode(struct huff_code *lit_len_huff,
+ uint32_t table_length,
+ uint16_t * count,
+ uint16_t * expand_count,
+ uint32_t * code_list)
+{
+ int len_sym, len_size, extra_count, extra;
+ uint32_t count_total, count_tmp;
+ uint32_t code, code_len, expand_len;
+ struct huff_code *expand_next = &lit_len_huff[ISAL_DEF_LIT_SYMBOLS];
+ struct huff_code tmp_table[LIT_LEN - ISAL_DEF_LIT_SYMBOLS];
+ uint32_t max;
+ uint32_t next_code[MAX_HUFF_TREE_DEPTH + 1];
+ int i;
+ struct huff_code *table_end;
+ struct huff_code *huff_code_table = lit_len_huff;
+ uint32_t insert_index;
+
+ /* Setup for calculating huffman codes */
+ count_total = 0;
+ count_tmp = expand_count[1];
+ next_code[0] = 0;
+ next_code[1] = 0;
+ expand_count[0] = 0;
+ expand_count[1] = 0;
+
+ for (i = 1; i < MAX_HUFF_TREE_DEPTH; i++) {
+ count_total = count[i] + count_tmp + count_total;
+ count_tmp = expand_count[i + 1];
+ expand_count[i + 1] = count_total;
+ next_code[i + 1] = (next_code[i] + count[i]) << 1;
+ }
+
+ count_tmp = count[i] + count_tmp;
+
+ for (; i < MAX_LIT_LEN_COUNT - 1; i++) {
+ count_total = count_tmp + count_total;
+ count_tmp = expand_count[i + 1];
+ expand_count[i + 1] = count_total;
+ }
+
+ /* Correct for extra symbols used by static header */
+ if (table_length > LIT_LEN)
+ count[8] -= 2;
+
+ max = (next_code[MAX_HUFF_TREE_DEPTH] + count[MAX_HUFF_TREE_DEPTH]);
+
+ if (max > (1 << MAX_HUFF_TREE_DEPTH))
+ return ISAL_INVALID_BLOCK;
+
+ memcpy(count, expand_count, sizeof(*count) * MAX_LIT_LEN_COUNT);
+
+ memcpy(tmp_table, &lit_len_huff[ISAL_DEF_LIT_SYMBOLS],
+ sizeof(*lit_len_huff) * (LIT_LEN - ISAL_DEF_LIT_SYMBOLS));
+ memset(&lit_len_huff[ISAL_DEF_LIT_SYMBOLS], 0,
+ sizeof(*lit_len_huff) * (LIT_LEN_ELEMS - ISAL_DEF_LIT_SYMBOLS));
+
+ /* Calculate code corresponding to a given literal symbol */
+ table_end = huff_code_table + ISAL_DEF_LIT_SYMBOLS;
+ for (; huff_code_table < table_end; huff_code_table++) {
+ code_len = huff_code_table->length;
+ if (code_len == 0)
+ continue;
+
+ code = bit_reverse2(next_code[code_len], code_len);
+
+ insert_index = expand_count[code_len];
+ code_list[insert_index] = huff_code_table - lit_len_huff;
+ expand_count[code_len]++;
+
+ write_huff_code(huff_code_table, code, code_len);
+ next_code[code_len] += 1;
+ }
+
+ /* Calculate code corresponding to a given len symbol */
+ for (len_sym = 0; len_sym < LIT_LEN - ISAL_DEF_LIT_SYMBOLS; len_sym++) {
+ extra_count = rfc_lookup_table.len_extra_bit_count[len_sym];
+ len_size = (1 << extra_count);
+
+ code_len = tmp_table[len_sym].length;
+ if (code_len == 0) {
+ expand_next += len_size;
+ continue;
+ }
+
+ code = bit_reverse2(next_code[code_len], code_len);
+ expand_len = code_len + extra_count;
+ next_code[code_len] += 1;
+ insert_index = expand_count[expand_len];
+ expand_count[expand_len] += len_size;
+
+ for (extra = 0; extra < len_size; extra++) {
+ code_list[insert_index] = expand_next - lit_len_huff;
+ write_huff_code(expand_next, code | (extra << code_len), expand_len);
+ insert_index++;
+ expand_next++;
+ }
+ }
+
+ return 0;
+}
+
+static int inline index_to_sym(int index)
+{
+ return (index != 513) ? index : 512;
+}
+
+/* Sets result to the inflate_huff_code corresponding to the huffcode defined
+ * by the lengths in huff_code_table, where count is a histogram of the
+ * appearance of each code length */
+static void make_inflate_huff_code_lit_len(struct inflate_huff_code_large *result,
+ struct huff_code *huff_code_table,
+ uint32_t table_length, uint16_t * count_total,
+ uint32_t * code_list, uint32_t multisym)
+{
+ int i, j;
+ uint16_t code = 0;
+ uint32_t *long_code_list;
+ uint32_t long_code_length = 0;
+ uint16_t temp_code_list[1 << (MAX_LIT_LEN_CODE_LEN - ISAL_DECODE_LONG_BITS)];
+ uint32_t temp_code_length;
+ uint32_t long_code_lookup_length = 0;
+ uint32_t max_length;
+ uint16_t first_bits;
+ uint32_t code_length;
+ uint16_t long_bits;
+ uint16_t min_increment;
+ uint32_t code_list_len;
+ uint32_t last_length, min_length;
+ uint32_t copy_size;
+ uint32_t *short_code_lookup = result->short_code_lookup;
+ int index1, index2, index3;
+ int sym1, sym2, sym3, sym1_index, sym2_index, sym3_index;
+ uint32_t sym1_code, sym2_code, sym3_code, sym1_len, sym2_len, sym3_len;
+
+ uint32_t max_symbol = MAX_LIT_LEN_SYM;
+
+ code_list_len = count_total[MAX_LIT_LEN_COUNT - 1];
+
+ if (code_list_len == 0) {
+ memset(result->short_code_lookup, 0, sizeof(result->short_code_lookup));
+ return;
+ }
+
+ /* Determine the length of the first code */
+ last_length = huff_code_table[code_list[0]].length;
+ if (last_length > ISAL_DECODE_LONG_BITS)
+ last_length = ISAL_DECODE_LONG_BITS + 1;
+ copy_size = (1 << (last_length - 1));
+
+ /* Initialize short_code_lookup, so invalid lookups process data */
+ memset(short_code_lookup, 0x00, copy_size * sizeof(*short_code_lookup));
+
+ min_length = last_length;
+ for (; last_length <= ISAL_DECODE_LONG_BITS; last_length++) {
+		/* Copy forward previously set codes */
+ memcpy(short_code_lookup + copy_size, short_code_lookup,
+ sizeof(*short_code_lookup) * copy_size);
+ copy_size *= 2;
+
+ /* Encode code singletons */
+ for (index1 = count_total[last_length];
+ index1 < count_total[last_length + 1]; index1++) {
+ sym1_index = code_list[index1];
+ sym1 = index_to_sym(sym1_index);
+ sym1_len = huff_code_table[sym1_index].length;
+ sym1_code = huff_code_table[sym1_index].code;
+
+ if (sym1 > max_symbol)
+ continue;
+
+ /* Set new codes */
+ short_code_lookup[sym1_code] =
+ sym1 | sym1_len << LARGE_SHORT_CODE_LEN_OFFSET |
+ (1 << LARGE_SYM_COUNT_OFFSET);
+ }
+
+ /* Continue if no pairs are possible */
+ if (multisym >= SINGLE_SYM_FLAG || last_length < 2 * min_length)
+ continue;
+
+ /* Encode code pairs */
+ for (index1 = count_total[min_length];
+ index1 < count_total[last_length - min_length + 1]; index1++) {
+ sym1_index = code_list[index1];
+ sym1 = index_to_sym(sym1_index);
+ sym1_len = huff_code_table[sym1_index].length;
+ sym1_code = huff_code_table[sym1_index].code;
+
+ /*Check that sym1 is a literal */
+ if (sym1 >= 256) {
+ index1 = count_total[sym1_len + 1] - 1;
+ continue;
+ }
+
+ sym2_len = last_length - sym1_len;
+ for (index2 = count_total[sym2_len];
+ index2 < count_total[sym2_len + 1]; index2++) {
+ sym2_index = code_list[index2];
+ sym2 = index_to_sym(sym2_index);
+
+ /* Check that sym2 is an existing symbol */
+ if (sym2 > max_symbol)
+ break;
+
+ sym2_code = huff_code_table[sym2_index].code;
+ code = sym1_code | (sym2_code << sym1_len);
+ code_length = sym1_len + sym2_len;
+ short_code_lookup[code] =
+ sym1 | (sym2 << 8) |
+ (code_length << LARGE_SHORT_CODE_LEN_OFFSET)
+ | (2 << LARGE_SYM_COUNT_OFFSET);
+ }
+ }
+
+ /* Continue if no triples are possible */
+ if (multisym >= DOUBLE_SYM_FLAG || last_length < 3 * min_length)
+ continue;
+
+ /* Encode code triples */
+ for (index1 = count_total[min_length];
+ index1 < count_total[last_length - 2 * min_length + 1]; index1++) {
+ sym1_index = code_list[index1];
+ sym1 = index_to_sym(sym1_index);
+ sym1_len = huff_code_table[sym1_index].length;
+ sym1_code = huff_code_table[sym1_index].code;
+ /*Check that sym1 is a literal */
+ if (sym1 >= 256) {
+ index1 = count_total[sym1_len + 1] - 1;
+ continue;
+ }
+
+ if (last_length - sym1_len < 2 * min_length)
+ break;
+
+ for (index2 = count_total[min_length];
+ index2 < count_total[last_length - sym1_len - min_length + 1];
+ index2++) {
+ sym2_index = code_list[index2];
+ sym2 = index_to_sym(sym2_index);
+ sym2_len = huff_code_table[sym2_index].length;
+ sym2_code = huff_code_table[sym2_index].code;
+
+ /* Check that sym2 is a literal */
+ if (sym2 >= 256) {
+ index2 = count_total[sym2_len + 1] - 1;
+ continue;
+ }
+
+ sym3_len = last_length - sym1_len - sym2_len;
+ for (index3 = count_total[sym3_len];
+ index3 < count_total[sym3_len + 1]; index3++) {
+ sym3_index = code_list[index3];
+ sym3 = index_to_sym(sym3_index);
+ sym3_code = huff_code_table[sym3_index].code;
+
+					/* Check that sym3 is a writable existing symbol */
+ if (sym3 > max_symbol - 1)
+ break;
+
+ code = sym1_code | (sym2_code << sym1_len) |
+ (sym3_code << (sym2_len + sym1_len));
+ code_length = sym1_len + sym2_len + sym3_len;
+ short_code_lookup[code] =
+ sym1 | (sym2 << 8) | sym3 << 16 |
+ (code_length << LARGE_SHORT_CODE_LEN_OFFSET)
+ | (3 << LARGE_SYM_COUNT_OFFSET);
+
+ }
+
+ }
+ }
+
+ }
+
+ index1 = count_total[ISAL_DECODE_LONG_BITS + 1];
+ long_code_length = code_list_len - index1;
+ long_code_list = &code_list[index1];
+ for (i = 0; i < long_code_length; i++) {
+		/* Set the lookup table to point to a hint for where the symbol can
+		 * be found in the list of long codes, and add the current symbol to
+		 * that list. */
+ if (huff_code_table[long_code_list[i]].code_and_extra == INVALID_CODE)
+ continue;
+
+ max_length = huff_code_table[long_code_list[i]].length;
+ first_bits =
+ huff_code_table[long_code_list[i]].code_and_extra
+ & ((1 << ISAL_DECODE_LONG_BITS) - 1);
+
+ temp_code_list[0] = long_code_list[i];
+ temp_code_length = 1;
+
+ for (j = i + 1; j < long_code_length; j++) {
+ if ((huff_code_table[long_code_list[j]].code &
+ ((1 << ISAL_DECODE_LONG_BITS) - 1)) == first_bits) {
+ max_length = huff_code_table[long_code_list[j]].length;
+ temp_code_list[temp_code_length] = long_code_list[j];
+ temp_code_length++;
+ }
+ }
+
+ memset(&result->long_code_lookup[long_code_lookup_length], 0x00,
+ sizeof(*result->long_code_lookup) *
+ (1 << (max_length - ISAL_DECODE_LONG_BITS)));
+
+ for (j = 0; j < temp_code_length; j++) {
+ sym1_index = temp_code_list[j];
+ sym1 = index_to_sym(sym1_index);
+ sym1_len = huff_code_table[sym1_index].length;
+ sym1_code = huff_code_table[sym1_index].code_and_extra;
+
+ long_bits = sym1_code >> ISAL_DECODE_LONG_BITS;
+ min_increment = 1 << (sym1_len - ISAL_DECODE_LONG_BITS);
+
+ for (; long_bits < (1 << (max_length - ISAL_DECODE_LONG_BITS));
+ long_bits += min_increment) {
+ result->long_code_lookup[long_code_lookup_length + long_bits] =
+ sym1 | (sym1_len << LARGE_LONG_CODE_LEN_OFFSET);
+ }
+ huff_code_table[sym1_index].code_and_extra = INVALID_CODE;
+
+ }
+ result->short_code_lookup[first_bits] = long_code_lookup_length |
+ (max_length << LARGE_SHORT_MAX_LEN_OFFSET) | LARGE_FLAG_BIT;
+ long_code_lookup_length += 1 << (max_length - ISAL_DECODE_LONG_BITS);
+ }
+}
+
+static void inline make_inflate_huff_code_dist(struct inflate_huff_code_small *result,
+ struct huff_code *huff_code_table,
+ uint32_t table_length, uint16_t * count,
+ uint32_t max_symbol)
+{
+ int i, j, k;
+ uint32_t *long_code_list;
+ uint32_t long_code_length = 0;
+ uint16_t temp_code_list[1 << (15 - ISAL_DECODE_SHORT_BITS)];
+ uint32_t temp_code_length;
+ uint32_t long_code_lookup_length = 0;
+ uint32_t max_length;
+ uint16_t first_bits;
+ uint32_t code_length;
+ uint16_t long_bits;
+ uint16_t min_increment;
+ uint32_t code_list[DIST_LEN + 2]; /* The +2 is for the extra codes in the static header */
+ uint32_t code_list_len;
+ uint32_t count_total[17], count_total_tmp[17];
+ uint32_t insert_index;
+ uint32_t last_length;
+ uint32_t copy_size;
+ uint16_t *short_code_lookup = result->short_code_lookup;
+ uint32_t sym;
+
+ count_total[0] = 0;
+ count_total[1] = 0;
+ for (i = 2; i < 17; i++)
+ count_total[i] = count_total[i - 1] + count[i - 1];
+ memcpy(count_total_tmp, count_total, sizeof(count_total_tmp));
+
+ code_list_len = count_total[16];
+ if (code_list_len == 0) {
+ memset(result->short_code_lookup, 0, sizeof(result->short_code_lookup));
+ return;
+ }
+
+ for (i = 0; i < table_length; i++) {
+ code_length = huff_code_table[i].length;
+ if (code_length == 0)
+ continue;
+
+ insert_index = count_total_tmp[code_length];
+ code_list[insert_index] = i;
+ count_total_tmp[code_length]++;
+ }
+
+ last_length = huff_code_table[code_list[0]].length;
+ if (last_length > ISAL_DECODE_SHORT_BITS)
+ last_length = ISAL_DECODE_SHORT_BITS + 1;
+ copy_size = (1 << (last_length - 1));
+
+ /* Initialize short_code_lookup, so invalid lookups process data */
+ memset(short_code_lookup, 0x00, copy_size * sizeof(*short_code_lookup));
+
+ for (; last_length <= ISAL_DECODE_SHORT_BITS; last_length++) {
+ memcpy(short_code_lookup + copy_size, short_code_lookup,
+ sizeof(*short_code_lookup) * copy_size);
+ copy_size *= 2;
+
+ for (k = count_total[last_length]; k < count_total[last_length + 1]; k++) {
+ i = code_list[k];
+
+ if (i >= max_symbol) {
+				/* If the symbol is invalid, store only its code
+				 * length (leaving the length field 0) so the
+				 * decoder can subtract it from read_in_length and
+				 * tell truncated input apart from a bad symbol */
+ short_code_lookup[huff_code_table[i].code] =
+ huff_code_table[i].length;
+ continue;
+ }
+
+			/* Set the lookup table to return the current symbol together
+			 * with the code length when the first ISAL_DECODE_SHORT_BITS
+			 * bits of the address match the code for the current symbol.
+			 * Bits 4:0 hold the distance symbol, bits 8:5 its extra-bit
+			 * count and bits 15:11 the code length; a clear bit 10 marks
+			 * the entry as a symbol */
+ short_code_lookup[huff_code_table[i].code] = i |
+ rfc_lookup_table.dist_extra_bit_count[i] << DIST_SYM_EXTRA_OFFSET |
+ (huff_code_table[i].length) << SMALL_SHORT_CODE_LEN_OFFSET;
+ }
+ }
+
+ k = count_total[ISAL_DECODE_SHORT_BITS + 1];
+ long_code_list = &code_list[k];
+ long_code_length = code_list_len - k;
+ for (i = 0; i < long_code_length; i++) {
+		/* Set the lookup table to point to a hint for where the symbol can
+		 * be found in the list of long codes, and add the current symbol to
+		 * that list. */
+ if (huff_code_table[long_code_list[i]].code == 0xFFFF)
+ continue;
+
+ max_length = huff_code_table[long_code_list[i]].length;
+ first_bits =
+ huff_code_table[long_code_list[i]].code
+ & ((1 << ISAL_DECODE_SHORT_BITS) - 1);
+
+ temp_code_list[0] = long_code_list[i];
+ temp_code_length = 1;
+
+ for (j = i + 1; j < long_code_length; j++) {
+ if ((huff_code_table[long_code_list[j]].code &
+ ((1 << ISAL_DECODE_SHORT_BITS) - 1)) == first_bits) {
+ max_length = huff_code_table[long_code_list[j]].length;
+ temp_code_list[temp_code_length] = long_code_list[j];
+ temp_code_length++;
+ }
+ }
+
+ memset(&result->long_code_lookup[long_code_lookup_length], 0x00,
+ 2 * (1 << (max_length - ISAL_DECODE_SHORT_BITS)));
+
+ for (j = 0; j < temp_code_length; j++) {
+ sym = temp_code_list[j];
+ code_length = huff_code_table[sym].length;
+ long_bits = huff_code_table[sym].code >> ISAL_DECODE_SHORT_BITS;
+ min_increment = 1 << (code_length - ISAL_DECODE_SHORT_BITS);
+ for (; long_bits < (1 << (max_length - ISAL_DECODE_SHORT_BITS));
+ long_bits += min_increment) {
+ if (sym >= max_symbol) {
+					/* If the symbol is invalid, store only its code
+					 * length (leaving the length field 0) so the
+					 * decoder can subtract it from read_in_length and
+					 * tell truncated input apart from a bad symbol */
+ result->long_code_lookup[long_code_lookup_length +
+ long_bits] = code_length;
+ continue;
+ }
+ result->long_code_lookup[long_code_lookup_length + long_bits] =
+ sym |
+ rfc_lookup_table.dist_extra_bit_count[sym] <<
+ DIST_SYM_EXTRA_OFFSET |
+ (code_length << SMALL_LONG_CODE_LEN_OFFSET);
+ }
+ huff_code_table[sym].code = 0xFFFF;
+ }
+ result->short_code_lookup[first_bits] = long_code_lookup_length |
+ (max_length << SMALL_SHORT_CODE_LEN_OFFSET) | SMALL_FLAG_BIT;
+ long_code_lookup_length += 1 << (max_length - ISAL_DECODE_SHORT_BITS);
+
+ }
+}
+
+static void inline make_inflate_huff_code_header(struct inflate_huff_code_small *result,
+ struct huff_code *huff_code_table,
+ uint32_t table_length, uint16_t * count,
+ uint32_t max_symbol)
+{
+ int i, j, k;
+ uint32_t *long_code_list;
+ uint32_t long_code_length = 0;
+ uint16_t temp_code_list[1 << (15 - ISAL_DECODE_SHORT_BITS)];
+ uint32_t temp_code_length;
+ uint32_t long_code_lookup_length = 0;
+ uint32_t max_length;
+ uint16_t first_bits;
+ uint32_t code_length;
+ uint16_t long_bits;
+ uint16_t min_increment;
+ uint32_t code_list[DIST_LEN + 2]; /* The +2 is for the extra codes in the static header */
+ uint32_t code_list_len;
+ uint32_t count_total[17], count_total_tmp[17];
+ uint32_t insert_index;
+ uint32_t last_length;
+ uint32_t copy_size;
+ uint16_t *short_code_lookup = result->short_code_lookup;
+
+ count_total[0] = 0;
+ count_total[1] = 0;
+ for (i = 2; i < 17; i++)
+ count_total[i] = count_total[i - 1] + count[i - 1];
+
+ memcpy(count_total_tmp, count_total, sizeof(count_total_tmp));
+
+ code_list_len = count_total[16];
+ if (code_list_len == 0) {
+ memset(result->short_code_lookup, 0, sizeof(result->short_code_lookup));
+ return;
+ }
+
+ for (i = 0; i < table_length; i++) {
+ code_length = huff_code_table[i].length;
+ if (code_length == 0)
+ continue;
+
+ insert_index = count_total_tmp[code_length];
+ code_list[insert_index] = i;
+ count_total_tmp[code_length]++;
+ }
+
+ last_length = huff_code_table[code_list[0]].length;
+ if (last_length > ISAL_DECODE_SHORT_BITS)
+ last_length = ISAL_DECODE_SHORT_BITS + 1;
+ copy_size = (1 << (last_length - 1));
+
+ /* Initialize short_code_lookup, so invalid lookups process data */
+ memset(short_code_lookup, 0x00, copy_size * sizeof(*short_code_lookup));
+
+ for (; last_length <= ISAL_DECODE_SHORT_BITS; last_length++) {
+ memcpy(short_code_lookup + copy_size, short_code_lookup,
+ sizeof(*short_code_lookup) * copy_size);
+ copy_size *= 2;
+
+ for (k = count_total[last_length]; k < count_total[last_length + 1]; k++) {
+ i = code_list[k];
+
+ if (i >= max_symbol)
+ continue;
+
+			/* Set the lookup table to return the current symbol together
+			 * with the code length when the first ISAL_DECODE_SHORT_BITS
+			 * bits of the address match the code for the current symbol.
+			 * The low bits hold the symbol and bits 15:11 hold the code
+			 * length; a clear bit 10 marks the entry as a symbol */
+ short_code_lookup[huff_code_table[i].code] =
+ i | (huff_code_table[i].length) << SMALL_SHORT_CODE_LEN_OFFSET;
+ }
+ }
+
+ k = count_total[ISAL_DECODE_SHORT_BITS + 1];
+ long_code_list = &code_list[k];
+ long_code_length = code_list_len - k;
+ for (i = 0; i < long_code_length; i++) {
+		/* Set the lookup table to point to a hint for where the symbol can
+		 * be found in the list of long codes, and add the current symbol to
+		 * that list. */
+ if (huff_code_table[long_code_list[i]].code == 0xFFFF)
+ continue;
+
+ max_length = huff_code_table[long_code_list[i]].length;
+ first_bits =
+ huff_code_table[long_code_list[i]].code
+ & ((1 << ISAL_DECODE_SHORT_BITS) - 1);
+
+ temp_code_list[0] = long_code_list[i];
+ temp_code_length = 1;
+
+ for (j = i + 1; j < long_code_length; j++) {
+ if ((huff_code_table[long_code_list[j]].code &
+ ((1 << ISAL_DECODE_SHORT_BITS) - 1)) == first_bits) {
+ if (max_length < huff_code_table[long_code_list[j]].length)
+ max_length = huff_code_table[long_code_list[j]].length;
+ temp_code_list[temp_code_length] = long_code_list[j];
+ temp_code_length++;
+ }
+ }
+
+ memset(&result->long_code_lookup[long_code_lookup_length], 0x00,
+ 2 * (1 << (max_length - ISAL_DECODE_SHORT_BITS)));
+
+ for (j = 0; j < temp_code_length; j++) {
+ code_length = huff_code_table[temp_code_list[j]].length;
+ long_bits =
+ huff_code_table[temp_code_list[j]].code >> ISAL_DECODE_SHORT_BITS;
+ min_increment = 1 << (code_length - ISAL_DECODE_SHORT_BITS);
+ for (; long_bits < (1 << (max_length - ISAL_DECODE_SHORT_BITS));
+ long_bits += min_increment) {
+ result->long_code_lookup[long_code_lookup_length + long_bits] =
+ temp_code_list[j] |
+ (code_length << SMALL_LONG_CODE_LEN_OFFSET);
+ }
+ huff_code_table[temp_code_list[j]].code = 0xFFFF;
+ }
+ result->short_code_lookup[first_bits] = long_code_lookup_length |
+ (max_length << SMALL_SHORT_CODE_LEN_OFFSET) | SMALL_FLAG_BIT;
+ long_code_lookup_length += 1 << (max_length - ISAL_DECODE_SHORT_BITS);
+
+ }
+}
+
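+/* Fast path: reports whether the incoming dynamic header is bit-for-bit
+ * identical to the pregenerated header in hufftables_default, in which case
+ * the matching decode tables can be copied instead of rebuilt. */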
+static int header_matches_pregen(struct inflate_state *state)
+{
+#ifndef ISAL_STATIC_INFLATE_TABLE
+ return 0;
+#else
+ uint8_t *in, *hdr;
+ uint32_t in_end_bits, hdr_end_bits;
+ uint32_t bytes_read_in, header_len, last_bits, last_bit_mask;
+ uint64_t bits_read_mask;
+ uint64_t hdr_stash, in_stash;
+ const uint64_t bits_read_prior = 3; // Have read bfinal(1) and btype(2)
+
+ /* Check if stashed read_in_bytes match header */
+ hdr = &(hufftables_default.deflate_hdr[0]);
+ bits_read_mask = (1ull << state->read_in_length) - 1;
+ hdr_stash = (load_u64(hdr) >> bits_read_prior) & bits_read_mask;
+ in_stash = state->read_in & bits_read_mask;
+
+ if (hdr_stash != in_stash)
+ return 0;
+
+ /* Check if input is byte aligned */
+ if ((state->read_in_length + bits_read_prior) % 8)
+ return 0;
+
+ /* Check if header bulk is the same */
+ in = state->next_in;
+ bytes_read_in = (state->read_in_length + bits_read_prior) / 8;
+ header_len = hufftables_default.deflate_hdr_count;
+
+ if (memcmp(in, &hdr[bytes_read_in], header_len - bytes_read_in))
+ return 0;
+
+ /* If there are any last/end bits to the header check them too */
+ last_bits = hufftables_default.deflate_hdr_extra_bits;
+ last_bit_mask = (1 << last_bits) - 1;
+
+ if (0 == last_bits) {
+ state->next_in += header_len - bytes_read_in;
+ state->avail_in -= header_len - bytes_read_in;
+ state->read_in_length = 0;
+ state->read_in = 0;
+ return 1;
+ }
+
+ in_end_bits = in[header_len - bytes_read_in] & last_bit_mask;
+ hdr_end_bits = hdr[header_len] & last_bit_mask;
+ if (in_end_bits == hdr_end_bits) {
+ state->next_in += header_len - bytes_read_in;
+ state->avail_in -= header_len - bytes_read_in;
+ state->read_in_length = 0;
+ state->read_in = 0;
+ inflate_in_read_bits(state, last_bits);
+ return 1;
+ }
+
+ return 0;
+#endif // ISAL_STATIC_INFLATE_TABLE
+}
+
+static int setup_pregen_header(struct inflate_state *state)
+{
+#ifdef ISAL_STATIC_INFLATE_TABLE
+ memcpy(&state->lit_huff_code, &pregen_lit_huff_code, sizeof(pregen_lit_huff_code));
+ memcpy(&state->dist_huff_code, &pregen_dist_huff_code, sizeof(pregen_dist_huff_code));
+ state->block_state = ISAL_BLOCK_CODED;
+#endif // ISAL_STATIC_INFLATE_TABLE
+ return 0;
+}
+
+/* Sets the inflate_huff_codes in state to be the huffcodes corresponding to the
+ * deflate static header */
+static int inline setup_static_header(struct inflate_state *state)
+{
+#ifdef ISAL_STATIC_INFLATE_TABLE
+ memcpy(&state->lit_huff_code, &static_lit_huff_code, sizeof(static_lit_huff_code));
+ memcpy(&state->dist_huff_code, &static_dist_huff_code, sizeof(static_dist_huff_code));
+#else
+
+#ifndef NO_STATIC_INFLATE_H
+# warning "Defaulting to static inflate table fallback."
+# warning "For best performance, run generate_static_inflate, replace static_inflate.h, and recompile"
+#endif
+ int i;
+ struct huff_code lit_code[LIT_LEN_ELEMS];
+ struct huff_code dist_code[DIST_LEN + 2];
+ uint32_t multisym = SINGLE_SYM_FLAG, max_dist = DIST_LEN;
+ /* These tables are based on the static huffman tree described in RFC
+ * 1951 */
+ uint16_t lit_count[MAX_LIT_LEN_COUNT] = {
+ 0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+
+ uint16_t lit_expand_count[MAX_LIT_LEN_COUNT] = {
+ 0, 0, 0, 0, 0, 0, 0, -15, 1, 16, 32, 48, 16, 128, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ uint16_t dist_count[16] = {
+ 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ uint32_t code_list[LIT_LEN_ELEMS + 2]; /* The +2 is for the extra codes in the static header */
+ /* These for loops set the code lengths for the static literal/length
+ * and distance codes defined in the deflate standard RFC 1951 */
+ for (i = 0; i < 144; i++)
+ lit_code[i].length = 8;
+
+ for (i = 144; i < 256; i++)
+ lit_code[i].length = 9;
+
+ for (i = 256; i < 280; i++)
+ lit_code[i].length = 7;
+
+ for (i = 280; i < LIT_LEN + 2; i++)
+ lit_code[i].length = 8;
+
+ for (i = 0; i < DIST_LEN + 2; i++)
+ dist_code[i].length = 5;
+
+ set_and_expand_lit_len_huffcode(lit_code, LIT_LEN + 2, lit_count, lit_expand_count,
+ code_list);
+
+ set_codes(dist_code, DIST_LEN + 2, dist_count);
+
+ make_inflate_huff_code_lit_len(&state->lit_huff_code, lit_code, LIT_LEN_ELEMS,
+ lit_count, code_list, multisym);
+
+ if (state->hist_bits && state->hist_bits < 15)
+ max_dist = 2 * state->hist_bits;
+
+ make_inflate_huff_code_dist(&state->dist_huff_code, dist_code, DIST_LEN + 2,
+ dist_count, max_dist);
+#endif
+ state->block_state = ISAL_BLOCK_CODED;
+
+ return 0;
+}
+
+/* Decodes the next symbol(s) from the input buffer using the huff code defined
+ * by huff_code, returning up to three packed literals in next_lits and their
+ * number in sym_count */
+static void inline decode_next_lit_len(uint32_t * next_lits, uint32_t * sym_count,
+ struct inflate_state *state,
+ struct inflate_huff_code_large *huff_code)
+{
+ uint32_t next_bits;
+ uint32_t next_sym;
+ uint32_t bit_count;
+ uint32_t bit_mask;
+
+ if (state->read_in_length <= ISAL_DEF_MAX_CODE_LEN)
+ inflate_in_load(state, 0);
+
+ next_bits = state->read_in & ((1 << ISAL_DECODE_LONG_BITS) - 1);
+
+	/* next_sym is a possible symbol decoded from next_bits. If bit 25 is 0,
+	 * next_sym holds the symbol(s): bits 24:0 the packed symbols, bits 27:26
+	 * the symbol count and bits 31:28 the length of the huffman code. If bit
+	 * 25 is set, next_sym instead provides a hint of where the long codes
+	 * with this prefix are located. Note the hint is at largest the location
+	 * of the first actual symbol in the long code list. */
+ next_sym = huff_code->short_code_lookup[next_bits];
+
+ if ((next_sym & LARGE_FLAG_BIT) == 0) {
+ /* Return symbol found if next_code is a complete huffman code
+ * and shift in buffer over by the length of the next_code */
+ bit_count = next_sym >> LARGE_SHORT_CODE_LEN_OFFSET;
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+
+ if (bit_count == 0)
+ next_sym = INVALID_SYMBOL;
+
+ *sym_count = (next_sym >> LARGE_SYM_COUNT_OFFSET) & LARGE_SYM_COUNT_MASK;
+ *next_lits = next_sym & LARGE_SHORT_SYM_MASK;
+
+ } else {
+ /* If a symbol is not found, do a lookup in the long code
+ * list starting from the hint in next_sym */
+ bit_mask = next_sym >> LARGE_SHORT_MAX_LEN_OFFSET;
+ bit_mask = (1 << bit_mask) - 1;
+ next_bits = state->read_in & bit_mask;
+ next_sym =
+ huff_code->long_code_lookup[(next_sym & LARGE_SHORT_SYM_MASK) +
+ (next_bits >> ISAL_DECODE_LONG_BITS)];
+ bit_count = next_sym >> LARGE_LONG_CODE_LEN_OFFSET;
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+
+ if (bit_count == 0)
+ next_sym = INVALID_SYMBOL;
+
+ *sym_count = 1;
+ *next_lits = next_sym & LARGE_LONG_SYM_MASK;
+ }
+}
+
+static uint16_t inline decode_next_dist(struct inflate_state *state,
+ struct inflate_huff_code_small *huff_code)
+{
+ uint16_t next_bits;
+ uint16_t next_sym;
+ uint32_t bit_count;
+ uint32_t bit_mask;
+
+ if (state->read_in_length <= ISAL_DEF_MAX_CODE_LEN)
+ inflate_in_load(state, 0);
+
+ next_bits = state->read_in & ((1 << ISAL_DECODE_SHORT_BITS) - 1);
+
+	/* next_sym is a possible symbol decoded from next_bits. If bit 10 is 0,
+	 * next_sym holds the symbol: bits 4:0 the distance symbol, bits 8:5 its
+	 * extra-bit count and bits 15:11 the length of the huffman code. If bit
+	 * 10 is set, next_sym instead provides a hint of where the long codes
+	 * with this prefix are located. Note the hint is at largest the location
+	 * of the first actual symbol in the long code list. */
+ next_sym = huff_code->short_code_lookup[next_bits];
+
+ if ((next_sym & SMALL_FLAG_BIT) == 0) {
+ /* Return symbol found if next_code is a complete huffman code
+ * and shift in buffer over by the length of the next_code */
+ bit_count = next_sym >> SMALL_SHORT_CODE_LEN_OFFSET;
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+
+ if (bit_count == 0) {
+ state->read_in_length -= next_sym;
+ next_sym = INVALID_SYMBOL;
+ }
+
+ return next_sym & DIST_SYM_MASK;
+
+ } else {
+ /* If a symbol is not found, perform a linear search of the long code
+ * list starting from the hint in next_sym */
+ bit_mask = (next_sym - SMALL_FLAG_BIT) >> SMALL_SHORT_CODE_LEN_OFFSET;
+ bit_mask = (1 << bit_mask) - 1;
+ next_bits = state->read_in & bit_mask;
+ next_sym =
+ huff_code->long_code_lookup[(next_sym & SMALL_SHORT_SYM_MASK) +
+ (next_bits >> ISAL_DECODE_SHORT_BITS)];
+ bit_count = next_sym >> SMALL_LONG_CODE_LEN_OFFSET;
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+
+ if (bit_count == 0) {
+ state->read_in_length -= next_sym;
+ next_sym = INVALID_SYMBOL;
+ }
+
+ return next_sym & DIST_SYM_MASK;
+ }
+}
+
+static uint16_t inline decode_next_header(struct inflate_state *state,
+ struct inflate_huff_code_small *huff_code)
+{
+ uint16_t next_bits;
+ uint16_t next_sym;
+ uint32_t bit_count;
+ uint32_t bit_mask;
+
+ if (state->read_in_length <= ISAL_DEF_MAX_CODE_LEN)
+ inflate_in_load(state, 0);
+
+ next_bits = state->read_in & ((1 << ISAL_DECODE_SHORT_BITS) - 1);
+
+	/* next_sym is a possible symbol decoded from next_bits. If bit 10 is 0,
+	 * next_sym holds the symbol in its low bits and the length of its
+	 * huffman code in bits 15:11. If bit 10 is set, next_sym instead
+	 * provides a hint of where the long codes with this prefix are located.
+	 * Note the hint is at largest the location of the first actual symbol
+	 * in the long code list. */
+ next_sym = huff_code->short_code_lookup[next_bits];
+
+ if ((next_sym & SMALL_FLAG_BIT) == 0) {
+ /* Return symbol found if next_code is a complete huffman code
+ * and shift in buffer over by the length of the next_code */
+ bit_count = next_sym >> SMALL_SHORT_CODE_LEN_OFFSET;
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+
+ if (bit_count == 0)
+ next_sym = INVALID_SYMBOL;
+
+ return next_sym & SMALL_SHORT_SYM_MASK;
+
+ } else {
+ /* If a symbol is not found, perform a linear search of the long code
+ * list starting from the hint in next_sym */
+ bit_mask = (next_sym - SMALL_FLAG_BIT) >> SMALL_SHORT_CODE_LEN_OFFSET;
+ bit_mask = (1 << bit_mask) - 1;
+ next_bits = state->read_in & bit_mask;
+ next_sym =
+ huff_code->long_code_lookup[(next_sym & SMALL_SHORT_SYM_MASK) +
+ (next_bits >> ISAL_DECODE_SHORT_BITS)];
+ bit_count = next_sym >> SMALL_LONG_CODE_LEN_OFFSET;
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+ return next_sym & SMALL_LONG_SYM_MASK;
+
+ }
+}
+
+/* Reads data from the in_buffer and sets the huff code corresponding to that
+ * data */
+static int inline setup_dynamic_header(struct inflate_state *state)
+{
+ int i, j;
+ struct huff_code code_huff[CODE_LEN_CODES];
+ struct huff_code lit_and_dist_huff[LIT_LEN_ELEMS];
+ struct huff_code *previous = NULL, *current, *end, rep_code;
+ struct inflate_huff_code_small inflate_code_huff;
+ uint64_t hclen, hdist, hlit;
+ uint16_t code_count[16], lit_count[MAX_LIT_LEN_COUNT],
+ lit_expand_count[MAX_LIT_LEN_COUNT], dist_count[16];
+ uint16_t *count;
+ uint16_t symbol;
+ uint32_t multisym = DEFAULT_SYM_FLAG, length, max_dist = DIST_LEN;
+ struct huff_code *code;
+ uint64_t flag = 0;
+
+ int extra_count;
+ uint32_t code_list[LIT_LEN_ELEMS + 2]; /* The +2 is for the extra codes in the static header */
+
+ /* This order is defined in RFC 1951 page 13 */
+ const uint8_t code_length_order[CODE_LEN_CODES] = {
+ 0x10, 0x11, 0x12, 0x00, 0x08, 0x07, 0x09, 0x06,
+ 0x0a, 0x05, 0x0b, 0x04, 0x0c, 0x03, 0x0d, 0x02, 0x0e, 0x01, 0x0f
+ };
+
+	/* If the whole header is available and matches the pregen header, reuse the pregen tables */
+ if (state->avail_in > (hufftables_default.deflate_hdr_count + sizeof(uint64_t))
+ && header_matches_pregen(state))
+ return setup_pregen_header(state);
+
+ if (state->bfinal && state->avail_in <= SINGLE_SYM_THRESH) {
+ multisym = SINGLE_SYM_FLAG;
+ } else if (state->bfinal && state->avail_in <= DOUBLE_SYM_THRESH) {
+ multisym = DOUBLE_SYM_FLAG;
+ }
+
+ memset(code_count, 0, sizeof(code_count));
+ memset(lit_count, 0, sizeof(lit_count));
+ memset(lit_expand_count, 0, sizeof(lit_expand_count));
+ memset(dist_count, 0, sizeof(dist_count));
+ memset(code_huff, 0, sizeof(code_huff));
+ memset(lit_and_dist_huff, 0, sizeof(lit_and_dist_huff));
+
+ /* These variables are defined in the deflate standard, RFC 1951 */
+ inflate_in_load(state, 0);
+ if (state->read_in_length < 14)
+ return ISAL_END_INPUT;
+
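+	/* hlit = number of lit/len codes - 257, hdist = number of distance
+	 * codes - 1, hclen = number of code-length codes - 4 (RFC 1951
+	 * section 3.2.7) */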
+ hlit = inflate_in_read_bits_unsafe(state, 5);
+ hdist = inflate_in_read_bits_unsafe(state, 5);
+ hclen = inflate_in_read_bits_unsafe(state, 4);
+
+ if (hlit > 29 || hdist > 29 || hclen > 15)
+ return ISAL_INVALID_BLOCK;
+
+ /* Create the code huffman code for decoding the lit/len and dist huffman codes */
+ for (i = 0; i < 4; i++) {
+ code = &code_huff[code_length_order[i]];
+ length = inflate_in_read_bits_unsafe(state, 3);
+ write_huff_code(code, 0, length);
+ code_count[length] += 1;
+ flag |= length;
+ }
+
+ inflate_in_load(state, 0);
+
+ for (i = 4; i < hclen + 4; i++) {
+ code = &code_huff[code_length_order[i]];
+ length = inflate_in_read_bits_unsafe(state, 3);
+ write_huff_code(code, 0, length);
+ code_count[length] += 1;
+ flag |= length;
+ }
+
+ if (state->read_in_length < 0)
+ return ISAL_END_INPUT;
+
+ if (!flag || set_codes(code_huff, CODE_LEN_CODES, code_count))
+ return ISAL_INVALID_BLOCK;
+
+ make_inflate_huff_code_header(&inflate_code_huff, code_huff, CODE_LEN_CODES,
+ code_count, CODE_LEN_CODES);
+
+ /* Decode the lit/len and dist huffman codes using the code huffman code */
+ count = lit_count;
+ current = lit_and_dist_huff;
+ end = lit_and_dist_huff + LIT_LEN + hdist + 1;
+
+ while (current < end) {
+ symbol = decode_next_header(state, &inflate_code_huff);
+
+ if (state->read_in_length < 0) {
+ if (current > &lit_and_dist_huff[256]
+ && lit_and_dist_huff[256].length <= 0)
+ return ISAL_INVALID_BLOCK;
+ return ISAL_END_INPUT;
+ }
+
+ if (symbol < 16) {
+			/* If a code length is found, set the current lit/len/dist
+			 * element's length to that symbol */
+ if (current == lit_and_dist_huff + LIT_TABLE_SIZE + hlit) {
+ /* Switch code upon completion of lit_len table */
+ current = lit_and_dist_huff + LIT_LEN;
+ count = dist_count;
+ }
+ count[symbol]++;
+ write_huff_code(current, 0, symbol);
+ previous = current;
+ current++;
+
+ if (symbol == 0 // No symbol
+ || (previous >= lit_and_dist_huff + LIT_TABLE_SIZE + hlit) // Dist table
+ || (previous < lit_and_dist_huff + 264)) // Lit/Len with no extra bits
+ continue;
+
+ extra_count =
+ rfc_lookup_table.len_extra_bit_count[previous - LIT_TABLE_SIZE -
+ lit_and_dist_huff];
+ lit_expand_count[symbol]--;
+ lit_expand_count[symbol + extra_count] += (1 << extra_count);
+
+ } else if (symbol == 16) {
+ /* If a repeat length is found, update the next repeat
+ * length lit/len/dist elements to have the value of the
+ * repeated length */
+
+ i = 3 + inflate_in_read_bits(state, 2);
+
+ if (current + i > end || previous == NULL)
+ return ISAL_INVALID_BLOCK;
+
+ rep_code = *previous;
+ for (j = 0; j < i; j++) {
+ if (current == lit_and_dist_huff + LIT_TABLE_SIZE + hlit) {
+ /* Switch code upon completion of lit_len table */
+ current = lit_and_dist_huff + LIT_LEN;
+ count = dist_count;
+ }
+
+ *current = rep_code;
+ count[rep_code.length]++;
+ previous = current;
+ current++;
+
+ if (rep_code.length == 0 // No symbol
+ || (previous >= lit_and_dist_huff + LIT_TABLE_SIZE + hlit) // Dist table
+ || (previous < lit_and_dist_huff + 264)) // Lit/Len with no extra
+ continue;
+
+ extra_count =
+ rfc_lookup_table.len_extra_bit_count
+ [previous - lit_and_dist_huff - LIT_TABLE_SIZE];
+ lit_expand_count[rep_code.length]--;
+ lit_expand_count[rep_code.length +
+ extra_count] += (1 << extra_count);
+
+ }
+ } else if (symbol == 17) {
+			/* If a repeat-zeroes symbol is found, set the next
+			 * (repeat count) lit/len/dist elements to have
+			 * length 0. */
+ i = 3 + inflate_in_read_bits(state, 3);
+
+ current = current + i;
+ previous = current - 1;
+
+ if (count != dist_count
+ && current > lit_and_dist_huff + LIT_TABLE_SIZE + hlit) {
+ /* Switch code upon completion of lit_len table */
+ current += LIT_LEN - LIT_TABLE_SIZE - hlit;
+ count = dist_count;
+ if (current > lit_and_dist_huff + LIT_LEN)
+ previous = current - 1;
+ }
+
+ } else if (symbol == 18) {
+			/* If a repeat-zeroes symbol is found, set the next
+			 * (repeat count) lit/len/dist elements to have
+			 * length 0. */
+ i = 11 + inflate_in_read_bits(state, 7);
+
+ current = current + i;
+ previous = current - 1;
+
+ if (count != dist_count
+ && current > lit_and_dist_huff + LIT_TABLE_SIZE + hlit) {
+ /* Switch code upon completion of lit_len table */
+ current += LIT_LEN - LIT_TABLE_SIZE - hlit;
+ count = dist_count;
+ if (current > lit_and_dist_huff + LIT_LEN)
+ previous = current - 1;
+ }
+
+ } else
+ return ISAL_INVALID_BLOCK;
+
+ }
+
+ if (current > end || lit_and_dist_huff[256].length <= 0)
+ return ISAL_INVALID_BLOCK;
+
+ if (state->read_in_length < 0)
+ return ISAL_END_INPUT;
+
+ if (set_codes(&lit_and_dist_huff[LIT_LEN], DIST_LEN, dist_count))
+ return ISAL_INVALID_BLOCK;
+
+ if (state->hist_bits && state->hist_bits < 15)
+ max_dist = 2 * state->hist_bits;
+
+ make_inflate_huff_code_dist(&state->dist_huff_code, &lit_and_dist_huff[LIT_LEN],
+ DIST_LEN, dist_count, max_dist);
+
+ if (set_and_expand_lit_len_huffcode
+ (lit_and_dist_huff, LIT_LEN, lit_count, lit_expand_count, code_list))
+ return ISAL_INVALID_BLOCK;
+
+ make_inflate_huff_code_lit_len(&state->lit_huff_code, lit_and_dist_huff, LIT_LEN_ELEMS,
+ lit_count, code_list, multisym);
+
+ state->block_state = ISAL_BLOCK_CODED;
+
+ return 0;
+}
+
+/* Reads in the header pointed to by the input stream and sets up state to
+ * reflect that header information */
+static int read_header(struct inflate_state *state)
+{
+ uint8_t bytes;
+ uint32_t btype;
+ uint16_t len, nlen;
+ int ret = 0;
+
+	/* btype and bfinal are defined in RFC 1951: bfinal indicates whether
+	 * the current block is the last block of the stream, and btype gives
+	 * the encoding method of the current block. */
+
+ state->bfinal = inflate_in_read_bits(state, 1);
+ btype = inflate_in_read_bits(state, 2);
+
+ if (state->read_in_length < 0)
+ ret = ISAL_END_INPUT;
+
+ else if (btype == 0) {
+ inflate_in_load(state, 40);
+ bytes = state->read_in_length / 8;
+
+ if (bytes < 4)
+ return ISAL_END_INPUT;
+
+ state->read_in >>= state->read_in_length % 8;
+ state->read_in_length = bytes * 8;
+
+ len = state->read_in & 0xFFFF;
+ state->read_in >>= 16;
+ nlen = state->read_in & 0xFFFF;
+ state->read_in >>= 16;
+ state->read_in_length -= 32;
+
+ /* Check if len and nlen match */
+ if (len != (~nlen & 0xffff))
+ return ISAL_INVALID_BLOCK;
+
+ state->type0_block_len = len;
+ state->block_state = ISAL_BLOCK_TYPE0;
+
+ ret = 0;
+
+ } else if (btype == 1)
+ ret = setup_static_header(state);
+
+ else if (btype == 2)
+ ret = setup_dynamic_header(state);
+
+ else
+ ret = ISAL_INVALID_BLOCK;
+
+ return ret;
+}
+
+/* Stateful wrapper around read_header: buffers a partial header in
+ * tmp_in_buffer so decoding can resume when more input arrives */
+static int read_header_stateful(struct inflate_state *state)
+{
+ uint64_t read_in_start = state->read_in;
+ int32_t read_in_length_start = state->read_in_length;
+ uint8_t *next_in_start = state->next_in;
+ uint32_t avail_in_start = state->avail_in;
+ int block_state_start = state->block_state;
+ int ret;
+ int copy_size;
+ int bytes_read;
+
+ if (block_state_start == ISAL_BLOCK_HDR) {
+ /* Setup so read_header decodes data in tmp_in_buffer */
+ copy_size = ISAL_DEF_MAX_HDR_SIZE - state->tmp_in_size;
+ if (copy_size > state->avail_in)
+ copy_size = state->avail_in;
+
+ memcpy(&state->tmp_in_buffer[state->tmp_in_size], state->next_in, copy_size);
+ state->next_in = state->tmp_in_buffer;
+ state->avail_in = state->tmp_in_size + copy_size;
+ }
+
+ ret = read_header(state);
+
+ if (block_state_start == ISAL_BLOCK_HDR) {
+ /* Setup so state is restored to a valid state */
+ bytes_read = state->next_in - state->tmp_in_buffer - state->tmp_in_size;
+ if (bytes_read < 0)
+ bytes_read = 0;
+ state->next_in = next_in_start + bytes_read;
+ state->avail_in = avail_in_start - bytes_read;
+ }
+
+ if (ret == ISAL_END_INPUT) {
+ /* Save off data so header can be decoded again with more data */
+ state->read_in = read_in_start;
+ state->read_in_length = read_in_length_start;
+ memcpy(&state->tmp_in_buffer[state->tmp_in_size], next_in_start,
+ avail_in_start);
+ state->tmp_in_size += avail_in_start;
+ state->avail_in = 0;
+ state->next_in = next_in_start + avail_in_start;
+ state->block_state = ISAL_BLOCK_HDR;
+ } else
+ state->tmp_in_size = 0;
+
+ return ret;
+
+}
+
+static int inline decode_literal_block(struct inflate_state *state)
+{
+ uint32_t len = state->type0_block_len;
+ uint32_t bytes = state->read_in_length / 8;
+	/* For an uncompressed block, perform a memcpy while
+	 * updating state data */
+ state->block_state = state->bfinal ? ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
+
+ if (state->avail_out < len) {
+ len = state->avail_out;
+ state->block_state = ISAL_BLOCK_TYPE0;
+ }
+
+ if (state->avail_in + bytes < len) {
+ len = state->avail_in + bytes;
+ state->block_state = ISAL_BLOCK_TYPE0;
+ }
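+	/* First drain any bytes already buffered in read_in (bits pulled from
+	 * next_in ahead of time), then copy the remainder straight from
+	 * next_in. */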
+ if (state->read_in_length) {
+ if (len >= bytes) {
+ memcpy(state->next_out, &state->read_in, bytes);
+
+ state->next_out += bytes;
+ state->avail_out -= bytes;
+ state->total_out += bytes;
+ state->type0_block_len -= bytes;
+
+ state->read_in = 0;
+ state->read_in_length = 0;
+ len -= bytes;
+ bytes = 0;
+
+ } else {
+ memcpy(state->next_out, &state->read_in, len);
+
+ state->next_out += len;
+ state->avail_out -= len;
+ state->total_out += len;
+ state->type0_block_len -= len;
+
+ state->read_in >>= 8 * len;
+ state->read_in_length -= 8 * len;
+ bytes -= len;
+ len = 0;
+ }
+ }
+ memcpy(state->next_out, state->next_in, len);
+
+ state->next_out += len;
+ state->avail_out -= len;
+ state->total_out += len;
+ state->next_in += len;
+ state->avail_in -= len;
+ state->type0_block_len -= len;
+
+ if (state->avail_in + bytes == 0 && state->block_state != ISAL_BLOCK_INPUT_DONE)
+ return ISAL_END_INPUT;
+
+ if (state->avail_out == 0 && state->type0_block_len > 0)
+ return ISAL_OUT_OVERFLOW;
+
+ return 0;
+
+}
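+
+/* Note on the drain above: inflate_in_load() may already have pulled up to 8
+ * bytes of the stored block into the read_in bit buffer, so those bytes are
+ * written to next_out first and only the remainder is copied straight from
+ * next_in. A sketch of the accounting, assuming 3 buffered bytes and a
+ * 10-byte block with sufficient avail_in/avail_out:
+ *
+ *	bytes = read_in_length / 8;        3 bytes come from read_in
+ *	memcpy(next_out, &read_in, 3);     flush the bit buffer first
+ *	memcpy(next_out + 3, next_in, 7);  copy the remaining 7 from input
+ */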
+
+/* Decodes the next block if it was encoded using a huffman code */
+int decode_huffman_code_block_stateless_base(struct inflate_state *state, uint8_t * start_out)
+{
+ uint16_t next_lit;
+ uint8_t next_dist;
+ uint32_t repeat_length;
+ uint32_t look_back_dist;
+ uint64_t read_in_tmp;
+ int32_t read_in_length_tmp;
+ uint8_t *next_in_tmp, *next_out_tmp;
+ uint32_t avail_in_tmp, avail_out_tmp, total_out_tmp;
+ uint32_t next_lits, sym_count;
+ struct rfc1951_tables *rfc = &rfc_lookup_table;
+
+ state->copy_overflow_length = 0;
+ state->copy_overflow_distance = 0;
+
+ while (state->block_state == ISAL_BLOCK_CODED) {
+ /* While not at the end of block, decode the next
+ * symbol */
+ inflate_in_load(state, 0);
+
+ read_in_tmp = state->read_in;
+ read_in_length_tmp = state->read_in_length;
+ next_in_tmp = state->next_in;
+ avail_in_tmp = state->avail_in;
+ next_out_tmp = state->next_out;
+ avail_out_tmp = state->avail_out;
+ total_out_tmp = state->total_out;
+
+ decode_next_lit_len(&next_lits, &sym_count, state, &state->lit_huff_code);
+
+ if (sym_count == 0)
+ return ISAL_INVALID_SYMBOL;
+
+ if (state->read_in_length < 0) {
+ state->read_in = read_in_tmp;
+ state->read_in_length = read_in_length_tmp;
+ state->next_in = next_in_tmp;
+ state->avail_in = avail_in_tmp;
+ return ISAL_END_INPUT;
+ }
+
+ while (sym_count > 0) {
+ next_lit = next_lits & 0xffff;
+ if (next_lit < 256 || sym_count > 1) {
+ /* If the next symbol is a literal,
+ * write out the symbol and update state
+ * data accordingly. */
+ if (state->avail_out < 1) {
+ state->write_overflow_lits = next_lits;
+ state->write_overflow_len = sym_count;
+ next_lits = next_lits >> (8 * (sym_count - 1));
+ sym_count = 1;
+
+ if (next_lits < 256)
+ return ISAL_OUT_OVERFLOW;
+ else if (next_lits == 256) {
+ state->write_overflow_len -= 1;
+ state->block_state = state->bfinal ?
+ ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
+ return ISAL_OUT_OVERFLOW;
+ } else {
+ state->write_overflow_len -= 1;
+ continue;
+ }
+ }
+
+ *state->next_out = next_lit;
+ state->next_out++;
+ state->avail_out--;
+ state->total_out++;
+
+ } else if (next_lit == 256) {
+ /* If the next symbol is the end of
+ * block, update the state data
+ * accordingly */
+ state->block_state = state->bfinal ?
+ ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
+
+ } else if (next_lit <= MAX_LIT_LEN_SYM) {
+ /* Else if the next symbol is a repeat
+ * length, read in the length extra
+ * bits, the distance code, the distance
+ * extra bits. Then write out the
+ * corresponding data and update the
+				 * state data accordingly */
+ repeat_length = next_lit - 254;
+ next_dist = decode_next_dist(state, &state->dist_huff_code);
+
+ if (state->read_in_length >= 0) {
+ if (next_dist >= DIST_LEN)
+ return ISAL_INVALID_SYMBOL;
+
+ look_back_dist = rfc->dist_start[next_dist] +
+ inflate_in_read_bits(state,
+ rfc->dist_extra_bit_count
+ [next_dist]);
+ }
+
+ if (state->read_in_length < 0) {
+ state->read_in = read_in_tmp;
+ state->read_in_length = read_in_length_tmp;
+ state->next_in = next_in_tmp;
+ state->avail_in = avail_in_tmp;
+ state->next_out = next_out_tmp;
+ state->avail_out = avail_out_tmp;
+ state->total_out = total_out_tmp;
+ state->write_overflow_lits = 0;
+ state->write_overflow_len = 0;
+ return ISAL_END_INPUT;
+ }
+
+ if (state->next_out - look_back_dist < start_out)
+ return ISAL_INVALID_LOOKBACK;
+
+ if (state->avail_out < repeat_length) {
+ state->copy_overflow_length =
+ repeat_length - state->avail_out;
+ state->copy_overflow_distance = look_back_dist;
+ repeat_length = state->avail_out;
+ }
+
+ if (look_back_dist > repeat_length)
+ memcpy(state->next_out,
+ state->next_out - look_back_dist,
+ repeat_length);
+ else
+ byte_copy(state->next_out, look_back_dist,
+ repeat_length);
+
+ state->next_out += repeat_length;
+ state->avail_out -= repeat_length;
+ state->total_out += repeat_length;
+
+ if (state->copy_overflow_length > 0)
+ return ISAL_OUT_OVERFLOW;
+ } else
+ /* Else the read in bits do not
+ * correspond to any valid symbol */
+ return ISAL_INVALID_SYMBOL;
+
+ next_lits >>= 8;
+ sym_count--;
+ }
+
+ }
+ return 0;
+}
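+
+/* When look_back_dist <= repeat_length above, the source and destination of
+ * the copy overlap and memcpy is not safe, so byte_copy() is used instead. A
+ * plausible definition (the real one lives elsewhere in igzip) copies one
+ * byte at a time so earlier output bytes can be replicated:
+ *
+ *	static void byte_copy(uint8_t *dest, uint64_t dist, int len)
+ *	{
+ *		uint8_t *src = dest - dist;
+ *
+ *		for (; len > 0; len--)
+ *			*dest++ = *src++;
+ *	}
+ *
+ * For example, distance 1 and length 5 repeats the last output byte five
+ * times, which a plain memcpy would not guarantee.
+ */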
+
+void isal_inflate_init(struct inflate_state *state)
+{
+
+ state->read_in = 0;
+ state->read_in_length = 0;
+ state->next_in = NULL;
+ state->avail_in = 0;
+ state->next_out = NULL;
+ state->avail_out = 0;
+ state->total_out = 0;
+ state->dict_length = 0;
+ state->block_state = ISAL_BLOCK_NEW_HDR;
+ state->bfinal = 0;
+ state->crc_flag = 0;
+ state->crc = 0;
+ state->hist_bits = 0;
+ state->type0_block_len = 0;
+ state->write_overflow_lits = 0;
+ state->write_overflow_len = 0;
+ state->copy_overflow_length = 0;
+ state->copy_overflow_distance = 0;
+ state->wrapper_flag = 0;
+ state->tmp_in_size = 0;
+ state->tmp_out_processed = 0;
+ state->tmp_out_valid = 0;
+}
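+
+/* Minimal stateful usage sketch (illustrative; comp_buf/out_buf are
+ * placeholder buffers and refill_buffers() stands in for application code
+ * that tops up next_in/next_out). Feed input and output in chunks and call
+ * isal_inflate() until the stream reports ISAL_BLOCK_FINISH:
+ *
+ *	struct inflate_state state;
+ *
+ *	isal_inflate_init(&state);
+ *	state.next_in = comp_buf;
+ *	state.avail_in = comp_len;
+ *	state.next_out = out_buf;
+ *	state.avail_out = out_len;
+ *	do {
+ *		ret = isal_inflate(&state);
+ *	} while (ret == ISAL_DECOMP_OK
+ *		 && state.block_state != ISAL_BLOCK_FINISH
+ *		 && refill_buffers(&state));
+ */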
+
+void isal_inflate_reset(struct inflate_state *state)
+{
+ state->read_in = 0;
+ state->read_in_length = 0;
+ state->total_out = 0;
+ state->dict_length = 0;
+ state->block_state = ISAL_BLOCK_NEW_HDR;
+ state->bfinal = 0;
+ state->crc = 0;
+ state->type0_block_len = 0;
+ state->write_overflow_lits = 0;
+ state->write_overflow_len = 0;
+ state->copy_overflow_length = 0;
+ state->copy_overflow_distance = 0;
+ state->wrapper_flag = 0;
+ state->tmp_in_size = 0;
+ state->tmp_out_processed = 0;
+ state->tmp_out_valid = 0;
+}
+
+static inline uint32_t fixed_size_read(struct inflate_state *state,
+ uint8_t ** read_buf, int read_size)
+{
+ uint32_t tmp_in_size = state->tmp_in_size;
+
+ if (state->avail_in + tmp_in_size < read_size) {
+ memcpy(state->tmp_in_buffer + tmp_in_size, state->next_in, state->avail_in);
+ tmp_in_size += state->avail_in;
+ state->tmp_in_size = tmp_in_size;
+ state->next_in += state->avail_in;
+ state->avail_in = 0;
+
+ return ISAL_END_INPUT;
+ }
+
+ *read_buf = state->next_in;
+ if (tmp_in_size) {
+ memcpy(state->tmp_in_buffer + tmp_in_size, state->next_in,
+ read_size - tmp_in_size);
+ *read_buf = state->tmp_in_buffer;
+ state->tmp_in_size = 0;
+ }
+
+ state->next_in += read_size - tmp_in_size;
+ state->avail_in -= read_size - tmp_in_size;
+ tmp_in_size = 0;
+
+ return 0;
+
+}
+
+static inline uint32_t buffer_header_copy(struct inflate_state *state, uint32_t in_len,
+ uint8_t * buf, uint32_t buffer_len, uint32_t offset,
+ uint32_t buf_error)
+{
+ uint32_t len = in_len;
+ uint32_t buf_len = buffer_len - offset;
+
+ if (len > state->avail_in)
+ len = state->avail_in;
+
+ if (buf != NULL && buf_len < len) {
+ memcpy(&buf[offset], state->next_in, buf_len);
+ state->next_in += buf_len;
+ state->avail_in -= buf_len;
+ state->count = in_len - buf_len;
+ return buf_error;
+ } else {
+ if (buf != NULL)
+ memcpy(&buf[offset], state->next_in, len);
+ state->next_in += len;
+ state->avail_in -= len;
+ state->count = in_len - len;
+
+ if (len == in_len)
+ return 0;
+ else
+ return ISAL_END_INPUT;
+ }
+}
+
+static inline uint32_t string_header_copy(struct inflate_state *state,
+ char *str_buf, uint32_t str_len,
+ uint32_t offset, uint32_t str_error)
+{
+ uint32_t len, max_len = str_len - offset;
+
+ if (max_len > state->avail_in || str_buf == NULL)
+ max_len = state->avail_in;
+
+ len = strnlen((char *)state->next_in, max_len);
+
+ if (str_buf != NULL)
+ memcpy(&str_buf[offset], state->next_in, len);
+
+ state->next_in += len;
+ state->avail_in -= len;
+ state->count += len;
+
+ if (str_buf != NULL && len == (str_len - offset))
+ return str_error;
+	else if (state->avail_in == 0)
+ return ISAL_END_INPUT;
+ else {
+ state->next_in++;
+ state->avail_in--;
+ state->count = 0;
+ if (str_buf != NULL)
+ str_buf[len] = 0;
+ }
+
+ return 0;
+}
+
+static int check_gzip_checksum(struct inflate_state *state)
+{
+ uint64_t trailer, crc, total_out;
+ uint8_t *next_in;
+ uint32_t byte_count, offset, tmp_in_size = state->tmp_in_size;
+ int ret;
+
+ if (state->read_in_length >= 8 * GZIP_TRAILER_LEN) {
+		/* The following is unnecessary as state->read_in_length == 64 */
+ /* bit_count = state->read_in_length % 8; */
+ /* state->read_in >>= bit_count; */
+ /* state->read_in_length -= bit_count; */
+
+ trailer = state->read_in;
+ state->read_in_length = 0;
+ state->read_in = 0;
+ } else {
+ if (state->read_in_length >= 8) {
+ byte_count = state->read_in_length / 8;
+ offset = state->read_in_length % 8;
+
+ store_u64(state->tmp_in_buffer + tmp_in_size,
+ state->read_in >> offset);
+ state->read_in = 0;
+ state->read_in_length = 0;
+
+ tmp_in_size += byte_count;
+ state->tmp_in_size = tmp_in_size;
+ }
+
+ ret = fixed_size_read(state, &next_in, GZIP_TRAILER_LEN);
+ if (ret) {
+ state->block_state = ISAL_CHECKSUM_CHECK;
+ return ret;
+ }
+
+ trailer = load_u64(next_in);
+ }
+
+ state->block_state = ISAL_BLOCK_FINISH;
+
+ crc = state->crc;
+ total_out = state->total_out;
+
+ if (trailer != (crc | (total_out << 32)))
+ return ISAL_INCORRECT_CHECKSUM;
+ else
+ return ISAL_DECOMP_OK;
+}
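+
+/* The gzip trailer (RFC 1952) is CRC32 followed by ISIZE, the uncompressed
+ * size mod 2^32, both stored little-endian. Loaded as a single little-endian
+ * 64-bit word it is therefore crc | (total_out << 32), which is exactly the
+ * comparison made above. */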
+
+static int check_zlib_checksum(struct inflate_state *state)
+{
+
+ uint32_t trailer;
+ uint8_t *next_in;
+ uint32_t byte_count, offset, tmp_in_size = state->tmp_in_size;
+ int ret, bit_count;
+
+ if (state->read_in_length >= 8 * ZLIB_TRAILER_LEN) {
+ bit_count = state->read_in_length % 8;
+ state->read_in >>= bit_count;
+ state->read_in_length -= bit_count;
+
+ trailer = state->read_in;
+
+ state->read_in_length -= 8 * ZLIB_TRAILER_LEN;
+ state->read_in >>= 8 * ZLIB_TRAILER_LEN;
+ } else {
+ if (state->read_in_length >= 8) {
+ byte_count = state->read_in_length / 8;
+ offset = state->read_in_length % 8;
+
+ store_u64(state->tmp_in_buffer + tmp_in_size,
+ state->read_in >> offset);
+ state->read_in = 0;
+ state->read_in_length = 0;
+
+ tmp_in_size += byte_count;
+ state->tmp_in_size = tmp_in_size;
+ }
+
+ ret = fixed_size_read(state, &next_in, ZLIB_TRAILER_LEN);
+ if (ret) {
+ state->block_state = ISAL_CHECKSUM_CHECK;
+ return ret;
+ }
+
+ trailer = load_u32(next_in);
+ }
+
+ state->block_state = ISAL_BLOCK_FINISH;
+
+ if (bswap_32(trailer) != state->crc)
+ return ISAL_INCORRECT_CHECKSUM;
+ else
+ return ISAL_DECOMP_OK;
+}
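+
+/* Unlike gzip, the zlib trailer (RFC 1950) stores the Adler-32 checksum
+ * big-endian, so the 32-bit value read from the little-endian bit buffer has
+ * to be byte-swapped before comparing with state->crc. For example, an
+ * Adler-32 of 0x11e60398 appears in the stream as the byte sequence
+ * 0x11 0xe6 0x03 0x98. */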
+
+int isal_read_gzip_header(struct inflate_state *state, struct isal_gzip_header *gz_hdr)
+{
+ int cm, flags = gz_hdr->flags, id1, id2;
+ uint16_t xlen = gz_hdr->extra_len;
+ uint32_t block_state = state->block_state;
+ uint8_t *start_in = state->next_in, *next_in;
+ uint32_t tmp_in_size = state->tmp_in_size;
+ uint32_t count = state->count, offset;
+ uint32_t hcrc = gz_hdr->hcrc;
+ int ret = 0;
+
+ /* This switch is a jump table into the function so that decoding the
+ * header can continue where it stopped on the last call */
+ switch (block_state) {
+ case ISAL_BLOCK_NEW_HDR:
+ state->count = 0;
+ flags = UNDEFINED_FLAG;
+ if (tmp_in_size == 0)
+ hcrc = 0;
+
+ ret = fixed_size_read(state, &next_in, GZIP_HDR_BASE);
+ if (ret)
+ break;
+
+ id1 = next_in[0];
+ id2 = next_in[1];
+ cm = next_in[2];
+ flags = next_in[3];
+ gz_hdr->time = load_u32(next_in + 4);
+ gz_hdr->xflags = *(next_in + 8);
+ gz_hdr->os = *(next_in + 9);
+
+ if (id1 != 0x1f || id2 != 0x8b)
+ return ISAL_INVALID_WRAPPER;
+
+ if (cm != DEFLATE_METHOD)
+ return ISAL_UNSUPPORTED_METHOD;
+
+ gz_hdr->text = 0;
+ if (flags & TEXT_FLAG)
+ gz_hdr->text = 1;
+
+ gz_hdr->flags = flags;
+
+ if (flags & EXTRA_FLAG) {
+ case ISAL_GZIP_EXTRA_LEN:
+ ret = fixed_size_read(state, &next_in, GZIP_EXTRA_LEN);
+ if (ret) {
+ state->block_state = ISAL_GZIP_EXTRA_LEN;
+ break;
+ }
+
+ xlen = load_u16(next_in);
+ count = xlen;
+
+ gz_hdr->extra_len = xlen;
+
+ case ISAL_GZIP_EXTRA:
+ offset = gz_hdr->extra_len - count;
+ ret =
+ buffer_header_copy(state, count, gz_hdr->extra,
+ gz_hdr->extra_buf_len,
+ offset, ISAL_EXTRA_OVERFLOW);
+
+ if (ret) {
+ state->block_state = ISAL_GZIP_EXTRA;
+ break;
+ }
+ } else {
+ gz_hdr->extra_len = 0;
+ }
+
+ if (flags & NAME_FLAG) {
+ case ISAL_GZIP_NAME:
+ offset = state->count;
+ ret = string_header_copy(state, gz_hdr->name,
+ gz_hdr->name_buf_len,
+ offset, ISAL_NAME_OVERFLOW);
+ if (ret) {
+ state->block_state = ISAL_GZIP_NAME;
+ break;
+ }
+ }
+
+ if (flags & COMMENT_FLAG) {
+ case ISAL_GZIP_COMMENT:
+ offset = state->count;
+ ret = string_header_copy(state, gz_hdr->comment,
+ gz_hdr->comment_buf_len,
+ offset, ISAL_COMMENT_OVERFLOW);
+ if (ret) {
+ state->block_state = ISAL_GZIP_COMMENT;
+ break;
+ }
+ }
+
+ if (flags & HCRC_FLAG) {
+ hcrc = crc32_gzip_refl(hcrc, start_in, state->next_in - start_in);
+ gz_hdr->hcrc = hcrc;
+
+ case ISAL_GZIP_HCRC:
+ ret = fixed_size_read(state, &next_in, GZIP_HCRC_LEN);
+ if (ret) {
+ state->block_state = ISAL_GZIP_HCRC;
+ return ret;
+ }
+
+ if ((hcrc & 0xffff) != load_u16(next_in))
+ return ISAL_INCORRECT_CHECKSUM;
+ }
+
+ state->wrapper_flag = 1;
+ state->block_state = ISAL_BLOCK_NEW_HDR;
+ return ISAL_DECOMP_OK;
+ }
+
+ if (flags & HCRC_FLAG)
+ gz_hdr->hcrc = crc32_gzip_refl(hcrc, start_in, state->next_in - start_in);
+
+ return ret;
+}
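+
+/* For reference, the fixed part of a gzip member header read by
+ * GZIP_HDR_BASE above is 10 bytes (RFC 1952):
+ *
+ *	ID1 = 0x1f, ID2 = 0x8b,   magic bytes
+ *	CM  = 8,                  deflate
+ *	FLG,                      FTEXT/FHCRC/FEXTRA/FNAME/FCOMMENT bits
+ *	MTIME (4 bytes), XFL, OS
+ *
+ * The optional extra/name/comment/hcrc fields that follow are what the
+ * fall-through cases of the switch above parse incrementally. */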
+
+int isal_read_zlib_header(struct inflate_state *state, struct isal_zlib_header *zlib_hdr)
+{
+ int cmf, method, flags;
+ uint32_t block_state = state->block_state;
+ uint8_t *next_in;
+ int ret = 0;
+
+ switch (block_state) {
+ case ISAL_BLOCK_NEW_HDR:
+ zlib_hdr->dict_flag = 0;
+ ret = fixed_size_read(state, &next_in, ZLIB_HDR_BASE);
+ if (ret)
+ break;
+
+ cmf = *next_in;
+ method = cmf & 0xf;
+ flags = *(next_in + 1);
+
+ zlib_hdr->info = cmf >> ZLIB_INFO_OFFSET;
+ zlib_hdr->dict_flag = (flags & ZLIB_DICT_FLAG) ? 1 : 0;
+ zlib_hdr->level = flags >> ZLIB_LEVEL_OFFSET;
+
+ if (method != DEFLATE_METHOD)
+ return ISAL_UNSUPPORTED_METHOD;
+
+ if ((256 * cmf + flags) % 31 != 0)
+ return ISAL_INCORRECT_CHECKSUM;
+
+ if (zlib_hdr->dict_flag) {
+ case ISAL_ZLIB_DICT:
+ ret = fixed_size_read(state, &next_in, ZLIB_DICT_LEN);
+ if (ret) {
+ state->block_state = ISAL_ZLIB_DICT;
+ break;
+ }
+
+ zlib_hdr->dict_id = load_u32(next_in);
+ }
+
+ state->wrapper_flag = 1;
+ state->block_state = ISAL_BLOCK_NEW_HDR;
+ }
+
+ return ret;
+}
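+
+/* The (256 * cmf + flags) % 31 test above is the FCHECK rule from RFC 1950:
+ * the CMF and FLG bytes, viewed as a 16-bit big-endian number, must be a
+ * multiple of 31. For example the common header 0x78 0x9c passes, since
+ * 0x789c = 30876 = 31 * 996. */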
+
+int isal_inflate_set_dict(struct inflate_state *state, uint8_t * dict, uint32_t dict_len)
+{
+
+ if (state->block_state != ISAL_BLOCK_NEW_HDR
+ || state->tmp_out_processed != state->tmp_out_valid)
+ return ISAL_INVALID_STATE;
+
+ if (dict_len > IGZIP_HIST_SIZE) {
+ dict = dict + dict_len - IGZIP_HIST_SIZE;
+ dict_len = IGZIP_HIST_SIZE;
+ }
+
+ memcpy(state->tmp_out_buffer, dict, dict_len);
+ state->tmp_out_processed = dict_len;
+ state->tmp_out_valid = dict_len;
+ state->dict_length = dict_len;
+
+ return COMP_OK;
+}
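+
+/* Usage sketch (illustrative; find_dictionary() is placeholder application
+ * code). A zlib stream that sets FDICT is handled by looking the dictionary
+ * up by its Adler-32 id and retrying:
+ *
+ *	ret = isal_inflate(&state);
+ *	if (ret == ISAL_NEED_DICT) {
+ *		dict = find_dictionary(state.dict_id, &dict_len);
+ *		isal_inflate_set_dict(&state, dict, dict_len);
+ *		ret = isal_inflate(&state);
+ *	}
+ */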
+
+int isal_inflate_stateless(struct inflate_state *state)
+{
+ uint32_t ret = 0;
+ uint8_t *start_out = state->next_out;
+
+ state->read_in = 0;
+ state->read_in_length = 0;
+ state->block_state = ISAL_BLOCK_NEW_HDR;
+ state->dict_length = 0;
+ state->bfinal = 0;
+ state->crc = 0;
+ state->total_out = 0;
+ state->hist_bits = 0;
+ state->tmp_in_size = 0;
+
+ if (state->crc_flag == IGZIP_GZIP) {
+ struct isal_gzip_header gz_hdr;
+ isal_gzip_header_init(&gz_hdr);
+ ret = isal_read_gzip_header(state, &gz_hdr);
+ if (ret)
+ return ret;
+ } else if (state->crc_flag == IGZIP_ZLIB) {
+ struct isal_zlib_header z_hdr = { 0 };
+ ret = isal_read_zlib_header(state, &z_hdr);
+ if (ret)
+ return ret;
+ if (z_hdr.dict_flag)
+ return ISAL_NEED_DICT;
+
+ }
+
+ while (state->block_state != ISAL_BLOCK_FINISH) {
+ if (state->block_state == ISAL_BLOCK_NEW_HDR) {
+ ret = read_header(state);
+
+ if (ret)
+ break;
+ }
+
+ if (state->block_state == ISAL_BLOCK_TYPE0)
+ ret = decode_literal_block(state);
+ else
+ ret = decode_huffman_code_block_stateless(state, start_out);
+
+ if (ret)
+ break;
+ if (state->block_state == ISAL_BLOCK_INPUT_DONE)
+ state->block_state = ISAL_BLOCK_FINISH;
+ }
+
+	/* Return unused whole bytes in the bit buffer to the input stream */
+ state->next_in -= state->read_in_length / 8;
+ state->avail_in += state->read_in_length / 8;
+ state->read_in_length = 0;
+ state->read_in = 0;
+
+ if (!ret && state->crc_flag) {
+ update_checksum(state, start_out, state->next_out - start_out);
+ switch (state->crc_flag) {
+ case ISAL_ZLIB:
+ case ISAL_ZLIB_NO_HDR_VER:
+ finalize_adler32(state);
+ ret = check_zlib_checksum(state);
+ break;
+
+ case ISAL_ZLIB_NO_HDR:
+ finalize_adler32(state);
+ break;
+
+ case ISAL_GZIP:
+ case ISAL_GZIP_NO_HDR_VER:
+ ret = check_gzip_checksum(state);
+ break;
+ }
+ }
+
+ return ret;
+}
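+
+/* One-shot usage sketch (illustrative; gz_buf/out_buf are placeholder
+ * buffers). With crc_flag set to IGZIP_GZIP the call above consumes the
+ * gzip header, inflates the whole member and verifies the trailer in a
+ * single pass:
+ *
+ *	struct inflate_state state;
+ *
+ *	isal_inflate_init(&state);
+ *	state.next_in = gz_buf;
+ *	state.avail_in = gz_len;
+ *	state.next_out = out_buf;
+ *	state.avail_out = out_len;
+ *	state.crc_flag = IGZIP_GZIP;
+ *	ret = isal_inflate_stateless(&state);   expect ISAL_DECOMP_OK
+ */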
+
+int isal_inflate(struct inflate_state *state)
+{
+
+ uint8_t *start_out = state->next_out;
+ uint32_t avail_out = state->avail_out;
+ uint32_t copy_size = 0;
+ int32_t shift_size = 0;
+ int ret = 0;
+
+ if (!state->wrapper_flag && state->crc_flag == IGZIP_GZIP) {
+ struct isal_gzip_header gz_hdr;
+ isal_gzip_header_init(&gz_hdr);
+ ret = isal_read_gzip_header(state, &gz_hdr);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return ISAL_DECOMP_OK;
+ } else if (!state->wrapper_flag && state->crc_flag == IGZIP_ZLIB) {
+ struct isal_zlib_header z_hdr = { 0 };
+ ret = isal_read_zlib_header(state, &z_hdr);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return ISAL_DECOMP_OK;
+
+ if (z_hdr.dict_flag) {
+ state->dict_id = z_hdr.dict_id;
+ return ISAL_NEED_DICT;
+ }
+ } else if (state->block_state == ISAL_CHECKSUM_CHECK) {
+ switch (state->crc_flag) {
+ case ISAL_ZLIB:
+ case ISAL_ZLIB_NO_HDR_VER:
+ ret = check_zlib_checksum(state);
+ break;
+ case ISAL_GZIP:
+ case ISAL_GZIP_NO_HDR_VER:
+ ret = check_gzip_checksum(state);
+ break;
+ }
+
+ return (ret > 0) ? ISAL_DECOMP_OK : ret;
+ }
+
+ if (state->block_state != ISAL_BLOCK_FINISH) {
+ state->total_out += state->tmp_out_valid - state->tmp_out_processed;
+ /* If space in tmp_out buffer, decompress into the tmp_out_buffer */
+ if (state->tmp_out_valid < 2 * ISAL_DEF_HIST_SIZE) {
+ /* Setup to start decoding into temp buffer */
+ state->next_out = &state->tmp_out_buffer[state->tmp_out_valid];
+ state->avail_out =
+ sizeof(state->tmp_out_buffer) - ISAL_LOOK_AHEAD -
+ state->tmp_out_valid;
+
+ if ((int32_t) state->avail_out < 0)
+ state->avail_out = 0;
+
+ /* Decode into internal buffer until exit */
+ while (state->block_state != ISAL_BLOCK_INPUT_DONE) {
+ if (state->block_state == ISAL_BLOCK_NEW_HDR
+ || state->block_state == ISAL_BLOCK_HDR) {
+ ret = read_header_stateful(state);
+
+ if (ret)
+ break;
+ }
+
+ if (state->block_state == ISAL_BLOCK_TYPE0) {
+ ret = decode_literal_block(state);
+ } else {
+ uint8_t *tmp = state->tmp_out_buffer;
+ ret = decode_huffman_code_block_stateless(state, tmp);
+ }
+
+ if (ret)
+ break;
+ }
+
+			/* Flush overflow data into the temp buffer and update
+			 * the count of valid bytes it holds */
+ if (state->write_overflow_len != 0) {
+ store_u32(state->next_out, state->write_overflow_lits);
+ state->next_out += state->write_overflow_len;
+ state->total_out += state->write_overflow_len;
+ state->write_overflow_lits = 0;
+ state->write_overflow_len = 0;
+ }
+
+ if (state->copy_overflow_length != 0) {
+ byte_copy(state->next_out, state->copy_overflow_distance,
+ state->copy_overflow_length);
+ state->tmp_out_valid += state->copy_overflow_length;
+ state->next_out += state->copy_overflow_length;
+ state->total_out += state->copy_overflow_length;
+ state->copy_overflow_distance = 0;
+ state->copy_overflow_length = 0;
+ }
+
+ state->tmp_out_valid = state->next_out - state->tmp_out_buffer;
+
+ /* Setup state for decompressing into out_buffer */
+ state->next_out = start_out;
+ state->avail_out = avail_out;
+ }
+
+ /* Copy data from tmp_out buffer into out_buffer */
+ copy_size = state->tmp_out_valid - state->tmp_out_processed;
+ if (copy_size > avail_out)
+ copy_size = avail_out;
+
+ memcpy(state->next_out,
+ &state->tmp_out_buffer[state->tmp_out_processed], copy_size);
+
+ state->tmp_out_processed += copy_size;
+ state->avail_out -= copy_size;
+ state->next_out += copy_size;
+
+ if (ret == ISAL_INVALID_LOOKBACK || ret == ISAL_INVALID_BLOCK
+ || ret == ISAL_INVALID_SYMBOL) {
+ /* Set total_out to not count data in tmp_out_buffer */
+ state->total_out -= state->tmp_out_valid - state->tmp_out_processed;
+ if (state->crc_flag)
+ update_checksum(state, start_out, state->next_out - start_out);
+ return ret;
+ }
+
+ /* If all data from tmp_out buffer has been processed, start
+ * decompressing into the out buffer */
+ if (state->tmp_out_processed == state->tmp_out_valid) {
+ while (state->block_state != ISAL_BLOCK_INPUT_DONE) {
+ if (state->block_state == ISAL_BLOCK_NEW_HDR
+ || state->block_state == ISAL_BLOCK_HDR) {
+ ret = read_header_stateful(state);
+ if (ret)
+ break;
+ }
+
+ if (state->block_state == ISAL_BLOCK_TYPE0)
+ ret = decode_literal_block(state);
+ else
+ ret =
+ decode_huffman_code_block_stateless(state,
+ start_out);
+ if (ret)
+ break;
+ }
+ }
+
+ if (state->crc_flag)
+ update_checksum(state, start_out, state->next_out - start_out);
+
+ if (state->block_state != ISAL_BLOCK_INPUT_DONE
+ || state->copy_overflow_length + state->write_overflow_len +
+ state->tmp_out_valid > sizeof(state->tmp_out_buffer)) {
+ /* Save decompression history in tmp_out buffer */
+ if (state->tmp_out_valid == state->tmp_out_processed
+ && avail_out - state->avail_out >= ISAL_DEF_HIST_SIZE) {
+ memcpy(state->tmp_out_buffer,
+ state->next_out - ISAL_DEF_HIST_SIZE,
+ ISAL_DEF_HIST_SIZE);
+ state->tmp_out_valid = ISAL_DEF_HIST_SIZE;
+ state->tmp_out_processed = ISAL_DEF_HIST_SIZE;
+
+ } else if (state->tmp_out_processed >= ISAL_DEF_HIST_SIZE) {
+ shift_size = state->tmp_out_valid - ISAL_DEF_HIST_SIZE;
+ if (shift_size > state->tmp_out_processed)
+ shift_size = state->tmp_out_processed;
+
+ memmove(state->tmp_out_buffer,
+ &state->tmp_out_buffer[shift_size],
+ state->tmp_out_valid - shift_size);
+ state->tmp_out_valid -= shift_size;
+ state->tmp_out_processed -= shift_size;
+
+ }
+ }
+
+ /* Write overflow data into tmp buffer */
+ if (state->write_overflow_len != 0) {
+ store_u32(&state->tmp_out_buffer[state->tmp_out_valid],
+ state->write_overflow_lits);
+ state->tmp_out_valid += state->write_overflow_len;
+ state->total_out += state->write_overflow_len;
+ state->write_overflow_lits = 0;
+ state->write_overflow_len = 0;
+ }
+
+ if (state->copy_overflow_length != 0) {
+ byte_copy(&state->tmp_out_buffer[state->tmp_out_valid],
+ state->copy_overflow_distance, state->copy_overflow_length);
+ state->tmp_out_valid += state->copy_overflow_length;
+ state->total_out += state->copy_overflow_length;
+ state->copy_overflow_distance = 0;
+ state->copy_overflow_length = 0;
+ }
+
+ if (ret == ISAL_INVALID_LOOKBACK || ret == ISAL_INVALID_BLOCK
+ || ret == ISAL_INVALID_SYMBOL) {
+ state->total_out -= state->tmp_out_valid - state->tmp_out_processed;
+ return ret;
+ }
+
+ if (state->block_state == ISAL_BLOCK_INPUT_DONE
+ && state->tmp_out_valid == state->tmp_out_processed) {
+ state->block_state = ISAL_BLOCK_FINISH;
+
+ switch (state->crc_flag) {
+ case ISAL_ZLIB:
+ case ISAL_ZLIB_NO_HDR_VER:
+ finalize_adler32(state);
+ ret = check_zlib_checksum(state);
+ break;
+
+ case ISAL_ZLIB_NO_HDR:
+ finalize_adler32(state);
+ break;
+
+ case ISAL_GZIP:
+ case ISAL_GZIP_NO_HDR_VER:
+ ret = check_gzip_checksum(state);
+ break;
+ }
+ }
+
+ state->total_out -= state->tmp_out_valid - state->tmp_out_processed;
+ }
+
+ return (ret > 0) ? ISAL_DECOMP_OK : ret;
+}
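+
+/* Design note: the stateful path above stages output through tmp_out_buffer
+ * so that look-back copies always have up to ISAL_DEF_HIST_SIZE bytes of
+ * history available even when the caller's buffers are tiny. After decoding
+ * directly into the user buffer, the last window of history is copied (or
+ * shifted down with memmove) back into tmp_out_buffer so the next call can
+ * resolve matches that cross the call boundary. */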
diff --git a/src/isa-l/igzip/igzip_inflate_multibinary.asm b/src/isa-l/igzip/igzip_inflate_multibinary.asm
new file mode 100644
index 000000000..ef2ce6836
--- /dev/null
+++ b/src/isa-l/igzip/igzip_inflate_multibinary.asm
@@ -0,0 +1,45 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+default rel
+[bits 64]
+
+%include "reg_sizes.asm"
+
+extern decode_huffman_code_block_stateless_base
+extern decode_huffman_code_block_stateless_01
+extern decode_huffman_code_block_stateless_04
+
+section .text
+
+%include "multibinary.asm"
+
+
+mbin_interface decode_huffman_code_block_stateless
+mbin_dispatch_init5 decode_huffman_code_block_stateless, decode_huffman_code_block_stateless_base, decode_huffman_code_block_stateless_01, decode_huffman_code_block_stateless_01, decode_huffman_code_block_stateless_04
diff --git a/src/isa-l/igzip/igzip_inflate_test.c b/src/isa-l/igzip/igzip_inflate_test.c
new file mode 100644
index 000000000..946759645
--- /dev/null
+++ b/src/isa-l/igzip/igzip_inflate_test.c
@@ -0,0 +1,311 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define _FILE_OFFSET_BITS 64
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <zlib.h>
+#include "igzip_lib.h"
+#include "huff_codes.h"
+#include "test.h"
+
+/* Don't use a file larger than memory can support because compression and
+ * decompression are done in a stateless manner. */
+#if __WORDSIZE == 64
+#define MAX_INPUT_FILE_SIZE 2L*1024L*1024L*1024L
+#else
+#define MAX_INPUT_FILE_SIZE 512L*1024L*1024L
+#endif
+
+int inflate_multi_pass(uint8_t * compress_buf, uint64_t compress_len,
+ uint8_t * uncompress_buf, uint32_t * uncompress_len)
+{
+ struct inflate_state *state = NULL;
+ int ret = 0;
+ uint8_t *comp_tmp = NULL, *uncomp_tmp = NULL;
+ uint32_t comp_tmp_size = 0, uncomp_tmp_size = 0;
+ uint32_t comp_processed = 0, uncomp_processed = 0;
+
+ state = malloc(sizeof(struct inflate_state));
+ if (state == NULL) {
+ printf("Failed to allocate memory\n");
+ exit(0);
+ }
+
+ isal_inflate_init(state);
+
+ state->next_in = NULL;
+ state->next_out = NULL;
+ state->avail_in = 0;
+ state->avail_out = 0;
+
+ while (1) {
+ if (state->avail_in == 0) {
+ comp_tmp_size = rand() % (compress_len + 1);
+
+ if (comp_tmp_size >= compress_len - comp_processed)
+ comp_tmp_size = compress_len - comp_processed;
+
+ if (comp_tmp_size != 0) {
+ if (comp_tmp != NULL) {
+ free(comp_tmp);
+ comp_tmp = NULL;
+ }
+
+ comp_tmp = malloc(comp_tmp_size);
+
+ if (comp_tmp == NULL) {
+ printf("Failed to allocate memory\n");
+ exit(0);
+ }
+
+ memcpy(comp_tmp, compress_buf + comp_processed, comp_tmp_size);
+ comp_processed += comp_tmp_size;
+
+ state->next_in = comp_tmp;
+ state->avail_in = comp_tmp_size;
+ }
+ }
+
+ if (state->avail_out == 0) {
+ /* Save uncompressed data into uncompress_buf */
+ if (uncomp_tmp != NULL) {
+ memcpy(uncompress_buf + uncomp_processed, uncomp_tmp,
+ uncomp_tmp_size);
+ uncomp_processed += uncomp_tmp_size;
+ }
+
+ uncomp_tmp_size = rand() % (*uncompress_len + 1);
+
+ /* Limit size of buffer to be smaller than maximum */
+ if (uncomp_tmp_size > *uncompress_len - uncomp_processed)
+ uncomp_tmp_size = *uncompress_len - uncomp_processed;
+
+ if (uncomp_tmp_size != 0) {
+
+ if (uncomp_tmp != NULL) {
+ fflush(0);
+ free(uncomp_tmp);
+ uncomp_tmp = NULL;
+ }
+
+ uncomp_tmp = malloc(uncomp_tmp_size);
+ if (uncomp_tmp == NULL) {
+ printf("Failed to allocate memory\n");
+ exit(0);
+ }
+
+ state->avail_out = uncomp_tmp_size;
+ state->next_out = uncomp_tmp;
+ }
+ }
+
+ ret = isal_inflate(state);
+
+ if (state->block_state == ISAL_BLOCK_FINISH || ret != 0) {
+ memcpy(uncompress_buf + uncomp_processed, uncomp_tmp, uncomp_tmp_size);
+ *uncompress_len = state->total_out;
+ break;
+ }
+ }
+
+ if (comp_tmp != NULL) {
+ free(comp_tmp);
+ comp_tmp = NULL;
+ }
+ if (uncomp_tmp != NULL) {
+ free(uncomp_tmp);
+ uncomp_tmp = NULL;
+ }
+
+ free(state);
+ return ret;
+}
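+
+/* The random re-sizing of comp_tmp and uncomp_tmp above deliberately forces
+ * isal_inflate() to stop and resume at arbitrary input and output offsets,
+ * exercising the header-resume (ISAL_BLOCK_HDR) and tmp_out history paths of
+ * the stateful decompressor. */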
+
+int test(uint8_t * compressed_stream,
+ uint64_t * compressed_length,
+ uint8_t * uncompressed_stream, uint32_t uncompressed_length,
+ uint8_t * uncompressed_test_stream, uint32_t uncompressed_test_stream_length)
+{
+ int ret;
+ ret =
+ compress2(compressed_stream, (uLongf *) compressed_length,
+ uncompressed_stream, uncompressed_length, 6);
+ if (ret) {
+ printf("Failed compressing input with exit code %d", ret);
+ return ret;
+ }
+
+ ret =
+ inflate_multi_pass(compressed_stream + 2,
+ *compressed_length - 2 - 4,
+ uncompressed_test_stream, &uncompressed_test_stream_length);
+ switch (ret) {
+ case 0:
+ break;
+ case ISAL_END_INPUT:
+ printf(" did not decompress all input\n");
+ return ISAL_END_INPUT;
+ break;
+ case ISAL_INVALID_BLOCK:
+ printf(" invalid header\n");
+ return ISAL_INVALID_BLOCK;
+ break;
+ case ISAL_INVALID_SYMBOL:
+ printf(" invalid symbol\n");
+ return ISAL_INVALID_SYMBOL;
+ break;
+ case ISAL_OUT_OVERFLOW:
+ printf(" out buffer overflow\n");
+ return ISAL_OUT_OVERFLOW;
+ break;
+ case ISAL_INVALID_LOOKBACK:
+		printf(" invalid lookback distance\n");
+ return ISAL_INVALID_LOOKBACK;
+ break;
+ default:
+ printf(" error\n");
+ return -1;
+ break;
+ }
+
+ if (uncompressed_test_stream_length != uncompressed_length) {
+ printf("incorrect amount of data was decompressed from compressed data\n");
+		printf("%u bytes decompressed of %u bytes expected\n",
+		       uncompressed_test_stream_length, uncompressed_length);
+ return -1;
+ }
+ if (memcmp(uncompressed_stream, uncompressed_test_stream, uncompressed_length)) {
+ int i;
+ for (i = 0; i < uncompressed_length; i++) {
+ if (uncompressed_stream[i] != uncompressed_test_stream[i]) {
+ printf("first error at %d, 0x%x != 0x%x\n", i,
+ uncompressed_stream[i], uncompressed_test_stream[i]);
+ }
+ }
+		printf(" decompressed data is not the same as the original data\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int i, j, ret = 0, fin_ret = 0;
+ FILE *file = NULL;
+ uint64_t compressed_length, file_length;
+ uint64_t uncompressed_length, uncompressed_test_stream_length;
+ uint8_t *uncompressed_stream = NULL;
+ uint8_t *compressed_stream = NULL;
+ uint8_t *uncompressed_test_stream = NULL;
+
+ if (argc == 1)
+ printf("Error, no input file\n");
+ for (i = 1; i < argc; i++) {
+
+ file = NULL;
+ uncompressed_stream = NULL;
+ compressed_stream = NULL;
+ uncompressed_test_stream = NULL;
+
+ file = fopen(argv[i], "r");
+ if (file == NULL) {
+ printf("Error opening file %s\n", argv[i]);
+ return 1;
+ } else
+ printf("Starting file %s", argv[i]);
+ fflush(0);
+ file_length = get_filesize(file);
+ if (file_length > MAX_INPUT_FILE_SIZE) {
+			printf("\nFile too large for this test."
+			       " Max is 512MB for a 32-bit OS, 2GB for a 64-bit OS.\n");
+ printf(" ... Fail\n");
+ fclose(file);
+ continue;
+ }
+
+ compressed_length = compressBound(file_length);
+
+ if (file_length != 0) {
+ uncompressed_stream = malloc(file_length);
+ uncompressed_test_stream = malloc(file_length);
+ }
+
+ compressed_stream = malloc(compressed_length);
+ if (uncompressed_stream == NULL && file_length != 0) {
+ printf("\nFailed to allocate input memory\n");
+ exit(0);
+ }
+
+ if (compressed_stream == NULL) {
+ printf("\nFailed to allocate output memory\n");
+ exit(0);
+ }
+
+ if (uncompressed_test_stream == NULL && file_length != 0) {
+ printf("\nFailed to allocate decompressed memory\n");
+ exit(0);
+ }
+
+ uncompressed_length = fread(uncompressed_stream, 1, file_length, file);
+ uncompressed_test_stream_length = uncompressed_length;
+ ret =
+ test(compressed_stream, &compressed_length, uncompressed_stream,
+ uncompressed_length, uncompressed_test_stream,
+ uncompressed_test_stream_length);
+ if (ret) {
+ for (j = 0; j < compressed_length; j++) {
+ if ((j & 31) == 0)
+ printf("\n");
+ else
+ printf(" ");
+ printf("0x%02x,", compressed_stream[j]);
+ }
+ printf("\n");
+ }
+
+ fflush(0);
+ fclose(file);
+ if (compressed_stream != NULL)
+ free(compressed_stream);
+ if (uncompressed_stream != NULL)
+ free(uncompressed_stream);
+ if (uncompressed_test_stream != NULL)
+ free(uncompressed_test_stream);
+ if (ret) {
+ printf(" ... Fail with exit code %d\n", ret);
+ return ret;
+ } else
+ printf(" ... Pass\n");
+ fin_ret |= ret;
+ }
+ return fin_ret;
+}
diff --git a/src/isa-l/igzip/igzip_level_buf_structs.h b/src/isa-l/igzip/igzip_level_buf_structs.h
new file mode 100644
index 000000000..5c195e3f1
--- /dev/null
+++ b/src/isa-l/igzip/igzip_level_buf_structs.h
@@ -0,0 +1,48 @@
+#ifndef IGZIP_LEVEL_BUF_STRUCTS_H
+#define IGZIP_LEVEL_BUF_STRUCTS_H
+
+#include "igzip_lib.h"
+#include "huff_codes.h"
+#include "encode_df.h"
+
+#define MATCH_BUF_SIZE (4 * 1024)
+
+struct hash8k_buf {
+ uint16_t hash_table[IGZIP_HASH8K_HASH_SIZE];
+};
+
+struct hash_hist_buf {
+ uint16_t hash_table[IGZIP_HASH_HIST_SIZE];
+};
+
+struct hash_map_buf {
+ uint16_t hash_table[IGZIP_HASH_MAP_HASH_SIZE];
+ struct deflate_icf *matches_next;
+ struct deflate_icf *matches_end;
+ struct deflate_icf matches[MATCH_BUF_SIZE];
+ struct deflate_icf overflow[ISAL_LOOK_AHEAD];
+};
+
+#define MAX_LVL_BUF_SIZE sizeof(struct hash_map_buf)
+
+struct level_buf {
+ struct hufftables_icf encode_tables;
+ struct isal_mod_hist hist;
+ uint32_t deflate_hdr_count;
+ uint32_t deflate_hdr_extra_bits;
+ uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE];
+ struct deflate_icf *icf_buf_next;
+ uint64_t icf_buf_avail_out;
+ struct deflate_icf *icf_buf_start;
+ union {
+ struct hash8k_buf hash8k;
+ struct hash_hist_buf hash_hist;
+ struct hash_map_buf hash_map;
+
+ struct hash8k_buf lvl1;
+ struct hash_hist_buf lvl2;
+ struct hash_map_buf lvl3;
+ };
+};
+
+#endif
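+
+/* Allocation sketch (illustrative): callers size level_buf from the
+ * ISAL_DEF_LVL<n>_DEFAULT constants in igzip_lib.h, e.g.
+ *
+ *	stream.level = 3;
+ *	stream.level_buf_size = ISAL_DEF_LVL3_DEFAULT;
+ *	stream.level_buf = malloc(stream.level_buf_size);
+ *
+ * which must be at least large enough for the union member the chosen level
+ * uses (hash_map_buf for level 3). */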
diff --git a/src/isa-l/igzip/igzip_multibinary.asm b/src/isa-l/igzip/igzip_multibinary.asm
new file mode 100644
index 000000000..7997a8453
--- /dev/null
+++ b/src/isa-l/igzip/igzip_multibinary.asm
@@ -0,0 +1,134 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+default rel
+[bits 64]
+
+%include "reg_sizes.asm"
+
+extern isal_deflate_body_base
+extern isal_deflate_body_01
+extern isal_deflate_body_02
+extern isal_deflate_body_04
+extern isal_deflate_finish_base
+extern isal_deflate_finish_01
+
+extern isal_deflate_icf_body_hash_hist_base
+extern isal_deflate_icf_body_hash_hist_01
+extern isal_deflate_icf_body_hash_hist_02
+extern isal_deflate_icf_body_hash_hist_04
+extern isal_deflate_icf_finish_hash_hist_base
+extern isal_deflate_icf_finish_hash_hist_01
+
+extern isal_deflate_icf_finish_hash_map_base
+
+extern isal_update_histogram_base
+extern isal_update_histogram_01
+extern isal_update_histogram_04
+
+extern gen_icf_map_h1_base
+extern gen_icf_map_lh1_04
+
+extern encode_deflate_icf_base
+extern encode_deflate_icf_04
+
+extern set_long_icf_fg_base
+extern set_long_icf_fg_04
+
+%ifdef HAVE_AS_KNOWS_AVX512
+extern encode_deflate_icf_06
+extern set_long_icf_fg_06
+extern gen_icf_map_lh1_06
+%endif
+
+extern adler32_base
+extern adler32_avx2_4
+extern adler32_sse
+
+extern isal_deflate_hash_base
+extern isal_deflate_hash_crc_01
+
+extern isal_deflate_hash_mad_base
+
+extern icf_body_hash1_fillgreedy_lazy
+extern icf_body_lazyhash1_fillgreedy_greedy
+
+section .text
+
+%include "multibinary.asm"
+
+mbin_interface isal_deflate_body
+mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body_01, isal_deflate_body_02, isal_deflate_body_04
+mbin_interface isal_deflate_finish
+mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01
+
+mbin_interface isal_deflate_icf_body_lvl1
+mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
+
+mbin_interface isal_deflate_icf_body_lvl2
+mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04
+
+mbin_interface isal_deflate_icf_body_lvl3
+mbin_dispatch_init5 isal_deflate_icf_body_lvl3, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_hash1_fillgreedy_lazy, icf_body_lazyhash1_fillgreedy_greedy
+
+mbin_interface isal_deflate_icf_finish_lvl1
+mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
+
+mbin_interface isal_deflate_icf_finish_lvl2
+mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01
+
+mbin_interface isal_deflate_icf_finish_lvl3
+mbin_dispatch_init5 isal_deflate_icf_finish_lvl3, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base
+
+mbin_interface isal_update_histogram
+mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04
+
+mbin_interface encode_deflate_icf
+mbin_dispatch_init6 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04, encode_deflate_icf_06
+
+mbin_interface set_long_icf_fg
+mbin_dispatch_init6 set_long_icf_fg, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_04, set_long_icf_fg_06
+
+mbin_interface gen_icf_map_lh1
+mbin_dispatch_init6 gen_icf_map_lh1, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_lh1_04, gen_icf_map_lh1_06
+
+mbin_interface isal_adler32
+mbin_dispatch_init5 isal_adler32, adler32_base, adler32_sse, adler32_sse, adler32_avx2_4
+
+mbin_interface isal_deflate_hash_lvl0
+mbin_dispatch_init5 isal_deflate_hash_lvl0, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01
+
+mbin_interface isal_deflate_hash_lvl1
+mbin_dispatch_init5 isal_deflate_hash_lvl1, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01
+
+mbin_interface isal_deflate_hash_lvl2
+mbin_dispatch_init5 isal_deflate_hash_lvl2, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01
+
+mbin_interface isal_deflate_hash_lvl3
+mbin_dispatch_init5 isal_deflate_hash_lvl3, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_mad_base
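+
+; Note: by this library's convention the numeric suffixes denote the ISA
+; level of each implementation (base = portable C, 01 = SSE, 02 = AVX,
+; 04 = AVX2, 06 = AVX512); mbin_dispatch_init5/init6 list the candidates in
+; that order and the resolver picks the best one the CPU supports. Slots that
+; repeat a lower variant (e.g. isal_deflate_finish) simply have no separate
+; build for that ISA.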
diff --git a/src/isa-l/igzip/igzip_perf.c b/src/isa-l/igzip/igzip_perf.c
new file mode 100644
index 000000000..da6d3b9eb
--- /dev/null
+++ b/src/isa-l/igzip/igzip_perf.c
@@ -0,0 +1,832 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define _FILE_OFFSET_BITS 64
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <getopt.h>
+#include "huff_codes.h"
+#include "igzip_lib.h"
+#include "test.h"
+
+#include <zlib.h>
+
+#define BUF_SIZE 1024
+
+#define OPTARGS "hl:f:z:i:d:stub:y:w:o:"
+
+#define COMPRESSION_QUEUE_LIMIT 32
+#define UNSET -1
+
+#define xstr(a) str(a)
+#define str(a) #a
+
+/* Limit output buffer size to 2 Gigabytes. Since stream->avail_out is a
+ * uint32_t and there is no logic for handling an overflowed output buffer in
+ * the perf test, this define must be less than 4 Gigabytes */
+#define MAX_COMPRESS_BUF_SIZE (1U << 31)
+
+int level_size_buf[10] = {
+#ifdef ISAL_DEF_LVL0_DEFAULT
+ ISAL_DEF_LVL0_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL1_DEFAULT
+ ISAL_DEF_LVL1_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL2_DEFAULT
+ ISAL_DEF_LVL2_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL3_DEFAULT
+ ISAL_DEF_LVL3_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL4_DEFAULT
+ ISAL_DEF_LVL4_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL5_DEFAULT
+ ISAL_DEF_LVL5_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL6_DEFAULT
+ ISAL_DEF_LVL6_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL7_DEFAULT
+ ISAL_DEF_LVL7_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL8_DEFAULT
+ ISAL_DEF_LVL8_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL9_DEFAULT
+ ISAL_DEF_LVL9_DEFAULT,
+#else
+ 0,
+#endif
+};
+
+enum {
+ ISAL_STATELESS,
+ ISAL_STATEFUL,
+ ZLIB
+};
+
+struct compress_strategy {
+ int32_t mode;
+ int32_t level;
+};
+
+struct inflate_modes {
+ int32_t stateless;
+ int32_t stateful;
+ int32_t zlib;
+};
+
+struct perf_info {
+ char *file_name;
+ size_t file_size;
+ size_t deflate_size;
+ uint32_t inblock_size;
+ uint32_t flush_type;
+ int32_t hist_bits;
+ int32_t deflate_time;
+ int32_t inflate_time;
+ struct compress_strategy strategy;
+ uint32_t inflate_mode;
+ struct perf start;
+};
+
+void init_perf_info(struct perf_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->deflate_time = BENCHMARK_TIME;
+ info->inflate_time = BENCHMARK_TIME;
+}
+
+int usage(void)
+{
+ fprintf(stderr,
+ "Usage: igzip_perf [options] <infile>\n"
+ " -h help, print this message\n"
+ " The options -l, -f, -z may be used up to "
+ xstr(COMPRESSION_QUEUE_LIMIT) " times\n"
+ " -l <level> isa-l stateless deflate level to test ("
+ xstr(ISAL_DEF_MIN_LEVEL) "-" xstr(ISAL_DEF_MAX_LEVEL) ")\n"
+ " -f <level> isa-l stateful deflate level to test ("
+ xstr(ISAL_DEF_MIN_LEVEL) "-" xstr(ISAL_DEF_MAX_LEVEL) ")\n"
+ " -z <level> zlib deflate level to test\n"
+ " -d <time> approx time in seconds for deflate (at least 0)\n"
+ " -i <time> approx time in seconds for inflate (at least 0)\n"
+ " -s performance test isa-l stateful inflate\n"
+ " -t performance test isa-l stateless inflate\n"
+ " -u performance test zlib inflate\n"
+ " -o <file> output file to store compressed data (last one if multiple)\n"
+ " -b <size> input buffer size, applies to stateful options (-f,-z,-s)\n"
+ " -y <type> flush type: 0 (default: no flush), 1 (sync flush), 2 (full flush)\n"
+ " -w <size> log base 2 size of history window, between 9 and 15\n");
+ exit(0);
+}
+
+void print_perf_info_line(struct perf_info *info)
+{
+ printf("igzip_perf-> compress level: %d flush_type: %d block_size: %d\n",
+ info->strategy.level, info->flush_type, info->inblock_size);
+}
+
+void print_file_line(struct perf_info *info)
+{
+ printf(" file info-> name: %s file_size: %lu compress_size: %lu ratio: %2.02f%%\n",
+ info->file_name, info->file_size, info->deflate_size,
+ 100.0 * info->deflate_size / info->file_size);
+}
+
+void print_deflate_perf_line(struct perf_info *info)
+{
+ if (info->strategy.mode == ISAL_STATELESS)
+ printf(" isal_stateless_deflate-> ");
+ else if (info->strategy.mode == ISAL_STATEFUL)
+ printf(" isal_stateful_deflate-> ");
+ else if (info->strategy.mode == ZLIB)
+ printf(" zlib_deflate-> ");
+
+ perf_print(info->start, info->file_size);
+}
+
+void print_inflate_perf_line(struct perf_info *info)
+{
+ if (info->inflate_mode == ISAL_STATELESS)
+ printf(" isal_stateless_inflate-> ");
+ else if (info->inflate_mode == ISAL_STATEFUL)
+ printf(" isal_stateful_inflate-> ");
+ else if (info->inflate_mode == ZLIB)
+ printf(" zlib_inflate-> ");
+
+ perf_print(info->start, info->file_size);
+}
+
+int isal_deflate_round(struct isal_zstream *stream, uint8_t * outbuf, uint32_t outbuf_size,
+ uint8_t * inbuf, uint32_t inbuf_size,
+ uint32_t level, uint8_t * level_buf, uint32_t level_buf_size,
+ int flush_type, int hist_bits)
+{
+ int check;
+
+ /* Setup stream for stateless compression */
+ isal_deflate_init(stream);
+ stream->end_of_stream = 1; /* Do the entire file at once */
+ stream->flush = flush_type;
+ stream->next_in = inbuf;
+ stream->avail_in = inbuf_size;
+ stream->next_out = outbuf;
+ stream->avail_out = outbuf_size;
+ stream->level = level;
+ stream->level_buf = level_buf;
+ stream->level_buf_size = level_buf_size;
+ stream->hist_bits = hist_bits;
+
+ /* Compress stream */
+ check = isal_deflate_stateless(stream);
+
+ /* Verify compression success */
+ if (check || stream->avail_in)
+ return 1;
+
+ return 0;
+}
+
+int isal_inflate_round(struct inflate_state *state, uint8_t * inbuf, uint32_t inbuf_size,
+ uint8_t * outbuf, uint32_t outbuf_size, int hist_bits)
+{
+ int check = 0;
+
+ /* Setup for stateless inflate */
+ state->next_in = inbuf;
+ state->avail_in = inbuf_size;
+ state->next_out = outbuf;
+ state->avail_out = outbuf_size;
+ state->crc_flag = ISAL_DEFLATE;
+ state->hist_bits = hist_bits;
+
+ /* Inflate data */
+ check = isal_inflate_stateless(state);
+
+ /* Verify inflate was successful */
+ if (check)
+ return 1;
+
+ return 0;
+}
+
+int isal_deflate_stateful_round(struct isal_zstream *stream, uint8_t * outbuf,
+ uint32_t outbuf_size, uint8_t * inbuf,
+ uint32_t inbuf_size, uint32_t in_block_size, uint32_t level,
+ uint8_t * level_buf, uint32_t level_buf_size, int flush_type,
+ int hist_bits)
+{
+ uint64_t inbuf_remaining;
+ int check = COMP_OK;
+
+ /* Setup stream for stateful compression */
+ inbuf_remaining = inbuf_size;
+ isal_deflate_init(stream);
+ stream->flush = flush_type;
+ stream->next_in = inbuf;
+ stream->next_out = outbuf;
+ stream->avail_out = outbuf_size;
+ stream->level = level;
+ stream->level_buf = level_buf;
+ stream->level_buf_size = level_buf_size;
+ stream->hist_bits = hist_bits;
+
+ /* Keep compressing so long as more data is available and no error has
+ * been hit */
+ while (COMP_OK == check && inbuf_remaining > in_block_size) {
+ /* Setup next in buffer, assumes out buffer is sufficiently
+ * large */
+ stream->avail_in = in_block_size;
+ inbuf_remaining -= in_block_size;
+
+ /* Compress stream */
+ check = isal_deflate(stream);
+ }
+
+ /* Finish compressing all remaining input */
+ if (COMP_OK == check) {
+ stream->avail_in = inbuf_remaining;
+ stream->end_of_stream = 1;
+ check = isal_deflate(stream);
+ }
+
+ /* Verify Compression Success */
+ if (COMP_OK != check || stream->avail_in > 0)
+ return 1;
+
+ return 0;
+}
+
+int isal_inflate_stateful_round(struct inflate_state *state, uint8_t * inbuf,
+ uint32_t inbuf_size, uint32_t in_block_size, uint8_t * outbuf,
+ uint32_t outbuf_size, int hist_bits)
+{
+ int check = ISAL_DECOMP_OK;
+ uint64_t inbuf_remaining;
+
+ isal_inflate_init(state);
+ state->next_in = inbuf;
+ state->next_out = outbuf;
+ state->avail_out = outbuf_size;
+ state->hist_bits = hist_bits;
+ inbuf_remaining = inbuf_size;
+
+ while (ISAL_DECOMP_OK == check && inbuf_remaining >= in_block_size) {
+ state->avail_in = in_block_size;
+ inbuf_remaining -= in_block_size;
+ check = isal_inflate(state);
+ }
+ if (ISAL_DECOMP_OK == check && inbuf_remaining > 0) {
+ state->avail_in = inbuf_remaining;
+ check = isal_inflate(state);
+ }
+
+ if (ISAL_DECOMP_OK != check || state->avail_in > 0)
+ return 1;
+
+ return 0;
+}
+
+int zlib_deflate_round(z_stream * gstream, uint8_t * outbuf, uInt outbuf_size,
+ uint8_t * inbuf, uLong inbuf_size,
+ uLong in_block_size, int level, int flush_type)
+{
+ uLong inbuf_remaining;
+ int check = Z_OK;
+
+ inbuf_remaining = inbuf_size;
+
+ /* Setup stream for stateful compression */
+ if (0 != deflateReset(gstream))
+ return 1;
+
+ gstream->next_in = inbuf;
+ gstream->next_out = outbuf;
+ gstream->avail_out = outbuf_size;
+
+ /* Keep compressing so long as more data is available and no error has
+ * been hit */
+ while (Z_OK == check && inbuf_remaining > in_block_size) {
+ gstream->avail_in = in_block_size;
+ inbuf_remaining -= in_block_size;
+ check = deflate(gstream, flush_type);
+ }
+
+ /* Finish compressing all remaining input */
+ if (Z_OK == check) {
+ gstream->avail_in = inbuf_remaining;
+ check = deflate(gstream, Z_FINISH);
+ }
+
+ /* Verify Compression Success */
+ if (Z_STREAM_END != check)
+ return 1;
+
+ return 0;
+}
+
+int zlib_inflate_round(z_stream * gstream, uint8_t * inbuf,
+ uLong inbuf_size, uint8_t * outbuf, uInt outbuf_size)
+{
+ int check = 0;
+
+ if (0 != inflateReset(gstream))
+ return 1;
+
+ gstream->next_in = inbuf;
+ gstream->avail_in = inbuf_size;
+ gstream->next_out = outbuf;
+ gstream->avail_out = outbuf_size;
+ check = inflate(gstream, Z_FINISH);
+ if (check != Z_STREAM_END)
+ return 1;
+
+ return 0;
+}
+
+int isal_deflate_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
+ uint64_t inbuf_size, int level, int flush_type, int hist_bits, int time,
+ struct perf *start)
+{
+ struct isal_zstream stream;
+ uint8_t *level_buf = NULL;
+ int check;
+
+ if (level_size_buf[level] > 0) {
+ level_buf = malloc(level_size_buf[level]);
+ if (level_buf == NULL)
+ return 1;
+ }
+
+ BENCHMARK(start, time, check =
+ isal_deflate_round(&stream, outbuf, *outbuf_size, inbuf,
+ inbuf_size, level, level_buf,
+ level_size_buf[level], flush_type, hist_bits));
+ *outbuf_size = stream.total_out;
+ return check;
+}
+
+int isal_deflate_stateful_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
+ uint64_t inbuf_size, int level, int flush_type,
+ uint64_t in_block_size, int hist_bits, int time,
+ struct perf *start)
+{
+ struct isal_zstream stream;
+ uint8_t *level_buf = NULL;
+ int check;
+
+ if (in_block_size == 0)
+ in_block_size = inbuf_size;
+
+ if (level_size_buf[level] > 0) {
+ level_buf = malloc(level_size_buf[level]);
+ if (level_buf == NULL)
+ return 1;
+ }
+
+ BENCHMARK(start, time, check =
+ isal_deflate_stateful_round(&stream, outbuf, *outbuf_size, inbuf, inbuf_size,
+ in_block_size, level, level_buf,
+ level_size_buf[level], flush_type, hist_bits));
+ *outbuf_size = stream.total_out;
+ return check;
+
+}
+
+int zlib_deflate_perf(uint8_t * outbuf, uint64_t * outbuf_size, uint8_t * inbuf,
+ uint64_t inbuf_size, int level, int flush_type,
+ uint64_t in_block_size, int hist_bits, int time, struct perf *start)
+{
+ int check;
+ z_stream gstream;
+ int flush_translator[] = { Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH };
+
+ if (in_block_size == 0)
+ in_block_size = inbuf_size;
+
+ flush_type = flush_translator[flush_type];
+
+ /* Initialize the gstream buffer */
+ gstream.next_in = inbuf;
+ gstream.avail_in = inbuf_size;
+ gstream.zalloc = Z_NULL;
+ gstream.zfree = Z_NULL;
+ gstream.opaque = Z_NULL;
+
+ if (hist_bits == 0)
+ hist_bits = -15;
+ else
+ hist_bits = -hist_bits;
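+
+	/* zlib convention: a negative windowBits value requests a raw deflate
+	 * stream with no zlib header or trailer, so the benchmark is
+	 * comparable with the raw output isa-l produces here; -15 selects the
+	 * full 32K window. */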
+
+ if (0 != deflateInit2(&gstream, level, Z_DEFLATED, hist_bits, 9, Z_DEFAULT_STRATEGY))
+ return 1;
+
+ BENCHMARK(start, time, check =
+ zlib_deflate_round(&gstream, outbuf, *outbuf_size, inbuf, inbuf_size,
+ in_block_size, level, flush_type));
+
+ *outbuf_size = gstream.total_out;
+ deflateEnd(&gstream);
+
+ return check;
+}
+
+int isal_inflate_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * outbuf,
+ uint64_t outbuf_size, uint8_t * filebuf, uint64_t file_size,
+ int hist_bits, int time, struct perf *start)
+{
+ struct inflate_state state;
+ int check;
+
+ /* Check that data decompresses */
+ check = isal_inflate_round(&state, inbuf, inbuf_size, outbuf, outbuf_size, hist_bits);
+ if (check || state.total_out != file_size || memcmp(outbuf, filebuf, file_size))
+ return 1;
+
+ BENCHMARK(start, time, isal_inflate_round(&state, inbuf, inbuf_size,
+ outbuf, outbuf_size, hist_bits));
+
+ return check;
+}
+
+int isal_inflate_stateful_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * outbuf,
+ uint64_t outbuf_size, uint8_t * filebuf, uint64_t file_size,
+ uint64_t in_block_size, int hist_bits, int time,
+ struct perf *start)
+{
+ struct inflate_state state;
+ int check;
+
+ if (in_block_size == 0)
+ in_block_size = inbuf_size;
+
+ check = isal_inflate_round(&state, inbuf, inbuf_size, outbuf, outbuf_size, hist_bits);
+ if (check || state.total_out != file_size || memcmp(outbuf, filebuf, file_size))
+ return 1;
+
+ BENCHMARK(start, time,
+ isal_inflate_stateful_round(&state, inbuf, inbuf_size, in_block_size, outbuf,
+ outbuf_size, hist_bits));
+
+ return 0;
+
+}
+
+int zlib_inflate_perf(uint8_t * inbuf, uint64_t inbuf_size, uint8_t * outbuf,
+ uint64_t outbuf_size, uint8_t * filebuf, uint64_t file_size,
+ int hist_bits, int time, struct perf *start)
+{
+ int check;
+ z_stream gstream;
+
+ gstream.next_in = inbuf;
+ gstream.avail_in = inbuf_size;
+ gstream.zalloc = Z_NULL;
+ gstream.zfree = Z_NULL;
+ gstream.opaque = Z_NULL;
+
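+	/* As in zlib_deflate_perf(), a negative windowBits value selects raw
+	 * (headerless) inflate. */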
+ if (hist_bits == 0)
+ hist_bits = -15;
+ else
+ hist_bits = -hist_bits;
+
+ if (0 != inflateInit2(&gstream, hist_bits))
+ return 1;
+
+ check = zlib_inflate_round(&gstream, inbuf, inbuf_size, outbuf, outbuf_size);
+ if (check || gstream.total_out != file_size || memcmp(outbuf, filebuf, file_size))
+ return 1;
+
+ BENCHMARK(start, time,
+ zlib_inflate_round(&gstream, inbuf, inbuf_size, outbuf, outbuf_size));
+
+ inflateEnd(&gstream);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *in = NULL;
+ unsigned char *compressbuf, *decompbuf, *filebuf;
+ char *outfile = NULL;
+ int i, c, ret = 0;
+ uint64_t decompbuf_size, compressbuf_size;
+ uint64_t block_count;
+
+ struct compress_strategy compression_queue[COMPRESSION_QUEUE_LIMIT];
+
+ int compression_queue_size = 0;
+ struct compress_strategy compress_strat;
+ struct inflate_modes inflate_strat = { 0 };
+ struct perf_info info;
+ init_perf_info(&info);
+
+ while ((c = getopt(argc, argv, OPTARGS)) != -1) {
+ switch (c) {
+ case 'l':
+ if (compression_queue_size >= COMPRESSION_QUEUE_LIMIT) {
+ printf("Too many levels specified");
+ exit(0);
+ }
+
+ compress_strat.mode = ISAL_STATELESS;
+ compress_strat.level = atoi(optarg);
+ if (compress_strat.level > ISAL_DEF_MAX_LEVEL) {
+ printf("Unsupported isa-l compression level\n");
+ exit(0);
+ }
+
+ compression_queue[compression_queue_size] = compress_strat;
+ compression_queue_size++;
+ break;
+ case 'f':
+ if (compression_queue_size >= COMPRESSION_QUEUE_LIMIT) {
+ printf("Too many levels specified");
+ exit(0);
+ }
+
+ compress_strat.mode = ISAL_STATEFUL;
+ compress_strat.level = atoi(optarg);
+ if (compress_strat.level > ISAL_DEF_MAX_LEVEL) {
+ printf("Unsupported isa-l compression level\n");
+ exit(0);
+ }
+
+ compression_queue[compression_queue_size] = compress_strat;
+ compression_queue_size++;
+ break;
+ case 'z':
+ if (compression_queue_size >= COMPRESSION_QUEUE_LIMIT) {
+ printf("Too many levels specified");
+ exit(0);
+ }
+
+ compress_strat.mode = ZLIB;
+ compress_strat.level = atoi(optarg);
+ if (compress_strat.level > Z_BEST_COMPRESSION) {
+ printf("Unsupported zlib compression level\n");
+ exit(0);
+ }
+ compression_queue[compression_queue_size] = compress_strat;
+ compression_queue_size++;
+ break;
+ case 'i':
+ info.inflate_time = atoi(optarg);
+ if (info.inflate_time < 0)
+ usage();
+ break;
+ case 'd':
+ info.deflate_time = atoi(optarg);
+ if (info.deflate_time < 0)
+ usage();
+ break;
+ case 's':
+ inflate_strat.stateful = 1;
+ break;
+ case 't':
+ inflate_strat.stateless = 1;
+ break;
+ case 'u':
+ inflate_strat.zlib = 1;
+ break;
+ case 'b':
+ inflate_strat.stateful = 1;
+ info.inblock_size = atoi(optarg);
+ break;
+ case 'y':
+ info.flush_type = atoi(optarg);
+ if (info.flush_type != NO_FLUSH && info.flush_type != SYNC_FLUSH
+ && info.flush_type != FULL_FLUSH) {
+ printf("Unsupported flush type\n");
+ exit(0);
+ }
+ break;
+
+ case 'w':
+ info.hist_bits = atoi(optarg);
+ if (info.hist_bits > 15 || info.hist_bits < 9)
+ usage();
+ break;
+ case 'o':
+ outfile = optarg;
+ break;
+ case 'h':
+ default:
+ usage();
+ break;
+ }
+ }
+
+ if (optind >= argc)
+ usage();
+
+ if (!inflate_strat.stateless && !inflate_strat.stateful && !inflate_strat.zlib) {
+ if (info.inblock_size == 0)
+ inflate_strat.stateless = 1;
+ else
+ inflate_strat.stateful = 1;
+ }
+
+ /* Allocate space for entire input file and output
+ * (assuming some possible expansion on output size)
+ */
+ info.file_name = argv[optind];
+ in = fopen(info.file_name, "rb");
+ if (NULL == in) {
+ printf("Error: Can not find file %s\n", info.file_name);
+ exit(0);
+ }
+
+ info.file_size = get_filesize(in);
+ if (info.file_size == 0) {
+ printf("Error: input file has 0 size\n");
+ exit(0);
+ }
+
+ decompbuf_size = info.file_size;
+
+ if (compression_queue_size == 0) {
+ if (info.inblock_size == 0)
+ compression_queue[0].mode = ISAL_STATELESS;
+ else
+ compression_queue[0].mode = ISAL_STATEFUL;
+ compression_queue[0].level = 1;
+ compression_queue_size = 1;
+ }
+
+ filebuf = malloc(info.file_size);
+ if (filebuf == NULL) {
+ fprintf(stderr, "Can't allocate temp buffer memory\n");
+ exit(0);
+ }
+
+ block_count = 1;
+	if (info.flush_type > 0 && info.inblock_size > 0)
+		block_count = (info.file_size + info.inblock_size - 1) / info.inblock_size;
+
+	/* Heavily overestimate the likely compressed size to handle the worst
+	 * case of stored (type 0) blocks and small block sizes */
+ compressbuf_size = block_count * ISAL_DEF_MAX_HDR_SIZE + 2 * info.file_size;
+ if (compressbuf_size >= MAX_COMPRESS_BUF_SIZE)
+ compressbuf_size = MAX_COMPRESS_BUF_SIZE;
+
+ compressbuf = malloc(compressbuf_size);
+ if (compressbuf == NULL) {
+ fprintf(stderr, "Can't allocate input buffer memory\n");
+ exit(0);
+ }
+
+ decompbuf = malloc(decompbuf_size);
+ if (decompbuf == NULL) {
+ fprintf(stderr, "Can't allocate output buffer memory\n");
+ exit(0);
+ }
+
+ if (info.file_size != fread(filebuf, 1, info.file_size, in)) {
+ fprintf(stderr, "Could not read in all input\n");
+ exit(0);
+ }
+ fclose(in);
+
+ for (i = 0; i < compression_queue_size; i++) {
+ if (i > 0)
+ printf("\n\n");
+
+ info.strategy = compression_queue[i];
+ print_perf_info_line(&info);
+
+ info.deflate_size = compressbuf_size;
+
+ if (info.strategy.mode == ISAL_STATELESS)
+ ret = isal_deflate_perf(compressbuf, &info.deflate_size, filebuf,
+ info.file_size, compression_queue[i].level,
+ info.flush_type, info.hist_bits,
+ info.deflate_time, &info.start);
+ else if (info.strategy.mode == ISAL_STATEFUL)
+ ret =
+ isal_deflate_stateful_perf(compressbuf, &info.deflate_size,
+ filebuf, info.file_size,
+ compression_queue[i].level,
+ info.flush_type, info.inblock_size,
+ info.hist_bits, info.deflate_time,
+ &info.start);
+ else if (info.strategy.mode == ZLIB)
+ ret = zlib_deflate_perf(compressbuf, &info.deflate_size, filebuf,
+ info.file_size, compression_queue[i].level,
+ info.flush_type, info.inblock_size,
+ info.hist_bits, info.deflate_time,
+ &info.start);
+
+ if (ret) {
+ printf(" Error in compression\n");
+ continue;
+ }
+
+ print_file_line(&info);
+ printf("\n");
+ print_deflate_perf_line(&info);
+ printf("\n");
+
+		if (outfile != NULL && i + 1 == compression_queue_size) {
+			FILE *out;
+			out = fopen(outfile, "wb");
+			if (out == NULL) {
+				fprintf(stderr, "Can't open output file %s\n", outfile);
+				exit(0);
+			}
+			fwrite(compressbuf, 1, info.deflate_size, out);
+			fclose(out);
+		}
+
+ if (info.inflate_time == 0)
+ continue;
+
+ if (inflate_strat.stateless) {
+ info.inflate_mode = ISAL_STATELESS;
+ ret = isal_inflate_perf(compressbuf, info.deflate_size, decompbuf,
+ decompbuf_size, filebuf, info.file_size,
+ info.hist_bits, info.inflate_time,
+ &info.start);
+ if (ret)
+ printf(" Error in isal stateless inflate\n");
+ else
+ print_inflate_perf_line(&info);
+ }
+
+ if (inflate_strat.stateful) {
+ info.inflate_mode = ISAL_STATEFUL;
+ ret =
+ isal_inflate_stateful_perf(compressbuf, info.deflate_size,
+ decompbuf, decompbuf_size, filebuf,
+ info.file_size, info.inblock_size,
+ info.hist_bits, info.inflate_time,
+ &info.start);
+
+ if (ret)
+ printf(" Error in isal stateful inflate\n");
+ else
+ print_inflate_perf_line(&info);
+ }
+
+ if (inflate_strat.zlib) {
+ info.inflate_mode = ZLIB;
+ ret = zlib_inflate_perf(compressbuf, info.deflate_size, decompbuf,
+ decompbuf_size, filebuf, info.file_size,
+ info.hist_bits, info.inflate_time,
+ &info.start);
+ if (ret)
+ printf(" Error in zlib inflate\n");
+ else
+ print_inflate_perf_line(&info);
+ }
+ }
+
+ free(compressbuf);
+ free(decompbuf);
+ free(filebuf);
+ return 0;
+}
diff --git a/src/isa-l/igzip/igzip_rand_test.c b/src/isa-l/igzip/igzip_rand_test.c
new file mode 100644
index 000000000..74195d64c
--- /dev/null
+++ b/src/isa-l/igzip/igzip_rand_test.c
@@ -0,0 +1,3101 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define _FILE_OFFSET_BITS 64
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+#include "igzip_lib.h"
+#include "checksum_test_ref.h"
+#include "inflate_std_vects.h"
+#include <math.h>
+#include "test.h"
+#include "unaligned.h"
+
+#ifdef HAVE_GETOPT
+#include <getopt.h>
+#endif
+
+#ifndef RANDOMS
+# define RANDOMS 0x40
+#endif
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define MAX_BITS_COUNT 20
+#define MIN_BITS_COUNT 8
+
+#define IBUF_SIZE (1024*1024)
+
+#define MAX_LARGE_COMP_BUF_SIZE (1024*1024)
+
+#define PAGE_SIZE (4 * 1024)
+
+#define MAX_FILE_SIZE 0x7fff8fff
+
+#define str1 "Short test string"
+#define str2 "one two three four five six seven eight nine ten eleven twelve " \
+ "thirteen fourteen fifteen sixteen"
+
+#define TYPE0_HDR_SIZE 5	/* Size of a type 0 block's header in bytes */
+#define TYPE0_MAX_SIZE 65535	/* Max length of a type 0 block in bytes (excluding the header) */
+
+#define MAX_LOOPS 20
+/* Defines for the possible error conditions */
+enum IGZIP_TEST_ERROR_CODES {
+ IGZIP_COMP_OK = 0,
+
+ MALLOC_FAILED,
+ FILE_READ_FAILED,
+
+ COMPRESS_INCORRECT_STATE,
+ COMPRESS_INPUT_STREAM_INTEGRITY_ERROR,
+ COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR,
+ COMPRESS_END_OF_STREAM_NOT_SET,
+ COMPRESS_ALL_INPUT_FAIL,
+ COMPRESS_OUT_BUFFER_OVERFLOW,
+ COMPRESS_LOOP_COUNT_OVERFLOW,
+ COMPRESS_GENERAL_ERROR,
+
+ INFLATE_END_OF_INPUT,
+ INFLATE_INVALID_BLOCK_HEADER,
+ INFLATE_INVALID_SYMBOL,
+ INFLATE_OUT_BUFFER_OVERFLOW,
+ INFLATE_LEFTOVER_INPUT,
+ INFLATE_INCORRECT_OUTPUT_SIZE,
+ INFLATE_INVALID_LOOK_BACK_DISTANCE,
+ INFLATE_INPUT_STREAM_INTEGRITY_ERROR,
+ INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR,
+ INVALID_GZIP_HEADER,
+ INCORRECT_GZIP_TRAILER,
+ INVALID_ZLIB_HEADER,
+ INCORRECT_ZLIB_TRAILER,
+
+ UNSUPPORTED_METHOD,
+
+ INFLATE_GENERAL_ERROR,
+
+ INVALID_FLUSH_ERROR,
+
+ OVERFLOW_TEST_ERROR,
+ RESULT_ERROR
+};
+
+static const int hdr_bytes = 300;
+
+static const uint32_t gzip_trl_bytes = 8;
+static const uint32_t zlib_trl_bytes = 4;
+static const int gzip_extra_bytes = 18; /* gzip_hdr_bytes + gzip_trl_bytes */
+static const int zlib_extra_bytes = 6; /* zlib_hdr_bytes + zlib_trl_bytes */
+
+int inflate_type = 0;
+
+struct isal_hufftables *hufftables = NULL;
+struct isal_hufftables *hufftables_subset = NULL;
+
+#define HISTORY_SIZE (32 * 1024)
+#define MIN_LENGTH 3
+#define MIN_DIST 1
+
+struct test_options {
+ int test_seed;
+ int randoms;
+ int do_large_test;
+ int verbose;
+};
+
+struct test_options options;
+
+void init_options(void)
+{
+ options.test_seed = TEST_SEED;
+ options.randoms = RANDOMS;
+ options.do_large_test = 1;
+#ifdef VERBOSE
+ options.verbose = 1;
+#else
+ options.verbose = 0;
+#endif
+}
+
+void usage(void)
+{
+ fprintf(stderr,
+ "Usage: igzip_rand_test [options] [FILES]\n"
+ " -h help, print this message\n"
+ " -l turn off large input test\n"
+ " -r <iter> number of randoms for each test\n"
+ " -s <seed> set rand() test seed\n"
+ " -v enable verbose test log\n");
+ exit(0);
+}
+
+size_t parse_options(int argc, char *argv[])
+{
+ init_options();
+#ifdef HAVE_GETOPT
+ int c;
+ char optstring[] = "hlr:s:v";
+ while ((c = getopt(argc, argv, optstring)) != -1) {
+ switch (c) {
+ case 'l':
+ options.do_large_test = 0;
+ break;
+ case 'r':
+ options.randoms = atoi(optarg);
+ break;
+ case 's':
+ options.test_seed = atoi(optarg);
+ break;
+ case 'v':
+ options.verbose = 1;
+ break;
+ case 'h':
+ default:
+ usage();
+ break;
+ }
+ }
+ return optind;
+#else
+ return 1;
+#endif
+}
+
+/* Create random compressible data. Each step randomly emits either a single
+ * random character or a repeat of earlier data in the stream with a random
+ * length and look-back distance. The probability of choosing a character
+ * versus a repeat is itself semi-random, controlled by setting
+ * max_repeat_data to differing values */
+void create_rand_repeat_data(uint8_t * data, int size)
+{
+ uint32_t next_data;
+ uint8_t *data_start = data;
+ uint32_t length, distance;
+ uint32_t symbol_count = rand() % 255 + 1, swaps_left, tmp;
+ uint32_t max_repeat_data = symbol_count;
+ uint8_t symbols[256], *symbols_next, swap_val;
+
+ /* An array of the powers of 2 (except the final element which is 0) */
+ const uint32_t power_of_2_array[] = {
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+ 0x00000010, 0x00000020, 0x00000040, 0x00000080,
+ 0x00000100, 0x00000200, 0x00000400, 0x00000800,
+ 0x00001000, 0x00002000, 0x00004000, 0x00008000,
+ 0x00010000, 0x00020000, 0x00040000, 0x00080000,
+ 0x00100000, 0x00200000, 0x00400000, 0x00800000,
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x00000000
+ };
+
+	uint32_t power = rand() % (sizeof(power_of_2_array) / sizeof(uint32_t));
+
+ if (symbol_count > 128) {
+ memset(symbols, 1, sizeof(symbols));
+ swap_val = 0;
+ swaps_left = 256 - symbol_count;
+ } else {
+ memset(symbols, 0, sizeof(symbols));
+ swap_val = 1;
+ swaps_left = symbol_count;
+ }
+
+ while (swaps_left > 0) {
+ tmp = rand() % 256;
+ if (symbols[tmp] != swap_val) {
+ symbols[tmp] = swap_val;
+ swaps_left--;
+ }
+ }
+
+ symbols_next = symbols;
+ for (tmp = 0; tmp < 256; tmp++) {
+ if (symbols[tmp]) {
+ *symbols_next = tmp;
+ symbols_next++;
+ }
+ }
+
+ max_repeat_data += power_of_2_array[power];
+
+ if (size > 0) {
+ size--;
+ *data++ = rand();
+ }
+
+ while (size > 0) {
+ next_data = rand() % max_repeat_data;
+ if (next_data < symbol_count) {
+ *data++ = symbols[next_data];
+ size--;
+ } else if (size < 3) {
+ *data++ = symbols[rand() % symbol_count];
+ size--;
+ } else {
+ length = (rand() % 256) + MIN_LENGTH;
+ if (length > size)
+ length = (rand() % (size - 2)) + MIN_LENGTH;
+
+ distance = (rand() % HISTORY_SIZE) + MIN_DIST;
+ if (distance > data - data_start)
+ distance = (rand() % (data - data_start)) + MIN_DIST;
+
+ size -= length;
+ if (distance <= length) {
+ while (length-- > 0) {
+ *data = *(data - distance);
+ data++;
+ }
+ } else {
+ memcpy(data, data - distance, length);
+ data += length;
+ }
+ }
+ }
+}
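+
+/* Usage sketch (illustrative only, assuming srand() was seeded earlier):
+ *
+ *	uint8_t buf[IBUF_SIZE];
+ *	create_rand_repeat_data(buf, sizeof(buf));
+ *	// buf now holds LZ77-style compressible data with a random symbol
+ *	// alphabet and a random literal/repeat mix
+ */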
+
+void create_rand_dict(uint8_t * dict, uint32_t dict_len, uint8_t * buf, uint32_t buf_len)
+{
+ uint32_t dict_chunk_size, buf_chunk_size;
+ while (dict_len > 0) {
+ dict_chunk_size = rand() % IGZIP_K;
+ dict_chunk_size = (dict_len >= dict_chunk_size) ? dict_chunk_size : dict_len;
+
+ buf_chunk_size = rand() % IGZIP_K;
+ buf_chunk_size = (buf_len >= buf_chunk_size) ? buf_chunk_size : buf_len;
+
+ if (rand() % 3 == 0 && buf_len >= dict_len)
+ memcpy(dict, buf, dict_chunk_size);
+ else
+ create_rand_repeat_data(dict, dict_chunk_size);
+
+ dict_len -= dict_chunk_size;
+ dict += dict_chunk_size;
+ buf_len -= buf_chunk_size;
+ buf += buf_chunk_size;
+ }
+
+}
+
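+/* Pick a random buffer length whose bit width is drawn from
+ * [MIN_BITS_COUNT, MAX_BITS_COUNT), so small and large sizes are both
+ * exercised with reasonable frequency. */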
+int get_rand_data_length(void)
+{
+ int max_mask =
+ (1 << ((rand() % (MAX_BITS_COUNT - MIN_BITS_COUNT)) + MIN_BITS_COUNT)) - 1;
+ return rand() & max_mask;
+}
+
+int get_rand_level(void)
+{
+ return ISAL_DEF_MIN_LEVEL + rand() % (ISAL_DEF_MAX_LEVEL - ISAL_DEF_MIN_LEVEL + 1);
+
+}
+
+int get_rand_level_buf_size(int level)
+{
+ int size;
+ switch (level) {
+ case 3:
+ size = rand() % IBUF_SIZE + ISAL_DEF_LVL3_MIN;
+ break;
+ case 2:
+ size = rand() % IBUF_SIZE + ISAL_DEF_LVL2_MIN;
+ break;
+ case 1:
+ default:
+ size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN;
+ }
+ return size;
+}
+
+void print_error(int error_code)
+{
+ switch (error_code) {
+ case IGZIP_COMP_OK:
+ break;
+ case MALLOC_FAILED:
+ printf("error: failed to allocate memory\n");
+ break;
+ case FILE_READ_FAILED:
+ printf("error: failed to read in file\n");
+ break;
+ case COMPRESS_INCORRECT_STATE:
+ printf("error: incorrect stream internal state\n");
+ break;
+ case COMPRESS_INPUT_STREAM_INTEGRITY_ERROR:
+ printf("error: inconsistent stream input buffer\n");
+ break;
+ case COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR:
+ printf("error: inconsistent stream output buffer\n");
+ break;
+ case COMPRESS_END_OF_STREAM_NOT_SET:
+ printf("error: end of stream not set\n");
+ break;
+ case COMPRESS_ALL_INPUT_FAIL:
+ printf("error: not all input data compressed\n");
+ break;
+ case COMPRESS_OUT_BUFFER_OVERFLOW:
+ printf("error: output buffer overflow while compressing data\n");
+ break;
+ case COMPRESS_GENERAL_ERROR:
+ printf("error: compression failed\n");
+ break;
+ case INFLATE_END_OF_INPUT:
+ printf("error: did not decompress all input\n");
+ break;
+ case INFLATE_INVALID_BLOCK_HEADER:
+ printf("error: invalid header\n");
+ break;
+ case INFLATE_INVALID_SYMBOL:
+ printf("error: invalid symbol found when decompressing input\n");
+ break;
+ case INFLATE_OUT_BUFFER_OVERFLOW:
+ printf("error: output buffer overflow while decompressing data\n");
+ break;
+ case INFLATE_GENERAL_ERROR:
+ printf("error: decompression failed\n");
+ break;
+ case INFLATE_LEFTOVER_INPUT:
+ printf("error: the trailer of igzip output contains junk\n");
+ break;
+ case INFLATE_INCORRECT_OUTPUT_SIZE:
+ printf("error: incorrect amount of data was decompressed\n");
+ break;
+ case INFLATE_INVALID_LOOK_BACK_DISTANCE:
+ printf("error: invalid look back distance found while decompressing\n");
+ break;
+ case INFLATE_INPUT_STREAM_INTEGRITY_ERROR:
+ printf("error: inconsistent input buffer\n");
+ break;
+ case INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR:
+ printf("error: inconsistent output buffer\n");
+ break;
+ case INVALID_GZIP_HEADER:
+ printf("error: incorrect gzip header found when inflating data\n");
+ break;
+ case INCORRECT_GZIP_TRAILER:
+ printf("error: incorrect gzip trailer found when inflating data\n");
+ break;
+ case INVALID_ZLIB_HEADER:
+ printf("error: incorrect zlib header found when inflating data\n");
+ break;
+ case INCORRECT_ZLIB_TRAILER:
+ printf("error: incorrect zlib trailer found when inflating data\n");
+ break;
+ case UNSUPPORTED_METHOD:
+ printf("error: invalid compression method in wrapper header\n");
+ break;
+ case INVALID_FLUSH_ERROR:
+ printf("error: invalid flush did not cause compression to error\n");
+ break;
+ case RESULT_ERROR:
+ printf("error: decompressed data is not the same as the compressed data\n");
+ break;
+ case OVERFLOW_TEST_ERROR:
+ printf("error: overflow undetected\n");
+ break;
+ default:
+ printf("error: unknown error code\n");
+ }
+}
+
+void print_uint8_t(uint8_t * array, uint64_t length)
+{
+ const int line_size = 16;
+ int i;
+
+ printf("Length = %lu", length);
+ for (i = 0; i < length; i++) {
+ if ((i % line_size) == 0)
+ printf("\n0x%08x\t", i);
+ else
+ printf(" ");
+ printf("0x%02x,", array[i]);
+ }
+ printf("\n");
+}
+
+void log_print(char *format, ...)
+{
+ va_list args;
+ va_start(args, format);
+
+ if (options.verbose)
+ vfprintf(stdout, format, args);
+
+ va_end(args);
+}
+
+void log_uint8_t(uint8_t * array, uint64_t length)
+{
+ if (options.verbose)
+ print_uint8_t(array, length);
+}
+
+void log_error(int error_code)
+{
+ if (options.verbose)
+ print_error(error_code);
+}
+
+uint32_t check_gzip_trl(uint64_t gzip_trl, uint32_t inflate_crc, uint8_t * uncompress_buf,
+ uint32_t uncompress_len)
+{
+ uint64_t trl, ret = 0;
+ uint32_t crc;
+
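+	/* A gzip trailer is the CRC32 followed by ISIZE (input size mod 2^32),
+	 * both little-endian, so the expected trailer can be rebuilt as a
+	 * single 64-bit value: (len << 32) | crc. */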
+ crc = crc32_gzip_refl_ref(0, uncompress_buf, uncompress_len);
+ trl = ((uint64_t) uncompress_len << 32) | crc;
+
+ if (crc != inflate_crc || trl != gzip_trl)
+ ret = INCORRECT_GZIP_TRAILER;
+
+ return ret;
+}
+
+uint32_t check_zlib_trl(uint32_t zlib_trl, uint32_t inflate_adler, uint8_t * uncompress_buf,
+ uint32_t uncompress_len)
+{
+ uint32_t trl, ret = 0;
+ uint32_t adler;
+
+ adler = adler_ref(1, uncompress_buf, uncompress_len);
+
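+	/* RFC 1950 stores the Adler-32 checksum big-endian, so byte-swap the
+	 * computed value before comparing it against the raw trailer word. */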
+ trl =
+ (adler >> 24) | ((adler >> 8) & 0xFF00) | (adler << 24) | ((adler & 0xFF00) << 8);
+
+ if (adler != inflate_adler || trl != zlib_trl) {
+ ret = INCORRECT_ZLIB_TRAILER;
+ }
+
+ return ret;
+}
+
+int inflate_stateless_pass(uint8_t * compress_buf, uint64_t compress_len,
+ uint8_t * uncompress_buf, uint32_t * uncompress_len,
+ uint32_t gzip_flag)
+{
+ struct inflate_state state;
+ int ret = 0, offset = 0;
+ struct isal_gzip_header gz_hdr;
+ struct isal_zlib_header z_hdr;
+
+ state.next_in = compress_buf;
+ state.avail_in = compress_len;
+ state.next_out = uncompress_buf;
+ state.avail_out = *uncompress_len;
+
+ if (gzip_flag == IGZIP_GZIP) {
+ if (rand() % 2 == 0) {
+ memset(&gz_hdr, 0, sizeof(gz_hdr));
+ isal_inflate_reset(&state);
+ state.tmp_in_size = 0;
+ gzip_flag = ISAL_GZIP_NO_HDR_VER;
+
+ isal_read_gzip_header(&state, &gz_hdr);
+ }
+ } else if (gzip_flag == IGZIP_ZLIB) {
+ if (rand() % 2 == 0) {
+ memset(&z_hdr, 0, sizeof(z_hdr));
+ isal_inflate_reset(&state);
+ gzip_flag = ISAL_ZLIB_NO_HDR_VER;
+ isal_read_zlib_header(&state, &z_hdr);
+ }
+ }
+
+ state.crc_flag = gzip_flag;
+
+ ret = isal_inflate_stateless(&state);
+
+ *uncompress_len = state.total_out;
+
+ if (gzip_flag) {
+ if (gzip_flag == IGZIP_GZIP || gzip_flag == IGZIP_GZIP_NO_HDR
+ || gzip_flag == ISAL_GZIP_NO_HDR_VER) {
+ if (gzip_flag == IGZIP_GZIP || gzip_flag == ISAL_GZIP_NO_HDR_VER)
+ offset = gzip_trl_bytes;
+
+ if (!ret)
+ ret =
+ check_gzip_trl(load_u64(state.next_in - offset),
+ state.crc, uncompress_buf, *uncompress_len);
+ else if (ret == ISAL_INCORRECT_CHECKSUM)
+ ret = INCORRECT_GZIP_TRAILER;
+ state.avail_in -= (gzip_trl_bytes - offset);
+ } else if (gzip_flag == IGZIP_ZLIB || gzip_flag == IGZIP_ZLIB_NO_HDR
+ || gzip_flag == ISAL_ZLIB_NO_HDR_VER) {
+ if (gzip_flag == IGZIP_ZLIB || gzip_flag == ISAL_ZLIB_NO_HDR_VER)
+ offset = zlib_trl_bytes;
+
+ if (!ret)
+ ret =
+ check_zlib_trl(load_u32(state.next_in - offset),
+ state.crc, uncompress_buf, *uncompress_len);
+ else if (ret == ISAL_INCORRECT_CHECKSUM)
+ ret = INCORRECT_ZLIB_TRAILER;
+ state.avail_in -= (zlib_trl_bytes - offset);
+
+ }
+
+ }
+
+ if (ret == 0 && state.avail_in != 0)
+ ret = INFLATE_LEFTOVER_INPUT;
+
+ return ret;
+}
+
+/* Check that the state of the data stream is consistent */
+int inflate_state_valid_check(struct inflate_state *state, uint8_t * in_buf, uint32_t in_size,
+ uint8_t * out_buf, uint32_t out_size, uint32_t in_processed,
+ uint32_t out_processed, uint32_t data_size)
+{
+ uint32_t in_buffer_size, total_out, out_buffer_size;
+
+ in_buffer_size = (in_size == 0) ? 0 : state->next_in - in_buf + state->avail_in;
+
+ /* Check for a consistent amount of data processed */
+ if (in_buffer_size != in_size)
+ return INFLATE_INPUT_STREAM_INTEGRITY_ERROR;
+
+ total_out =
+ (out_size == 0) ? out_processed : out_processed + (state->next_out - out_buf);
+ out_buffer_size = (out_size == 0) ? 0 : state->next_out - out_buf + state->avail_out;
+
+ /* Check for a consistent amount of data compressed */
+ if (total_out != state->total_out || out_buffer_size != out_size)
+ return INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR;
+
+ return 0;
+}
+
+/* Performs decompression with checks to discover and verify the state of the
+ * stream
+ * state: inflate data structure which has been initialized to use
+ * in_buf and out_buf as the buffers
+ * compress_len: size of all input compressed data
+ * data_size: size of all available output buffers
+ * in_buf: next buffer of data to be inflated
+ * in_size: size of in_buf
+ * out_buf: next output buffer where data is stored
+ * out_size: size of out_buf
+ * in_processed: the amount of input data which has been loaded into buffers
+ * to be inflated, this includes the data in in_buf
+ * out_processed: the amount of output data which has been decompressed and stored,
+ * this does not include the data in the current out_buf
+*/
+int isal_inflate_with_checks(struct inflate_state *state, uint32_t compress_len,
+ uint32_t data_size, uint8_t * in_buf, uint32_t in_size,
+ uint32_t in_processed, uint8_t * out_buf, uint32_t out_size,
+ uint32_t out_processed)
+{
+ int ret, stream_check = 0;
+
+ ret = isal_inflate(state);
+
+	/* Verify the stream is in a valid state when no errors occurred */
+ if (ret >= 0) {
+ stream_check =
+ inflate_state_valid_check(state, in_buf, in_size, out_buf, out_size,
+ in_processed, out_processed, data_size);
+ }
+
+ if (stream_check != 0)
+ return stream_check;
+
+ return ret;
+
+}
+
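+/* Decompress compress_buf into uncompress_buf with isal_inflate(), feeding
+ * randomly sized input and output segments to exercise the stateful
+ * decompressor's internal buffering. */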
+int inflate_multi_pass(uint8_t * compress_buf, uint64_t compress_len,
+ uint8_t * uncompress_buf, uint32_t * uncompress_len, uint32_t gzip_flag,
+ uint8_t * dict, uint32_t dict_len, uint32_t hist_bits)
+{
+ struct inflate_state *state = NULL;
+ int ret = 0;
+ uint8_t *comp_tmp = NULL, *uncomp_tmp = NULL;
+ uint32_t comp_tmp_size = 0, uncomp_tmp_size = 0;
+ uint32_t comp_processed = 0, uncomp_processed = 0;
+ int32_t read_in_old = 0;
+ uint32_t reset_test_flag = 0;
+
+ state = malloc(sizeof(struct inflate_state));
+ if (state == NULL) {
+ printf("Failed to allocate memory\n");
+ exit(0);
+ }
+
+	create_rand_repeat_data((uint8_t *) state, sizeof(*state));
+ isal_inflate_init(state);
+
+ if (rand() % 4 == 0) {
+ /* Test reset */
+ reset_test_flag = 1;
+		create_rand_repeat_data((uint8_t *) state, sizeof(*state));
+ }
+
+ if (gzip_flag == IGZIP_GZIP_NO_HDR) {
+ if (rand() % 2 == 0)
+ compress_len -= gzip_trl_bytes;
+ else
+ gzip_flag = ISAL_GZIP_NO_HDR_VER;
+ } else if (gzip_flag == IGZIP_ZLIB_NO_HDR) {
+ if (rand() % 2 == 0)
+ compress_len -= zlib_trl_bytes;
+ else
+ gzip_flag = ISAL_ZLIB_NO_HDR_VER;
+ }
+
+ state->next_in = NULL;
+ state->next_out = NULL;
+ state->avail_in = 0;
+ state->avail_out = 0;
+ state->crc_flag = gzip_flag;
+ state->hist_bits = hist_bits;
+
+ if (reset_test_flag)
+ isal_inflate_reset(state);
+
+ if (dict != NULL)
+ isal_inflate_set_dict(state, dict, dict_len);
+
+ while (1) {
+ if (state->avail_in == 0) {
+ comp_tmp_size = rand() % (compress_len + 1);
+
+ if (comp_tmp_size >= compress_len - comp_processed)
+ comp_tmp_size = compress_len - comp_processed;
+
+ if (comp_tmp_size != 0) {
+ if (comp_tmp != NULL) {
+ free(comp_tmp);
+ comp_tmp = NULL;
+ }
+
+ comp_tmp = malloc(comp_tmp_size);
+
+ if (comp_tmp == NULL) {
+ printf("Failed to allocate memory\n");
+ return MALLOC_FAILED;
+ }
+
+ memcpy(comp_tmp, compress_buf + comp_processed, comp_tmp_size);
+ comp_processed += comp_tmp_size;
+
+ state->next_in = comp_tmp;
+ state->avail_in = comp_tmp_size;
+ }
+ }
+
+ if (state->avail_out == 0) {
+ /* Save uncompressed data into uncompress_buf */
+ if (uncomp_tmp != NULL) {
+ memcpy(uncompress_buf + uncomp_processed, uncomp_tmp,
+ uncomp_tmp_size);
+ uncomp_processed += uncomp_tmp_size;
+ }
+
+ uncomp_tmp_size = rand() % (*uncompress_len + 1);
+
+ /* Limit size of buffer to be smaller than maximum */
+ if (uncomp_tmp_size > *uncompress_len - uncomp_processed)
+ uncomp_tmp_size = *uncompress_len - uncomp_processed;
+
+ if (uncomp_tmp_size != 0) {
+
+ if (uncomp_tmp != NULL) {
+ fflush(0);
+ free(uncomp_tmp);
+ uncomp_tmp = NULL;
+ }
+
+ uncomp_tmp = malloc(uncomp_tmp_size);
+ if (uncomp_tmp == NULL) {
+ printf("Failed to allocate memory\n");
+ return MALLOC_FAILED;
+ }
+
+ state->avail_out = uncomp_tmp_size;
+ state->next_out = uncomp_tmp;
+ }
+ }
+
+ log_print("Pre inflate\n");
+ log_print
+ ("compressed_size = 0x%05lx, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x\n",
+ compress_len, comp_processed, comp_tmp_size, state->avail_in);
+ log_print
+ ("data_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n",
+ *uncompress_len, uncomp_processed, uncomp_tmp_size, state->avail_out,
+ state->total_out);
+
+ ret = isal_inflate_with_checks(state, compress_len, *uncompress_len, comp_tmp,
+ comp_tmp_size, comp_processed, uncomp_tmp,
+ uncomp_tmp_size, uncomp_processed);
+
+ log_print("Post inflate\n");
+ log_print
+ ("compressed_size = 0x%05lx, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x\n",
+ compress_len, comp_processed, comp_tmp_size, state->avail_in);
+ log_print
+ ("data_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n",
+ *uncompress_len, uncomp_processed, uncomp_tmp_size, state->avail_out,
+ state->total_out);
+
+ if (state->block_state == ISAL_BLOCK_FINISH || ret != 0) {
+ memcpy(uncompress_buf + uncomp_processed, uncomp_tmp, uncomp_tmp_size);
+ *uncompress_len = state->total_out;
+ break;
+ }
+
+ if (*uncompress_len - uncomp_processed == 0 && state->avail_out == 0
+ && state->tmp_out_valid - state->tmp_out_processed > 0) {
+ ret = ISAL_OUT_OVERFLOW;
+ break;
+ }
+
+ if (compress_len - comp_processed == 0 && state->avail_in == 0
+ && (state->block_state != ISAL_BLOCK_INPUT_DONE)
+ && state->tmp_out_valid - state->tmp_out_processed == 0) {
+ if (state->read_in_length == read_in_old) {
+ ret = ISAL_END_INPUT;
+ break;
+ }
+ read_in_old = state->read_in_length;
+ }
+ }
+
+ if (gzip_flag) {
+ if (!ret) {
+ if (gzip_flag == IGZIP_GZIP || gzip_flag == IGZIP_GZIP_NO_HDR
+ || gzip_flag == ISAL_GZIP_NO_HDR_VER) {
+ if (gzip_flag == ISAL_GZIP_NO_HDR_VER
+ || gzip_flag == IGZIP_GZIP)
+ compress_len -= gzip_trl_bytes;
+ ret =
+ check_gzip_trl(load_u64(compress_buf + compress_len),
+ state->crc, uncompress_buf,
+ *uncompress_len);
+			} else if (gzip_flag == IGZIP_ZLIB || gzip_flag == IGZIP_ZLIB_NO_HDR
+				   || gzip_flag == ISAL_ZLIB_NO_HDR_VER) {
+ if (gzip_flag == IGZIP_ZLIB
+ || gzip_flag == ISAL_ZLIB_NO_HDR_VER)
+ compress_len -= zlib_trl_bytes;
+ ret =
+ check_zlib_trl(load_u32(compress_buf + compress_len),
+ state->crc, uncompress_buf,
+ *uncompress_len);
+ }
+ }
+ }
+ if (ret == 0 && state->avail_in != 0)
+ ret = INFLATE_LEFTOVER_INPUT;
+
+ if (comp_tmp != NULL) {
+ free(comp_tmp);
+ comp_tmp = NULL;
+ }
+
+ if (uncomp_tmp != NULL) {
+ free(uncomp_tmp);
+ uncomp_tmp = NULL;
+ }
+
+ free(state);
+ return ret;
+}
+
+int inflate_ret_to_code(int ret)
+{
+ switch (ret) {
+ case ISAL_DECOMP_OK:
+ return 0;
+ case ISAL_END_INPUT:
+ return INFLATE_END_OF_INPUT;
+ case ISAL_OUT_OVERFLOW:
+ return INFLATE_OUT_BUFFER_OVERFLOW;
+ case ISAL_INVALID_BLOCK:
+ return INFLATE_INVALID_BLOCK_HEADER;
+ case ISAL_INVALID_SYMBOL:
+ return INFLATE_INVALID_SYMBOL;
+ case ISAL_INVALID_LOOKBACK:
+ return INFLATE_INVALID_LOOK_BACK_DISTANCE;
+ default:
+ return INFLATE_GENERAL_ERROR;
+ }
+}
+
+/* Inflate the compressed data and check that the decompressed data agrees with the input data */
+int inflate_check(uint8_t * z_buf, uint32_t z_size, uint8_t * in_buf, uint32_t in_size,
+ uint32_t gzip_flag, uint8_t * dict, uint32_t dict_len, uint32_t hist_bits)
+{
+ /* Test inflate with reference inflate */
+
+ int ret = 0;
+ uint32_t test_size = in_size;
+ uint8_t *test_buf = NULL;
+ int mem_result = 0;
+ int gzip_hdr_result = 0, gzip_trl_result = 0;
+
+ if (in_size > 0) {
+ assert(in_buf != NULL);
+ test_buf = malloc(test_size);
+ if (test_buf == NULL)
+ return MALLOC_FAILED;
+ }
+
+ if (test_buf != NULL)
+ memset(test_buf, 0xff, test_size);
+
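+	/* Alternate between the stateless and stateful inflate paths on
+	 * successive calls; only the stateful path supports a dictionary. */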
+ if (inflate_type == 0 && dict == NULL) {
+ ret = inflate_stateless_pass(z_buf, z_size, test_buf, &test_size, gzip_flag);
+ inflate_type = 1;
+ } else {
+ ret =
+ inflate_multi_pass(z_buf, z_size, test_buf, &test_size, gzip_flag, dict,
+ dict_len, hist_bits);
+ inflate_type = 0;
+ }
+
+ if (test_buf != NULL)
+ mem_result = memcmp(in_buf, test_buf, in_size);
+
+ if (options.verbose && mem_result) {
+ int i;
+ for (i = 0; i < in_size; i++) {
+ if (in_buf[i] != test_buf[i]) {
+ log_print
+ ("First incorrect data at 0x%x of 0x%x, 0x%x != 0x%x\n", i,
+ in_size, in_buf[i], test_buf[i]);
+ break;
+ }
+ }
+ }
+
+ if (test_buf != NULL)
+ free(test_buf);
+ switch (ret) {
+ case 0:
+ break;
+ case ISAL_END_INPUT:
+ return INFLATE_END_OF_INPUT;
+ break;
+ case ISAL_INVALID_BLOCK:
+ return INFLATE_INVALID_BLOCK_HEADER;
+ break;
+ case ISAL_INVALID_SYMBOL:
+ return INFLATE_INVALID_SYMBOL;
+ break;
+ case ISAL_OUT_OVERFLOW:
+ return INFLATE_OUT_BUFFER_OVERFLOW;
+ break;
+ case ISAL_INVALID_LOOKBACK:
+ return INFLATE_INVALID_LOOK_BACK_DISTANCE;
+ break;
+ case INFLATE_LEFTOVER_INPUT:
+ return INFLATE_LEFTOVER_INPUT;
+ break;
+ case INCORRECT_GZIP_TRAILER:
+ gzip_trl_result = INCORRECT_GZIP_TRAILER;
+ break;
+ case INCORRECT_ZLIB_TRAILER:
+ gzip_trl_result = INCORRECT_ZLIB_TRAILER;
+ break;
+ case ISAL_INCORRECT_CHECKSUM:
+ if (gzip_flag == IGZIP_GZIP || gzip_flag == IGZIP_GZIP_NO_HDR
+ || gzip_flag == ISAL_GZIP_NO_HDR_VER)
+ gzip_trl_result = INCORRECT_GZIP_TRAILER;
+ else if (gzip_flag == IGZIP_ZLIB || gzip_flag == IGZIP_ZLIB_NO_HDR
+ || gzip_flag == ISAL_ZLIB_NO_HDR_VER)
+			gzip_trl_result = INCORRECT_ZLIB_TRAILER;
+ break;
+ case ISAL_UNSUPPORTED_METHOD:
+ return UNSUPPORTED_METHOD;
+ case INFLATE_INPUT_STREAM_INTEGRITY_ERROR:
+ return INFLATE_INPUT_STREAM_INTEGRITY_ERROR;
+ break;
+ case INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR:
+ return INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR;
+ break;
+ default:
+ return INFLATE_GENERAL_ERROR;
+ break;
+ }
+
+ if (test_size != in_size)
+ return INFLATE_INCORRECT_OUTPUT_SIZE;
+
+ if (mem_result)
+ return RESULT_ERROR;
+
+ if (gzip_hdr_result == INVALID_GZIP_HEADER)
+ return INVALID_GZIP_HEADER;
+
+ else if (gzip_hdr_result == INVALID_ZLIB_HEADER)
+ return INVALID_ZLIB_HEADER;
+
+ if (gzip_trl_result == INCORRECT_GZIP_TRAILER)
+ return INCORRECT_GZIP_TRAILER;
+
+ else if (gzip_trl_result == INCORRECT_ZLIB_TRAILER)
+ return INCORRECT_ZLIB_TRAILER;
+
+ return 0;
+}
+
+/* Check that the state of the data stream is consistent */
+int stream_valid_check(struct isal_zstream *stream, uint8_t * in_buf, uint32_t in_size,
+ uint8_t * out_buf, uint32_t out_size, uint32_t in_processed,
+ uint32_t out_processed, uint32_t data_size)
+{
+ uint32_t total_in, in_buffer_size, total_out, out_buffer_size;
+
+ total_in =
+ (in_size ==
+ 0) ? in_processed : (in_processed - in_size) + (stream->next_in - in_buf);
+ in_buffer_size = (in_size == 0) ? 0 : stream->next_in - in_buf + stream->avail_in;
+
+ /* Check for a consistent amount of data processed */
+ if (total_in != stream->total_in || in_buffer_size != in_size)
+ return COMPRESS_INPUT_STREAM_INTEGRITY_ERROR;
+
+ total_out =
+ (out_size == 0) ? out_processed : out_processed + (stream->next_out - out_buf);
+ out_buffer_size = (out_size == 0) ? 0 : stream->next_out - out_buf + stream->avail_out;
+
+ /* Check for a consistent amount of data compressed */
+ if (total_out != stream->total_out || out_buffer_size != out_size) {
+ return COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR;
+ }
+
+ return 0;
+}
+
+/* Performs compression with checks to discover and verify the state of the
+ * stream
+ * stream: compress data structure which has been initialized to use
+ * in_buf and out_buf as the buffers
+ * data_size: size of all input data
+ * compressed_size: size of all available output buffers
+ * in_buf: next buffer of data to be compressed
+ * in_size: size of in_buf
+ * out_buf: next output buffer where data is stored
+ * out_size: size of out_buf
+ * in_processed: the amount of input data which has been loaded into buffers
+ * to be compressed, this includes the data in in_buf
+ * out_processed: the amount of output data which has been compressed and stored,
+ * this does not include the data in the current out_buf
+*/
+int isal_deflate_with_checks(struct isal_zstream *stream, uint32_t data_size,
+ uint32_t compressed_size, uint8_t * in_buf, uint32_t in_size,
+ uint32_t in_processed, uint8_t * out_buf, uint32_t out_size,
+ uint32_t out_processed)
+{
+ int ret, stream_check;
+ struct isal_zstate *state = &stream->internal_state;
+
+ log_print("Pre compression\n");
+ log_print
+ ("data_size = 0x%05x, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x, total_in = 0x%05x\n",
+ data_size, in_processed, in_size, stream->avail_in, stream->total_in);
+ log_print
+ ("compressed_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n",
+ compressed_size, out_processed, out_size, stream->avail_out, stream->total_out);
+
+ ret = isal_deflate(stream);
+
+ log_print("Post compression\n");
+ log_print
+ ("data_size = 0x%05x, in_processed = 0x%05x, in_size = 0x%05x, avail_in = 0x%05x, total_in = 0x%05x\n",
+ data_size, in_processed, in_size, stream->avail_in, stream->total_in);
+ log_print
+ ("compressed_size = 0x%05x, out_processed = 0x%05x, out_size = 0x%05x, avail_out = 0x%05x, total_out = 0x%05x\n",
+ compressed_size, out_processed, out_size, stream->avail_out, stream->total_out);
+ log_print("\n\n");
+
+ /* Verify the stream is in a valid state */
+ stream_check = stream_valid_check(stream, in_buf, in_size, out_buf, out_size,
+ in_processed, out_processed, data_size);
+
+ if (stream_check != 0)
+ return stream_check;
+
+ if (ret != IGZIP_COMP_OK)
+ return COMPRESS_GENERAL_ERROR;
+
+ /* Check if the compression is completed */
+ if (state->state != ZSTATE_END)
+ if (compressed_size - out_processed - (out_size - stream->avail_out) <= 0)
+ return COMPRESS_OUT_BUFFER_OVERFLOW;
+
+ return ret;
+
+}
+
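+/* Occasionally swap in a different hufftable: either the file-scope
+ * `hufftables` or a subset table built from the data's own histogram via
+ * isal_update_histogram()/isal_create_hufftables_subset(), exercising the
+ * isal_deflate_set_hufftables() code paths. */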
+void set_random_hufftable(struct isal_zstream *stream, int level, uint8_t * data,
+ uint32_t data_size)
+{
+ struct isal_hufftables *huff = hufftables;
+ struct isal_huff_histogram hist;
+ if (level == 0 || rand() % 16 == 0) {
+ if (rand() % 8 == 0) {
+ huff = hufftables_subset;
+ memset(&hist, 0, sizeof(hist));
+ isal_update_histogram(data, data_size, &hist);
+ isal_create_hufftables_subset(huff, &hist);
+ }
+
+ isal_deflate_set_hufftables(stream, huff, rand() % 4);
+ }
+}
+
+/* Compress the input data into the output buffer where the input buffer and
+ * output buffer are randomly segmented to test state information for the
+ * compression */
+int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+ uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
+ uint32_t level, uint8_t * dict, uint32_t dict_len, uint32_t hist_bits)
+{
+ int ret = IGZIP_COMP_OK;
+ uint8_t *in_buf = NULL, *out_buf = NULL;
+ uint32_t in_size = 0, out_size = 0;
+ uint32_t in_processed = 0, out_processed = 0;
+ struct isal_zstream *stream;
+ struct isal_zstate *state;
+ uint32_t loop_count = 0;
+ uint32_t level_buf_size;
+ uint8_t *level_buf = NULL;
+ struct isal_hufftables *huff_tmp;
+ uint32_t reset_test_flag = 0;
+ uint8_t tmp_symbol;
+ int no_mod = 0;
+ struct isal_dict dict_str;
+
+ log_print("Starting Compress Multi Pass\n");
+
+ stream = malloc(sizeof(*stream));
+ if (stream == NULL)
+ return MALLOC_FAILED;
+ state = &stream->internal_state;
+
+ create_rand_repeat_data((uint8_t *) stream, sizeof(*stream));
+
+ isal_deflate_init(stream);
+
+ if (state->state != ZSTATE_NEW_HDR)
+ return COMPRESS_INCORRECT_STATE;
+
+ if (rand() % 4 == 0) {
+ /* Test reset */
+ reset_test_flag = 1;
+ huff_tmp = stream->hufftables;
+ create_rand_repeat_data((uint8_t *) stream, sizeof(*stream));
+
+ /* Restore variables not necessarily set by user */
+ stream->hufftables = huff_tmp;
+ stream->end_of_stream = 0;
+ stream->level = 0;
+ stream->level_buf = NULL;
+ stream->level_buf_size = 0;
+ }
+
+ stream->flush = flush_type;
+ stream->end_of_stream = 0;
+
+ /* These are set here to allow the loop to run correctly */
+ stream->avail_in = 0;
+ stream->avail_out = 0;
+ stream->gzip_flag = gzip_flag;
+ stream->level = level;
+ stream->hist_bits = hist_bits;
+
+ if (level >= 1) {
+ level_buf_size = get_rand_level_buf_size(stream->level);
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream->level_buf = level_buf;
+ stream->level_buf_size = level_buf_size;
+ }
+
+ if (reset_test_flag)
+ isal_deflate_reset(stream);
+
+ if (dict != NULL) {
+ if (rand() % 2 == 0)
+ isal_deflate_set_dict(stream, dict, dict_len);
+ else {
+ isal_deflate_process_dict(stream, &dict_str, dict, dict_len);
+ isal_deflate_reset_dict(stream, &dict_str);
+ }
+ }
+
+ while (1) {
+ loop_count++;
+
+ /* Setup in buffer for next round of compression */
+ if (stream->avail_in == 0) {
+ if (flush_type == NO_FLUSH || state->state == ZSTATE_NEW_HDR) {
+				/* Randomly choose size of the next in buffer */
+ in_size = rand() % (data_size + 1);
+
+ /* Limit size of buffer to be smaller than maximum */
+ if (in_size >= data_size - in_processed) {
+ in_size = data_size - in_processed;
+ stream->end_of_stream = 1;
+ }
+
+ if (in_size != 0) {
+ if (in_buf != NULL) {
+ free(in_buf);
+ in_buf = NULL;
+ }
+
+ in_buf = malloc(in_size);
+ if (in_buf == NULL) {
+ ret = MALLOC_FAILED;
+ break;
+ }
+ memcpy(in_buf, data + in_processed, in_size);
+ in_processed += in_size;
+
+ stream->avail_in = in_size;
+ stream->next_in = in_buf;
+ }
+ }
+ } else {
+ /* Randomly modify data after next in */
+ if (rand() % 4 == 0 && !no_mod) {
+
+ tmp_symbol = rand();
+ log_print
+ ("Modifying data at index 0x%x from 0x%x to 0x%x before recalling isal_deflate\n",
+ in_processed - stream->avail_in,
+ data[in_processed - stream->avail_in], tmp_symbol);
+ *stream->next_in = tmp_symbol;
+ data[in_processed - stream->avail_in] = tmp_symbol;
+ }
+ }
+
+ /* Setup out buffer for next round of compression */
+ if (stream->avail_out == 0) {
+			/* Save compressed data into compressed_buf */
+ if (out_buf != NULL) {
+ memcpy(compressed_buf + out_processed, out_buf,
+ out_size - stream->avail_out);
+ out_processed += out_size - stream->avail_out;
+ }
+
+ /* Randomly choose size of the next out buffer */
+ out_size = rand() % (*compressed_size + 1);
+
+ /* Limit size of buffer to be smaller than maximum */
+ if (out_size > *compressed_size - out_processed)
+ out_size = *compressed_size - out_processed;
+
+ if (out_size != 0) {
+ if (out_buf != NULL) {
+ free(out_buf);
+ out_buf = NULL;
+ }
+
+ out_buf = malloc(out_size);
+ if (out_buf == NULL) {
+ ret = MALLOC_FAILED;
+ break;
+ }
+
+ stream->avail_out = out_size;
+ stream->next_out = out_buf;
+ }
+ }
+
+ if (state->state == ZSTATE_NEW_HDR) {
+ set_random_hufftable(stream, level, data, data_size);
+ if (stream->hufftables == hufftables_subset)
+ no_mod = 1;
+ else
+ no_mod = 0;
+ }
+
+ ret =
+ isal_deflate_with_checks(stream, data_size, *compressed_size, in_buf,
+ in_size, in_processed, out_buf, out_size,
+ out_processed);
+
+ if (ret) {
+ if (ret == COMPRESS_OUT_BUFFER_OVERFLOW
+ || ret == COMPRESS_INCORRECT_STATE)
+ memcpy(compressed_buf + out_processed, out_buf, out_size);
+ break;
+ }
+
+ /* Check if the compression is completed */
+ if (state->state == ZSTATE_END) {
+ memcpy(compressed_buf + out_processed, out_buf, out_size);
+ *compressed_size = stream->total_out;
+ break;
+ }
+
+ }
+
+ if (stream != NULL)
+ free(stream);
+ if (level_buf != NULL)
+ free(level_buf);
+ if (in_buf != NULL)
+ free(in_buf);
+ if (out_buf != NULL)
+ free(out_buf);
+
+ if (ret == COMPRESS_OUT_BUFFER_OVERFLOW && flush_type == SYNC_FLUSH
+ && loop_count >= MAX_LOOPS)
+ ret = COMPRESS_LOOP_COUNT_OVERFLOW;
+
+ return ret;
+
+}
+
+/* Compress the input data into the output buffer in one call to isal_deflate */
+int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+ uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
+ uint32_t level, uint8_t * dict, uint32_t dict_len, uint32_t hist_bits)
+{
+ int ret = IGZIP_COMP_OK;
+ struct isal_zstream stream;
+ struct isal_zstate *state = &stream.internal_state;
+ uint32_t level_buf_size;
+ uint8_t *level_buf = NULL;
+ struct isal_hufftables *huff_tmp;
+ uint32_t reset_test_flag = 0;
+ struct isal_dict dict_str;
+
+ log_print("Starting Compress Single Pass\n");
+
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ isal_deflate_init(&stream);
+
+ set_random_hufftable(&stream, level, data, data_size);
+
+ if (state->state != ZSTATE_NEW_HDR)
+ return COMPRESS_INCORRECT_STATE;
+
+ if (rand() % 4 == 0) {
+ /* Test reset */
+ reset_test_flag = 1;
+ huff_tmp = stream.hufftables;
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ /* Restore variables not necessarily set by user */
+ stream.hufftables = huff_tmp;
+ stream.end_of_stream = 0;
+ stream.level = 0;
+ stream.level_buf = NULL;
+ stream.level_buf_size = 0;
+ }
+
+ stream.flush = flush_type;
+ stream.avail_in = data_size;
+ stream.next_in = data;
+ stream.avail_out = *compressed_size;
+ stream.next_out = compressed_buf;
+ stream.end_of_stream = 1;
+ stream.gzip_flag = gzip_flag;
+ stream.level = level;
+ stream.hist_bits = hist_bits;
+
+ if (level >= 1) {
+ level_buf_size = get_rand_level_buf_size(stream.level);
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+
+ if (reset_test_flag)
+ isal_deflate_reset(&stream);
+
+ if (dict != NULL) {
+ if (rand() % 2 == 0)
+ isal_deflate_set_dict(&stream, dict, dict_len);
+ else {
+ isal_deflate_process_dict(&stream, &dict_str, dict, dict_len);
+ isal_deflate_reset_dict(&stream, &dict_str);
+ }
+ }
+
+ ret =
+ isal_deflate_with_checks(&stream, data_size, *compressed_size, data, data_size,
+ data_size, compressed_buf, *compressed_size, 0);
+
+ if (level_buf != NULL)
+ free(level_buf);
+
+ /* Check if the compression is completed */
+ if (state->state == ZSTATE_END)
+ *compressed_size = stream.total_out;
+ else if (flush_type == SYNC_FLUSH && stream.avail_out < 16)
+ ret = COMPRESS_OUT_BUFFER_OVERFLOW;
+
+ return ret;
+
+}
+
+/* Compress the input data repeatedly into the output buffer.
+ * Compresses and verifies in place to decrease memory usage
+ */
+int compress_ver_rep_buf(uint8_t * data, uint32_t data_size, uint64_t data_rep_size,
+ uint8_t * compressed_buf, uint32_t compressed_size,
+ uint8_t * decomp_buf, uint32_t decomp_buf_size, uint32_t flush_type,
+ uint32_t gzip_flag, uint32_t level)
+{
+ int ret = IGZIP_COMP_OK;
+ struct isal_zstream stream;
+ struct inflate_state state;
+ uint32_t level_buf_size;
+ uint8_t *level_buf = NULL;
+ uint64_t data_remaining = data_rep_size;
+ uint64_t data_verified = 0;
+ uint32_t index;
+ uint32_t out_size, cmp_size;
+ uint32_t avail_out_start;
+
+ log_print("Starting Compress and Verify Repeated Buffer\n");
+
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ /* Setup compression stream */
+ isal_deflate_init(&stream);
+ stream.avail_in = 0;
+ stream.next_in = NULL;
+ stream.avail_out = 0;
+ stream.next_out = NULL;
+
+ set_random_hufftable(&stream, level, data, data_size);
+ stream.flush = flush_type;
+ stream.end_of_stream = 0;
+ stream.gzip_flag = gzip_flag;
+ stream.level = level;
+
+ if (level >= 1) {
+ level_buf_size = get_rand_level_buf_size(stream.level);
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+
+ /* Setup decompression stream */
+ create_rand_repeat_data((uint8_t *) & state, sizeof(state));
+ isal_inflate_init(&state);
+ state.crc_flag = gzip_flag;
+
+ while (data_remaining || stream.avail_in) {
+ /* Compress the input buffer */
+ if (stream.next_out == NULL) {
+ stream.avail_out = compressed_size;
+ stream.next_out = compressed_buf;
+ }
+
+ while (stream.avail_out > 0 && (data_remaining || stream.avail_in)) {
+ if (stream.avail_in == 0) {
+ stream.avail_in = data_size;
+ if (data_size >= data_remaining) {
+ stream.avail_in = data_remaining;
+ stream.end_of_stream = 1;
+ }
+
+ stream.next_in = data;
+ data_remaining -= stream.avail_in;
+ }
+
+ ret = isal_deflate(&stream);
+
+ if (ret)
+ return COMPRESS_GENERAL_ERROR;
+ }
+
+		/* Verify the compressed buffer */
+ state.next_in = compressed_buf;
+ state.avail_in = compressed_size;
+ state.next_out = NULL;
+ state.avail_out = 0;
+ create_rand_repeat_data(decomp_buf, decomp_buf_size);
+
+ while (state.avail_out == 0) {
+ state.next_out = decomp_buf;
+ state.avail_out = decomp_buf_size;
+
+ /* Force decoding to stop when avail_out rolls over */
+ if ((1ULL << 32) - state.total_out < decomp_buf_size)
+ state.avail_out = (1ULL << 32) - state.total_out;
+
+ avail_out_start = state.avail_out;
+
+ ret = isal_inflate(&state);
+ if (ret)
+ return inflate_ret_to_code(ret);
+
+ /* Check data accuracy */
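+			/* The expected output is `data` repeated end to end, so
+			 * compare in up to three pieces to handle chunks that
+			 * cross a repeat boundary. */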
+ index = data_verified % data_size;
+ out_size = avail_out_start - state.avail_out;
+ cmp_size =
+ (out_size > data_size - index) ? data_size - index : out_size;
+ ret |= memcmp(decomp_buf, data + index, cmp_size);
+ out_size -= cmp_size;
+ cmp_size = (out_size > index) ? index : out_size;
+ ret |= memcmp(decomp_buf + data_size - index, data, cmp_size);
+ out_size -= cmp_size;
+ cmp_size = out_size;
+ ret |= memcmp(decomp_buf, decomp_buf + data_size, out_size);
+ if (ret)
+ return RESULT_ERROR;
+
+ data_verified += avail_out_start - state.avail_out;
+ }
+ stream.next_out = NULL;
+ }
+
+ if (level_buf != NULL)
+ free(level_buf);
+
+ return ret;
+
+}
+
+/* Statelessly compress the input buffer into the output buffer */
+int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+ uint32_t * compressed_size, uint32_t flush_type, uint32_t gzip_flag,
+ uint32_t level, uint32_t hist_bits)
+{
+ int ret = IGZIP_COMP_OK;
+ struct isal_zstream stream;
+ uint32_t level_buf_size;
+ uint8_t *level_buf = NULL;
+ struct isal_hufftables *huff_tmp;
+ uint32_t reset_test_flag = 0;
+
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ isal_deflate_stateless_init(&stream);
+
+ set_random_hufftable(&stream, level, data, data_size);
+
+ if (rand() % 4 == 0) {
+ /* Test reset */
+ reset_test_flag = 1;
+ huff_tmp = stream.hufftables;
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ /* Restore variables not necessarily set by user */
+ stream.hufftables = huff_tmp;
+ stream.end_of_stream = 0;
+ stream.level = 0;
+ stream.level_buf = NULL;
+ stream.level_buf_size = 0;
+ }
+
+ stream.avail_in = data_size;
+ stream.next_in = data;
+ stream.flush = flush_type;
+ if (flush_type != NO_FLUSH)
+ stream.end_of_stream = 1;
+ stream.avail_out = *compressed_size;
+ stream.next_out = compressed_buf;
+ stream.gzip_flag = gzip_flag;
+ stream.level = level;
+ stream.hist_bits = hist_bits;
+
+ if (level == 1) {
+		/* This is to test the case where the level buf uses already existing
+		 * internal buffers */
+ level_buf_size = rand() % IBUF_SIZE;
+
+ if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+ } else if (level > 1) {
+ level_buf_size = get_rand_level_buf_size(level);
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+
+ if (reset_test_flag)
+ isal_deflate_reset(&stream);
+
+ ret = isal_deflate_stateless(&stream);
+
+ if (level_buf != NULL)
+ free(level_buf);
+
+ /* verify the stream */
+ if (stream.next_in - data != stream.total_in ||
+ stream.total_in + stream.avail_in != data_size)
+ return COMPRESS_INPUT_STREAM_INTEGRITY_ERROR;
+
+ if (stream.next_out - compressed_buf != stream.total_out ||
+ stream.total_out + stream.avail_out != *compressed_size) {
+ return COMPRESS_OUTPUT_STREAM_INTEGRITY_ERROR;
+ }
+
+ if (ret != IGZIP_COMP_OK) {
+ if (ret == STATELESS_OVERFLOW)
+ return COMPRESS_OUT_BUFFER_OVERFLOW;
+ else if (ret == INVALID_FLUSH)
+ return INVALID_FLUSH_ERROR;
+ else {
+ printf("Return due to ret = %d with level = %d or %d\n", ret, level,
+ stream.level);
+ return COMPRESS_GENERAL_ERROR;
+ }
+ }
+
+ if (!stream.end_of_stream) {
+ return COMPRESS_END_OF_STREAM_NOT_SET;
+ }
+
+ if (stream.avail_in != 0)
+ return COMPRESS_ALL_INPUT_FAIL;
+
+ *compressed_size = stream.total_out;
+
+ return ret;
+
+}
+
+/* Statelessly compress the input buffer into the output buffer */
+int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+ uint32_t * compressed_size, uint32_t level,
+ uint32_t hist_bits)
+{
+ int ret = IGZIP_COMP_OK;
+ uint8_t *in_buf = NULL, *level_buf = NULL, *out_buf = compressed_buf;
+ uint32_t in_size = 0, level_buf_size;
+	uint32_t in_processed = 0;
+ struct isal_zstream stream;
+ uint32_t loop_count = 0;
+ struct isal_hufftables *huff_tmp;
+ uint32_t reset_test_flag = 0;
+
+ log_print("Starting Stateless Compress Full Flush\n");
+
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ isal_deflate_stateless_init(&stream);
+
+ if (rand() % 4 == 0) {
+ /* Test reset */
+ reset_test_flag = 1;
+ huff_tmp = stream.hufftables;
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ /* Restore variables not necessarily set by user */
+ stream.hufftables = huff_tmp;
+ stream.end_of_stream = 0;
+ stream.level = 0;
+ stream.level_buf = NULL;
+ stream.level_buf_size = 0;
+ stream.gzip_flag = 0;
+ }
+
+ stream.flush = FULL_FLUSH;
+ stream.end_of_stream = 0;
+ stream.avail_out = *compressed_size;
+ stream.next_out = compressed_buf;
+ stream.level = level;
+ stream.hist_bits = hist_bits;
+
+ if (level == 1) {
+		/* This is to test the case where level_buf uses already existing
+		 * internal buffers */
+ level_buf_size = rand() % IBUF_SIZE;
+
+ if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+ } else if (level > 1) {
+ level_buf_size = get_rand_level_buf_size(level);
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+
+ if (reset_test_flag)
+ isal_deflate_reset(&stream);
+
+ while (1) {
+ loop_count++;
+
+		/* Randomly choose size of the next in buffer */
+ in_size = rand() % (data_size + 1);
+
+ /* Limit size of buffer to be smaller than maximum */
+ if (in_size >= data_size - in_processed) {
+ in_size = data_size - in_processed;
+ stream.end_of_stream = 1;
+ }
+
+ stream.avail_in = in_size;
+
+ if (in_size != 0) {
+ if (in_buf != NULL) {
+ free(in_buf);
+ in_buf = NULL;
+ }
+
+ in_buf = malloc(in_size);
+ if (in_buf == NULL) {
+ ret = MALLOC_FAILED;
+ break;
+ }
+ memcpy(in_buf, data + in_processed, in_size);
+ in_processed += in_size;
+
+ stream.next_in = in_buf;
+ }
+
+ out_buf = stream.next_out;
+
+ if (stream.internal_state.state == ZSTATE_NEW_HDR)
+ set_random_hufftable(&stream, level, data, data_size);
+
+ ret = isal_deflate_stateless(&stream);
+
+ assert(stream.internal_state.bitbuf.m_bit_count == 0);
+
+ assert(compressed_buf == stream.next_out - stream.total_out);
+ if (ret)
+ break;
+
+ /* Verify that blocks are independent */
+ ret =
+ inflate_check(out_buf, stream.next_out - out_buf, in_buf, in_size, 0, NULL,
+ 0, hist_bits);
+
+ if (ret == INFLATE_INVALID_LOOK_BACK_DISTANCE) {
+ break;
+ } else
+ ret = 0;
+
+ /* Check if the compression is completed */
+ if (in_processed == data_size) {
+ *compressed_size = stream.total_out;
+ break;
+ }
+
+ }
+
+ if (level_buf != NULL)
+ free(level_buf);
+
+ if (in_buf != NULL)
+ free(in_buf);
+
+ if (ret == STATELESS_OVERFLOW && loop_count >= MAX_LOOPS)
+ ret = COMPRESS_LOOP_COUNT_OVERFLOW;
+
+ return ret;
+
+}
+
+/* Compress the input data into the output buffer where the input buffer is
+ * randomly segmented, to test for independence of blocks in full flush
+ * compression */
+int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+ uint32_t * compressed_size, uint32_t gzip_flag, uint32_t level)
+{
+ int ret = IGZIP_COMP_OK;
+ uint8_t *in_buf = NULL, *out_buf = compressed_buf, *level_buf = NULL;
+ uint32_t in_size = 0, level_buf_size;
+	uint32_t in_processed = 0;
+ struct isal_zstream stream;
+ struct isal_zstate *state = &stream.internal_state;
+ uint32_t loop_count = 0;
+ struct isal_hufftables *huff_tmp;
+ uint32_t reset_test_flag = 0;
+
+ log_print("Starting Compress Full Flush\n");
+
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ isal_deflate_init(&stream);
+
+ if (state->state != ZSTATE_NEW_HDR)
+ return COMPRESS_INCORRECT_STATE;
+
+ if (rand() % 4 == 0) {
+ /* Test reset */
+ reset_test_flag = 1;
+ huff_tmp = stream.hufftables;
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ /* Restore variables not necessarily set by user */
+ stream.hufftables = huff_tmp;
+ stream.end_of_stream = 0;
+ stream.level = 0;
+ stream.level_buf = NULL;
+ stream.level_buf_size = 0;
+ stream.hist_bits = 0;
+ }
+
+ stream.flush = FULL_FLUSH;
+ stream.end_of_stream = 0;
+ stream.avail_out = *compressed_size;
+ stream.next_out = compressed_buf;
+ stream.total_out = 0;
+ stream.gzip_flag = gzip_flag;
+ stream.level = level;
+
+ if (level >= 1) {
+ level_buf_size = get_rand_level_buf_size(stream.level);
+ if (level_buf_size >= ISAL_DEF_LVL1_MIN) {
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+ }
+
+ if (reset_test_flag)
+ isal_deflate_reset(&stream);
+
+ while (1) {
+ loop_count++;
+
+ /* Setup in buffer for next round of compression */
+ if (state->state == ZSTATE_NEW_HDR) {
+			/* Randomly choose the size of the next in buffer */
+ in_size = rand() % (data_size + 1);
+
+			/* Clamp the in buffer to the remaining input and
+			 * flag end of stream */
+ if (in_size >= data_size - in_processed) {
+ in_size = data_size - in_processed;
+ stream.end_of_stream = 1;
+ }
+
+ stream.avail_in = in_size;
+
+ if (in_size != 0) {
+ if (in_buf != NULL) {
+ free(in_buf);
+ in_buf = NULL;
+ }
+
+ in_buf = malloc(in_size);
+ if (in_buf == NULL) {
+ ret = MALLOC_FAILED;
+ break;
+ }
+ memcpy(in_buf, data + in_processed, in_size);
+ in_processed += in_size;
+
+ stream.next_in = in_buf;
+ }
+
+ out_buf = stream.next_out;
+ }
+
+ if (state->state == ZSTATE_NEW_HDR)
+ set_random_hufftable(&stream, level, data, data_size);
+
+ ret = isal_deflate(&stream);
+
+ if (ret)
+ break;
+
+ /* Verify that blocks are independent */
+ if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_END) {
+ ret =
+ inflate_check(out_buf, stream.next_out - out_buf, in_buf, in_size,
+ 0, NULL, 0, 0);
+
+ if (ret == INFLATE_INVALID_LOOK_BACK_DISTANCE)
+ break;
+ else
+ ret = 0;
+ }
+
+ /* Check if the compression is completed */
+ if (state->state == ZSTATE_END) {
+ *compressed_size = stream.total_out;
+ break;
+ }
+
+ }
+
+ if (level_buf != NULL)
+ free(level_buf);
+
+ if (in_buf != NULL)
+ free(in_buf);
+
+ if (ret == COMPRESS_OUT_BUFFER_OVERFLOW && loop_count >= MAX_LOOPS)
+ ret = COMPRESS_LOOP_COUNT_OVERFLOW;
+
+ return ret;
+
+}
+
+/* Compress the input buffer into the output buffer, but switch the flush type
+ * in the middle of the compression to test flush-type transitions */
+int compress_swap_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed_buf,
+ uint32_t * compressed_size, uint32_t flush_type, int level,
+ uint32_t gzip_flag)
+{
+ int ret = IGZIP_COMP_OK;
+ struct isal_zstream stream;
+ struct isal_zstate *state = &stream.internal_state;
+ uint32_t partial_size;
+ struct isal_hufftables *huff_tmp;
+ uint32_t reset_test_flag = 0;
+ uint32_t level_buf_size;
+ uint8_t *level_buf = NULL;
+
+ log_print("Starting Compress Swap Flush\n");
+
+ isal_deflate_init(&stream);
+
+ set_random_hufftable(&stream, 0, data, data_size);
+
+ if (state->state != ZSTATE_NEW_HDR)
+ return COMPRESS_INCORRECT_STATE;
+
+ if (rand() % 4 == 0) {
+ /* Test reset */
+ reset_test_flag = 1;
+ huff_tmp = stream.hufftables;
+ create_rand_repeat_data((uint8_t *) & stream, sizeof(stream));
+
+ /* Restore variables not necessarily set by user */
+ stream.hufftables = huff_tmp;
+ stream.end_of_stream = 0;
+ stream.level = 0;
+ stream.level_buf = NULL;
+ stream.level_buf_size = 0;
+ }
+
+ partial_size = rand() % (data_size + 1);
+
+ stream.flush = flush_type;
+ stream.avail_in = partial_size;
+ stream.next_in = data;
+ stream.avail_out = *compressed_size;
+ stream.next_out = compressed_buf;
+ stream.end_of_stream = 0;
+ stream.gzip_flag = gzip_flag;
+ if (level) {
+ stream.level = level;
+ level_buf_size = get_rand_level_buf_size(stream.level);
+ level_buf = malloc(level_buf_size);
+ create_rand_repeat_data(level_buf, level_buf_size);
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_buf_size;
+ }
+
+ if (reset_test_flag)
+ isal_deflate_reset(&stream);
+
+ ret =
+ isal_deflate_with_checks(&stream, data_size, *compressed_size, data, partial_size,
+ partial_size, compressed_buf, *compressed_size, 0);
+
+	if (ret) {
+		if (level_buf != NULL)
+			free(level_buf);
+		return ret;
+	}
+
+ if (state->state == ZSTATE_NEW_HDR)
+ set_random_hufftable(&stream, 0, data, data_size);
+
+ flush_type = rand() % 3;
+
+ stream.flush = flush_type;
+ stream.avail_in = data_size - partial_size;
+ stream.next_in = data + partial_size;
+ stream.end_of_stream = 1;
+
+ ret =
+ isal_deflate_with_checks(&stream, data_size, *compressed_size, data + partial_size,
+ data_size - partial_size, data_size, compressed_buf,
+ *compressed_size, 0);
+
+	if (ret == COMPRESS_GENERAL_ERROR) {
+		if (stream.level_buf != NULL)
+			free(stream.level_buf);
+		return INVALID_FLUSH_ERROR;
+	}
+
+ *compressed_size = stream.total_out;
+
+ if (stream.level_buf != NULL)
+ free(stream.level_buf);
+
+ return ret;
+}
+
+/* Test deflate_stateless */
+int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_type)
+{
+ int ret = IGZIP_COMP_OK;
+ uint32_t z_size, overflow, gzip_flag, level, hist_bits;
+ uint8_t *z_buf = NULL;
+ uint8_t *in_buf = NULL;
+
+ gzip_flag = rand() % 5;
+ hist_bits = rand() % 16;
+ level = get_rand_level();
+
+ if (in_size != 0) {
+ in_buf = malloc(in_size);
+
+ if (in_buf == NULL)
+ return MALLOC_FAILED;
+
+ memcpy(in_buf, in_data, in_size);
+ }
+
+ /* Test non-overflow case where a type 0 block is not written */
+ z_size = 2 * in_size + hdr_bytes;
+ if (gzip_flag == IGZIP_GZIP)
+ z_size += gzip_extra_bytes;
+ else if (gzip_flag == IGZIP_GZIP_NO_HDR)
+ z_size += gzip_trl_bytes;
+ else if (gzip_flag == IGZIP_ZLIB)
+ z_size += zlib_extra_bytes;
+ else if (gzip_flag == IGZIP_ZLIB_NO_HDR)
+ z_size += zlib_trl_bytes;
+
+ z_buf = malloc(z_size);
+
+ if (z_buf == NULL)
+ return MALLOC_FAILED;
+
+ create_rand_repeat_data(z_buf, z_size);
+
+ /* If flush type is invalid */
+ if (flush_type != NO_FLUSH && flush_type != FULL_FLUSH) {
+ ret =
+ compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag,
+ level, hist_bits);
+
+ if (ret != INVALID_FLUSH_ERROR)
+ print_error(ret);
+ else
+ ret = 0;
+
+ if (z_buf != NULL)
+ free(z_buf);
+
+ if (in_buf != NULL)
+ free(in_buf);
+
+ return ret;
+ }
+
+ /* Else test valid flush type */
+ ret = compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level,
+ hist_bits);
+
+ if (!ret)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, NULL, 0,
+ hist_bits);
+
+ if (options.verbose && ret) {
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d, and window bits %d: ",
+ level, gzip_flag, flush_type, hist_bits);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+ }
+
+ if (z_buf != NULL) {
+ free(z_buf);
+ z_buf = NULL;
+ }
+
+ print_error(ret);
+ if (ret)
+ return ret;
+
+	/* Test non-overflow case where a type 0 block may be written. The
+	 * worst case output is the input plus one stored-block header per
+	 * TYPE0_MAX_SIZE bytes of input. */
+ z_size = TYPE0_HDR_SIZE * ((in_size + TYPE0_MAX_SIZE - 1) / TYPE0_MAX_SIZE) + in_size;
+
+ if (gzip_flag == IGZIP_GZIP)
+ z_size += gzip_extra_bytes;
+ else if (gzip_flag == IGZIP_GZIP_NO_HDR)
+ z_size += gzip_trl_bytes;
+ else if (gzip_flag == IGZIP_ZLIB)
+ z_size += zlib_extra_bytes;
+ else if (gzip_flag == IGZIP_ZLIB_NO_HDR)
+ z_size += zlib_trl_bytes;
+
+ if (z_size <= gzip_extra_bytes)
+ z_size += TYPE0_HDR_SIZE;
+
+ if (z_size < 8)
+ z_size = 8;
+
+ z_buf = malloc(z_size);
+
+ if (z_buf == NULL)
+ return MALLOC_FAILED;
+
+ create_rand_repeat_data(z_buf, z_size);
+
+ ret = compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level,
+ hist_bits);
+ if (!ret)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, NULL, 0,
+ hist_bits);
+ if (ret) {
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d, and hist_bits %d: ",
+ level, gzip_flag, flush_type, hist_bits);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+ }
+
+ if (!ret) {
+ free(z_buf);
+ z_buf = NULL;
+
+ /* Test random overflow case */
+ z_size = rand() % z_size;
+
+ if (z_size > in_size)
+ z_size = rand() & in_size;
+
+ if (z_size > 0) {
+ z_buf = malloc(z_size);
+
+ if (z_buf == NULL)
+ return MALLOC_FAILED;
+ }
+
+ overflow = compress_stateless(in_buf, in_size, z_buf, &z_size, flush_type,
+ gzip_flag, level, hist_bits);
+
+ if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
+ if (overflow == 0)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag,
+ NULL, 0, hist_bits);
+
+ if (overflow != 0 || ret != 0) {
+ log_print("overflow error = %d\n", overflow);
+ log_error(overflow);
+ log_print("inflate ret = %d\n", ret);
+ log_error(ret);
+
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d, and hist_bits %d: ",
+ level, gzip_flag, flush_type, hist_bits);
+
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+
+ printf("Failed on compress single pass overflow\n");
+ print_error(ret);
+ ret = OVERFLOW_TEST_ERROR;
+ }
+ }
+ }
+
+ print_error(ret);
+ if (ret) {
+ if (z_buf != NULL) {
+ free(z_buf);
+ z_buf = NULL;
+ }
+ if (in_buf != NULL)
+ free(in_buf);
+ return ret;
+ }
+
+ if (flush_type == FULL_FLUSH) {
+ if (z_buf != NULL)
+ free(z_buf);
+
+ z_size = 2 * in_size + MAX_LOOPS * (hdr_bytes + 5);
+
+ z_buf = malloc(z_size);
+
+ if (z_buf == NULL)
+ return MALLOC_FAILED;
+
+ create_rand_repeat_data(z_buf, z_size);
+
+		/* Test stateless compression with full flush */
+ ret = compress_stateless_full_flush(in_buf, in_size, z_buf, &z_size,
+ level, hist_bits);
+
+ if (!ret)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, 0, NULL, 0,
+ hist_bits);
+ else if (ret == COMPRESS_LOOP_COUNT_OVERFLOW)
+ ret = 0;
+
+ print_error(ret);
+
+ if (ret) {
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d, and hist_bits %d: ",
+ level, gzip_flag, FULL_FLUSH, hist_bits);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+ }
+ }
+ if (z_buf != NULL)
+ free(z_buf);
+
+ if (in_buf != NULL)
+ free(in_buf);
+
+ return ret;
+}
+
+/* Test deflate */
+int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type)
+{
+ int ret = IGZIP_COMP_OK, fin_ret = IGZIP_COMP_OK;
+ uint32_t overflow = 0, gzip_flag, level, hist_bits;
+ uint32_t z_size = 0, z_size_max = 0, z_compressed_size, dict_len = 0;
+ uint8_t *z_buf = NULL, *dict = NULL;
+
+	/* Test a non-overflow case */
+ if (flush_type == NO_FLUSH)
+ z_size_max = 2 * in_size + hdr_bytes + 2;
+ else if (flush_type == SYNC_FLUSH || flush_type == FULL_FLUSH)
+ z_size_max = 2 * in_size + MAX_LOOPS * (hdr_bytes + 5);
+ else {
+ printf("Invalid Flush Parameter\n");
+ return COMPRESS_GENERAL_ERROR;
+ }
+
+ gzip_flag = rand() % 5;
+ hist_bits = rand() % 16;
+ level = get_rand_level();
+
+ z_size = z_size_max;
+
+ if (gzip_flag == IGZIP_GZIP)
+ z_size += gzip_extra_bytes;
+ else if (gzip_flag == IGZIP_GZIP_NO_HDR)
+ z_size += gzip_trl_bytes;
+ else if (gzip_flag == IGZIP_ZLIB)
+ z_size += zlib_extra_bytes;
+ else if (gzip_flag == IGZIP_ZLIB_NO_HDR)
+ z_size += zlib_trl_bytes;
+
+ z_buf = malloc(z_size);
+ if (z_buf == NULL) {
+ print_error(MALLOC_FAILED);
+ return MALLOC_FAILED;
+ }
+ create_rand_repeat_data(z_buf, z_size);
+
+ if (rand() % 8 == 0) {
+ dict_len = (rand() % IGZIP_HIST_SIZE) + 1;
+ dict = malloc(dict_len);
+ if (dict == NULL) {
+ print_error(MALLOC_FAILED);
+ return MALLOC_FAILED;
+ }
+ create_rand_dict(dict, dict_len, z_buf, z_size);
+ }
+
+ ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
+ gzip_flag, level, dict, dict_len, hist_bits);
+
+ if (!ret)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, dict, dict_len,
+ hist_bits);
+
+ if (ret) {
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d, and hist_bits %d: ",
+ level, gzip_flag, flush_type, hist_bits);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ if (dict != NULL) {
+ log_print("Using Dictionary: ");
+ log_uint8_t(dict, dict_len);
+ log_print("\n");
+ }
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+
+ printf("Failed on compress single pass\n");
+ print_error(ret);
+ }
+
+ if (dict != NULL) {
+ free(dict);
+ dict = NULL;
+ dict_len = 0;
+ }
+
+ fin_ret |= ret;
+ if (ret)
+ goto test_compress_cleanup;
+
+ z_compressed_size = z_size;
+ z_size = z_size_max;
+ create_rand_repeat_data(z_buf, z_size_max);
+
+ if (rand() % 8 == 0) {
+ dict_len = (rand() % IGZIP_HIST_SIZE) + 1;
+ dict = malloc(dict_len);
+ if (dict == NULL) {
+ print_error(MALLOC_FAILED);
+ return MALLOC_FAILED;
+ }
+ create_rand_dict(dict, dict_len, z_buf, z_size);
+ }
+
+ ret =
+ compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level,
+ dict, dict_len, hist_bits);
+
+ if (!ret)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, dict, dict_len,
+ hist_bits);
+
+ if (ret) {
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d and hist_bits %d: ",
+ level, gzip_flag, flush_type, hist_bits);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ if (dict != NULL) {
+ log_print("Using Dictionary: ");
+ log_uint8_t(dict, dict_len);
+ log_print("\n");
+ }
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+
+ printf("Failed on compress multi pass\n");
+ print_error(ret);
+ }
+
+ if (dict != NULL) {
+ free(dict);
+ dict = NULL;
+ dict_len = 0;
+ }
+
+ fin_ret |= ret;
+ if (ret)
+ goto test_compress_cleanup;
+
+ ret = 0;
+
+ /* Test random overflow case */
+ if (flush_type == SYNC_FLUSH && z_compressed_size > in_size)
+ z_compressed_size = in_size + 1;
+
+ z_size = rand() % z_compressed_size;
+ create_rand_repeat_data(z_buf, z_size);
+
+ overflow = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
+ gzip_flag, level, dict, dict_len, hist_bits);
+
+ if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
+ if (overflow == 0)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, dict,
+ dict_len, hist_bits);
+
+		/* Rarely the single pass overflow run compresses the data
+		 * better than the initial run. This check stops that case
+		 * from being reported as an error. */
+ if (overflow != 0 || ret != 0) {
+ log_print("overflow error = %d\n", overflow);
+ log_error(overflow);
+ log_print("inflate ret = %d\n", ret);
+ log_error(ret);
+
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d, and hist_bits %d: ",
+ level, gzip_flag, flush_type, hist_bits);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+
+ printf("Failed on compress single pass overflow\n");
+ print_error(ret);
+ ret = OVERFLOW_TEST_ERROR;
+ }
+ }
+
+ fin_ret |= ret;
+ if (ret)
+ goto test_compress_cleanup;
+
+ if (flush_type == NO_FLUSH) {
+ create_rand_repeat_data(z_buf, z_size);
+
+ overflow =
+ compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type,
+ gzip_flag, level, dict, dict_len, hist_bits);
+
+ if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) {
+ if (overflow == 0)
+ ret =
+ inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag,
+ dict, dict_len, hist_bits);
+
+			/* Rarely the multi pass overflow run compresses the
+			 * data better than the initial run. This check stops
+			 * that case from being reported as an error. */
+ if (overflow != 0 || ret != 0) {
+ log_print("overflow error = %d\n", overflow);
+ log_error(overflow);
+ log_print("inflate ret = %d\n", ret);
+ log_error(ret);
+ log_print
+ ("Compressed array at level %d with gzip flag %d, flush type %d, and hist_bits %d: ",
+ level, gzip_flag, flush_type, hist_bits);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+
+ printf("Failed on compress multi pass overflow\n");
+ print_error(ret);
+ ret = OVERFLOW_TEST_ERROR;
+ }
+ }
+ fin_ret |= ret;
+ }
+
+ test_compress_cleanup:
+ free(z_buf);
+
+ return fin_ret;
+}
+
+/* Test swapping flush types in the middle of compression */
+int test_flush(uint8_t * in_buf, uint32_t in_size)
+{
+ int fin_ret = IGZIP_COMP_OK, ret;
+ uint32_t z_size, flush_type = 0, gzip_flag, level;
+ uint8_t *z_buf = NULL;
+
+ gzip_flag = rand() % 5;
+ level = get_rand_level();
+
+ z_size = 2 * in_size + 2 * hdr_bytes + 8;
+ if (gzip_flag == IGZIP_GZIP)
+ z_size += gzip_extra_bytes;
+ else if (gzip_flag == IGZIP_GZIP_NO_HDR)
+ z_size += gzip_trl_bytes;
+ else if (gzip_flag == IGZIP_ZLIB)
+ z_size += zlib_extra_bytes;
+ else if (gzip_flag == IGZIP_ZLIB_NO_HDR)
+ z_size += zlib_trl_bytes;
+
+ z_buf = malloc(z_size);
+
+ if (z_buf == NULL)
+ return MALLOC_FAILED;
+
+ create_rand_repeat_data(z_buf, z_size);
+
+ while (flush_type < 3)
+ flush_type = rand() & 0xFFFF;
+
+ /* Test invalid flush */
+ ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type,
+ gzip_flag, level, NULL, 0, 0);
+
+ if (ret == COMPRESS_GENERAL_ERROR)
+ ret = 0;
+ else {
+ printf("Failed when passing invalid flush parameter\n");
+ ret = INVALID_FLUSH_ERROR;
+ }
+
+ fin_ret |= ret;
+ print_error(ret);
+
+ create_rand_repeat_data(z_buf, z_size);
+
+ /* Test swapping flush type */
+ ret =
+ compress_swap_flush(in_buf, in_size, z_buf, &z_size, rand() % 3, level, gzip_flag);
+
+ if (!ret)
+ ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, NULL, 0, 0);
+
+ if (ret) {
+ log_print("Compressed array at level %d with gzip flag %d: ", level,
+ gzip_flag);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+
+ printf("Failed on swapping flush type\n");
+ print_error(ret);
+ }
+
+ fin_ret |= ret;
+ print_error(ret);
+
+ return fin_ret;
+}
+
+/* Test there are no length distance pairs across full flushes */
+int test_full_flush(uint8_t * in_buf, uint32_t in_size)
+{
+ int ret = IGZIP_COMP_OK;
+ uint32_t z_size, gzip_flag, level;
+ uint8_t *z_buf = NULL;
+
+ gzip_flag = rand() % 5;
+ level = get_rand_level();
+ z_size = 2 * in_size + MAX_LOOPS * (hdr_bytes + 5);
+
+ if (gzip_flag == IGZIP_GZIP)
+ z_size += gzip_extra_bytes;
+ else if (gzip_flag == IGZIP_GZIP_NO_HDR)
+ z_size += gzip_trl_bytes;
+ else if (gzip_flag == IGZIP_ZLIB)
+ z_size += zlib_extra_bytes;
+ else if (gzip_flag == IGZIP_ZLIB_NO_HDR)
+ z_size += zlib_trl_bytes;
+
+ z_buf = malloc(z_size);
+ if (z_buf == NULL) {
+ print_error(MALLOC_FAILED);
+ return MALLOC_FAILED;
+ }
+
+ create_rand_repeat_data(z_buf, z_size);
+
+ ret = compress_full_flush(in_buf, in_size, z_buf, &z_size, gzip_flag, level);
+
+ if (!ret)
+ ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, NULL, 0, 0);
+
+ if (ret) {
+ log_print("Compressed array at level %d with gzip flag %d and flush type %d: ",
+ level, gzip_flag, FULL_FLUSH);
+ log_uint8_t(z_buf, z_size);
+ log_print("\n");
+ log_print("Data: ");
+ log_uint8_t(in_buf, in_size);
+
+ printf("Failed on compress multi pass\n");
+ print_error(ret);
+ }
+
+ free(z_buf);
+
+ return ret;
+}
+
+int test_inflate(struct vect_result *in_vector)
+{
+ int ret = IGZIP_COMP_OK;
+ uint8_t *compress_buf = in_vector->vector, *out_buf = NULL;
+ uint64_t compress_len = in_vector->vector_length;
+ uint32_t out_size = 0;
+
+ out_size = 10 * in_vector->vector_length;
+ out_buf = malloc(out_size);
+ if (out_buf == NULL)
+ return MALLOC_FAILED;
+
+ ret = inflate_stateless_pass(compress_buf, compress_len, out_buf, &out_size, 0);
+
+ if (ret == INFLATE_LEFTOVER_INPUT)
+ ret = ISAL_DECOMP_OK;
+
+ if (ret != in_vector->expected_error)
+ printf("Inflate return value incorrect, %d != %d\n", ret,
+ in_vector->expected_error);
+ else
+ ret = IGZIP_COMP_OK;
+
+ if (!ret) {
+ ret = inflate_multi_pass(compress_buf, compress_len, out_buf, &out_size,
+ 0, NULL, 0, 0);
+
+ if (ret == INFLATE_LEFTOVER_INPUT)
+ ret = ISAL_DECOMP_OK;
+
+ if (ret != in_vector->expected_error)
+ printf("Inflate return value incorrect, %d != %d\n", ret,
+ in_vector->expected_error);
+ else
+ ret = IGZIP_COMP_OK;
+ }
+
+ return ret;
+
+}
+
+int test_large(uint8_t * in_buf, uint32_t in_size, uint64_t large_size)
+{
+
+ int ret = IGZIP_COMP_OK;
+ uint32_t gzip_flag, level;
+ uint32_t z_size = 0, z_size_max = 0, tmp_buf_size;
+ uint8_t *z_buf = NULL, *tmp_buf = NULL;
+ int flush_type = NO_FLUSH;
+
+	/* Test a non-overflow case */
+ z_size_max = MAX_LARGE_COMP_BUF_SIZE;
+
+ gzip_flag = rand() % 5;
+ level = get_rand_level();
+
+ z_size = z_size_max;
+ z_buf = malloc(z_size);
+ if (z_buf == NULL) {
+ print_error(MALLOC_FAILED);
+ return MALLOC_FAILED;
+ }
+ create_rand_repeat_data(z_buf, z_size);
+
+ tmp_buf_size = IBUF_SIZE;
+ tmp_buf = malloc(tmp_buf_size);
+ if (tmp_buf == NULL) {
+ print_error(MALLOC_FAILED);
+ return MALLOC_FAILED;
+ }
+
+ ret =
+ compress_ver_rep_buf(in_buf, in_size, large_size, z_buf, z_size, tmp_buf,
+ tmp_buf_size, flush_type, gzip_flag, level);
+
+ if (ret)
+ print_error(ret);
+
+ if (z_buf != NULL) {
+ free(z_buf);
+ z_buf = NULL;
+ }
+
+ if (tmp_buf != NULL) {
+ free(tmp_buf);
+ tmp_buf = NULL;
+ }
+
+ return ret;
+}
+
+/* Run multiple compression tests on data stored in a file */
+int test_compress_file(char *file_name)
+{
+ int ret = IGZIP_COMP_OK;
+ uint64_t in_size;
+ uint8_t *in_buf = NULL;
+ FILE *in_file = NULL;
+
+ in_file = fopen(file_name, "rb");
+ if (!in_file) {
+ printf("Failed to open file %s\n", file_name);
+ return FILE_READ_FAILED;
+ }
+
+ in_size = get_filesize(in_file);
+ if (in_size > MAX_FILE_SIZE)
+ in_size = MAX_FILE_SIZE;
+
+	if (in_size != 0) {
+		in_buf = malloc(in_size);
+		if (in_buf == NULL) {
+			printf("Failed to allocate in_buf for test_compress_file\n");
+			fclose(in_file);
+			return MALLOC_FAILED;
+		}
+		if (fread(in_buf, 1, in_size, in_file) != in_size) {
+			printf("Failed to read in_buf from test_compress_file\n");
+			free(in_buf);
+			fclose(in_file);
+			return FILE_READ_FAILED;
+		}
+	}
+
+	fclose(in_file);
+
+ ret |= test_compress_stateless(in_buf, in_size, NO_FLUSH);
+ if (!ret)
+ ret |= test_compress_stateless(in_buf, in_size, SYNC_FLUSH);
+ if (!ret)
+ ret |= test_compress_stateless(in_buf, in_size, FULL_FLUSH);
+ if (!ret)
+ ret |= test_compress(in_buf, in_size, NO_FLUSH);
+ if (!ret)
+ ret |= test_compress(in_buf, in_size, SYNC_FLUSH);
+ if (!ret)
+ ret |= test_compress(in_buf, in_size, FULL_FLUSH);
+ if (!ret)
+ ret |= test_flush(in_buf, in_size);
+
+ if (ret)
+ printf("Failed on file %s\n", file_name);
+
+ if (in_buf != NULL)
+ free(in_buf);
+
+ return ret;
+}
+
+int create_custom_hufftables(struct isal_hufftables *hufftables_custom, int file_count,
+ char *files[])
+{
+ long int file_length;
+ uint8_t *stream = NULL;
+ struct isal_huff_histogram histogram;
+ FILE *file;
+ int i;
+
+ memset(&histogram, 0, sizeof(histogram));
+
+ for (i = 0; i < file_count; i++) {
+ printf("Processing %s\n", files[i]);
+ file = fopen(files[i], "r");
+ if (file == NULL) {
+ printf("Error opening file\n");
+ return 1;
+ }
+ fseek(file, 0, SEEK_END);
+ file_length = ftell(file);
+ fseek(file, 0, SEEK_SET);
+ file_length -= ftell(file);
+
+		if (file_length > 0) {
+			stream = malloc(file_length);
+			if (stream == NULL) {
+				printf("Failed to allocate memory to read in file\n");
+				fclose(file);
+				return 1;
+			}
+
+			if (fread(stream, 1, file_length, file) != file_length) {
+				printf("Error occurred when reading file\n");
+				fclose(file);
+				free(stream);
+				stream = NULL;
+				return 1;
+			}
+		}
+
+		/* Create a histogram of the frequency of symbols found in the
+		 * stream to generate the Huffman tree */
+ isal_update_histogram(stream, file_length, &histogram);
+
+ fclose(file);
+ if (stream != NULL) {
+ free(stream);
+ stream = NULL;
+ }
+ }
+
+ return isal_create_hufftables(hufftables_custom, &histogram);
+
+}
+
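+/* Illustrative sketch (hypothetical caller): attaching the generated table
+ * to a compression stream. isal_deflate_init() installs the default table,
+ * so the custom one is assigned after init:
+ *
+ *	struct isal_hufftables huff;
+ *	struct isal_zstream s;
+ *	if (create_custom_hufftables(&huff, file_count, files) == 0) {
+ *		isal_deflate_init(&s);
+ *		s.hufftables = &huff;
+ *	}
+ */
+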
+int main(int argc, char *argv[])
+{
+ int i = 0, j = 0, ret = 0, fin_ret = 0;
+ uint32_t in_size = 0, offset = 0;
+ uint8_t *in_buf;
+ struct isal_hufftables hufftables_custom, hufftables_sub;
+ uint64_t iterations, large_buf_size;
+ size_t argv_index;
+ char **input_files;
+ size_t file_count;
+
+ argv_index = parse_options(argc, argv);
+
+ input_files = &argv[argv_index];
+ file_count = argc - argv_index;
+
+ if (options.verbose)
+ setbuf(stdout, NULL);
+
+ printf("Window Size: %d K\n", IGZIP_HIST_SIZE / 1024);
+ printf("Test Seed : %d\n", options.test_seed);
+ printf("Randoms : %d\n", options.randoms);
+ srand(options.test_seed);
+
+ hufftables_subset = &hufftables_sub;
+ if (file_count > 0) {
+ ret = create_custom_hufftables(&hufftables_custom, file_count, input_files);
+ if (ret == 0)
+ hufftables = &hufftables_custom;
+ else {
+ printf("Failed to generate custom hufftable");
+ return -1;
+ }
+ }
+
+	in_buf = malloc(IBUF_SIZE);
+
+	if (in_buf == NULL) {
+		fprintf(stderr, "Can't allocate in_buf memory\n");
+		return -1;
+	}
+	memset(in_buf, 0, IBUF_SIZE);
+
+ if (file_count > 0) {
+ printf("igzip_rand_test files: ");
+
+ for (i = 0; i < file_count; i++) {
+ ret |= test_compress_file(input_files[i]);
+ if (ret)
+ return ret;
+ }
+
+ printf("................");
+ printf("%s\n", ret ? "Fail" : "Pass");
+ fin_ret |= ret;
+ }
+
+ printf("igzip_rand_test stateless: ");
+
+ ret = test_compress_stateless((uint8_t *) str1, sizeof(str1), NO_FLUSH);
+ if (ret)
+ return ret;
+
+ ret |= test_compress_stateless((uint8_t *) str2, sizeof(str2), NO_FLUSH);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < options.randoms; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_compress_stateless(in_buf, in_size, NO_FLUSH);
+
+ in_buf -= offset;
+
+ if (i % (options.randoms / 16) == 0)
+ printf(".");
+
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < options.randoms / 16; i++) {
+ create_rand_repeat_data(in_buf, PAGE_SIZE);
+ ret |= test_compress_stateless(in_buf, PAGE_SIZE, NO_FLUSH); // good for efence
+ if (ret)
+ return ret;
+ }
+
+ fin_ret |= ret;
+
+ ret = test_compress_stateless((uint8_t *) str1, sizeof(str1), SYNC_FLUSH);
+ if (ret)
+ return ret;
+
+ ret |= test_compress_stateless((uint8_t *) str2, sizeof(str2), SYNC_FLUSH);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < 16; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_compress_stateless(in_buf, in_size, SYNC_FLUSH);
+
+ in_buf -= offset;
+
+ if (ret)
+ return ret;
+ }
+
+ fin_ret |= ret;
+
+ printf("%s\n", ret ? "Fail" : "Pass");
+
+ printf("igzip_rand_test stateless FULL_FLUSH: ");
+
+ ret = test_compress_stateless((uint8_t *) str1, sizeof(str1), FULL_FLUSH);
+ if (ret)
+ return ret;
+
+ ret |= test_compress_stateless((uint8_t *) str2, sizeof(str2), FULL_FLUSH);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < options.randoms; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_compress_stateless(in_buf, in_size, FULL_FLUSH);
+
+ in_buf -= offset;
+
+ if (i % (options.randoms / 16) == 0)
+ printf(".");
+
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < options.randoms / 16; i++) {
+ create_rand_repeat_data(in_buf, PAGE_SIZE);
+ ret |= test_compress_stateless(in_buf, PAGE_SIZE, FULL_FLUSH); // good for efence
+ if (ret)
+ return ret;
+ }
+ fin_ret |= ret;
+
+ printf("%s\n", ret ? "Fail" : "Pass");
+
+ printf("igzip_rand_test stateful NO_FLUSH: ");
+
+ memcpy(in_buf, str1, sizeof(str1));
+ ret = test_compress(in_buf, sizeof(str1), NO_FLUSH);
+ if (ret)
+ return ret;
+
+ memcpy(in_buf, str2, sizeof(str2));
+ ret |= test_compress(in_buf, sizeof(str2), NO_FLUSH);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < options.randoms; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_compress(in_buf, in_size, NO_FLUSH);
+
+ in_buf -= offset;
+
+ if (i % (options.randoms / 16) == 0)
+ printf(".");
+ if (ret)
+ return ret;
+ }
+
+ fin_ret |= ret;
+
+ printf("%s\n", ret ? "Fail" : "Pass");
+
+ printf("igzip_rand_test stateful SYNC_FLUSH: ");
+
+ memcpy(in_buf, str1, sizeof(str1));
+ ret = test_compress(in_buf, sizeof(str1), SYNC_FLUSH);
+ if (ret)
+ return ret;
+
+ memcpy(in_buf, str2, sizeof(str2));
+ ret |= test_compress(in_buf, sizeof(str2), SYNC_FLUSH);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < options.randoms; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_compress(in_buf, in_size, SYNC_FLUSH);
+
+ in_buf -= offset;
+
+ if (i % (options.randoms / 16) == 0)
+ printf(".");
+ if (ret)
+ return ret;
+ }
+
+ fin_ret |= ret;
+
+ printf("%s\n", ret ? "Fail" : "Pass");
+
+ printf("igzip_rand_test stateful FULL_FLUSH: ");
+
+ memcpy(in_buf, str1, sizeof(str1));
+ ret = test_compress(in_buf, sizeof(str1), FULL_FLUSH);
+ if (ret)
+ return ret;
+
+ memcpy(in_buf, str2, sizeof(str2));
+ ret |= test_compress(in_buf, sizeof(str2), FULL_FLUSH);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < options.randoms; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_compress(in_buf, in_size, FULL_FLUSH);
+
+ in_buf -= offset;
+
+ if (i % (options.randoms / 16) == 0)
+ printf(".");
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < options.randoms / 8; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_full_flush(in_buf, in_size);
+
+ in_buf -= offset;
+
+ if (ret)
+ return ret;
+ }
+
+ fin_ret |= ret;
+
+ printf("%s\n", ret ? "Fail" : "Pass");
+
+ printf("igzip_rand_test stateful Change Flush: ");
+
+ ret = test_flush((uint8_t *) str1, sizeof(str1));
+ if (ret)
+ return ret;
+
+ ret |= test_flush((uint8_t *) str2, sizeof(str2));
+ if (ret)
+ return ret;
+
+ for (i = 0; i < options.randoms / 4; i++) {
+ in_size = get_rand_data_length();
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_flush(in_buf, in_size);
+
+ in_buf -= offset;
+
+ if (i % ((options.randoms / 4) / 16) == 0)
+ printf(".");
+ if (ret)
+ return ret;
+ }
+
+ fin_ret |= ret;
+
+ printf("%s\n", ret ? "Fail" : "Pass");
+
+ if (options.do_large_test) {
+ printf("igzip_rand_test large input ");
+
+ iterations = options.randoms / 256 + 1;
+ for (i = 0; i < iterations; i++) {
+ in_size = rand() % (32 * 1024) + 16 * 1024;
+ offset = rand() % (IBUF_SIZE + 1 - in_size);
+ in_buf += offset;
+
+ large_buf_size = 1;
+ large_buf_size <<= 32;
+ large_buf_size += rand() % (1024 * 1024) + 1;
+ create_rand_repeat_data(in_buf, in_size);
+
+ ret |= test_large(in_buf, in_size, large_buf_size);
+
+ if (ret)
+ return ret;
+
+ in_buf -= offset;
+
+ if (iterations < 16) {
+ for (j = 0; j < 16 / iterations; j++)
+ printf(".");
+ } else if (i % (iterations / 16) == 0)
+ printf(".");
+
+ }
+
+ if (iterations < 16) {
+ for (j = (16 / iterations) * iterations; j < 16; j++)
+ printf(".");
+ }
+
+ printf("%s\n", ret ? "Fail" : "Pass");
+ }
+
+ printf("igzip_rand_test inflate Std Vectors: ");
+
+ for (i = 0; i < sizeof(std_vect_array) / sizeof(struct vect_result); i++) {
+ ret = test_inflate(&std_vect_array[i]);
+ if (ret)
+ return ret;
+ }
+ printf("................");
+ printf("%s\n", ret ? "Fail" : "Pass");
+
+ printf("igzip rand test finished: %s\n",
+ fin_ret ? "Some tests failed" : "All tests passed");
+
+ return fin_ret != IGZIP_COMP_OK;
+}
diff --git a/src/isa-l/igzip/igzip_semi_dyn_file_perf.c b/src/isa-l/igzip/igzip_semi_dyn_file_perf.c
new file mode 100644
index 000000000..79e7d2754
--- /dev/null
+++ b/src/isa-l/igzip/igzip_semi_dyn_file_perf.c
@@ -0,0 +1,334 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define _FILE_OFFSET_BITS 64
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include "igzip_lib.h"
+#include "test.h"
+
+#define MIN_BUF_SIZE (4 * 1024)
+#define MIN_TEST_LOOPS 10
+#ifndef RUN_MEM_SIZE
+# define RUN_MEM_SIZE 500000000
+#endif
+
+#define DEFAULT_SEG_SIZE (512 * 1024)
+#define DEFAULT_SAMPLE_SIZE (32 * 1024)
+
+int usage(void)
+{
+ fprintf(stderr,
+ "Usage: igzip_semi_dynamic [options] <infile>\n"
+ " -h help\n"
+ " -v (don't) validate output by inflate and compare\n"
+ " -t <type> 1:stateless 0:(default)stateful\n"
+ " -c <size> chunk size default=%d\n"
+ " -s <size> sample size default=%d\n"
+ " -o <file> output file\n", DEFAULT_SEG_SIZE, DEFAULT_SAMPLE_SIZE);
+ exit(0);
+}
+
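+/* Parse a size argument with an optional suffix: lowercase k/m/g scale by
+ * powers of 1024, uppercase K/M/G by powers of 1000. For example, "64k"
+ * yields 64 * 1024 while "64K" yields 64 * 1000; a bare number is returned
+ * unchanged. */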
+int str_to_i(char *s)
+{
+#define ARG_MAX 32
+
+ int i = atoi(s);
+ int len = strnlen(s, ARG_MAX);
+ if (len < 2 || len == ARG_MAX)
+ return i;
+
+ switch (s[len - 1]) {
+ case 'k':
+ i *= 1024;
+ break;
+ case 'K':
+ i *= 1000;
+ break;
+ case 'm':
+ i *= (1024 * 1024);
+ break;
+ case 'M':
+ i *= (1000 * 1000);
+ break;
+ case 'g':
+ i *= (1024 * 1024 * 1024);
+ break;
+ case 'G':
+ i *= (1000 * 1000 * 1000);
+ break;
+ }
+ return i;
+}
+
+void semi_dyn_stateless_perf(struct isal_zstream *stream, uint8_t * inbuf,
+ uint64_t infile_size, uint8_t * outbuf, uint64_t outbuf_size,
+ int segment_size, int hist_size)
+{
+ struct isal_huff_histogram histogram;
+ struct isal_hufftables hufftable;
+
+ isal_deflate_stateless_init(stream);
+ stream->end_of_stream = 0;
+ stream->flush = FULL_FLUSH;
+ stream->next_in = inbuf;
+ stream->next_out = outbuf;
+ int remaining = infile_size;
+ int chunk_size = segment_size;
+
+ while (remaining > 0) {
+ // Generate custom hufftables on sample
+ memset(&histogram, 0, sizeof(struct isal_huff_histogram));
+ if (remaining < segment_size * 2) {
+ chunk_size = remaining;
+ stream->end_of_stream = 1;
+ }
+ int hist_rem = (hist_size > chunk_size) ? chunk_size : hist_size;
+ isal_update_histogram(stream->next_in, hist_rem, &histogram);
+
+ if (hist_rem == chunk_size)
+ isal_create_hufftables_subset(&hufftable, &histogram);
+ else
+ isal_create_hufftables(&hufftable, &histogram);
+
+ // Compress with custom table
+ stream->avail_in = chunk_size;
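+		/* Output bound: input size plus roughly 8 bytes of block
+		 * overhead per 64 KB, enough for stored (type 0) blocks on
+		 * incompressible data */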
+ stream->avail_out = chunk_size + 8 * (1 + (chunk_size >> 16));
+ stream->hufftables = &hufftable;
+ remaining -= chunk_size;
+ isal_deflate_stateless(stream);
+ if (stream->avail_in != 0)
+ break;
+ }
+}
+
+void semi_dyn_stateful_perf(struct isal_zstream *stream, uint8_t * inbuf,
+ uint64_t infile_size, uint8_t * outbuf, uint64_t outbuf_size,
+ int segment_size, int hist_size)
+{
+ struct isal_huff_histogram histogram;
+ struct isal_hufftables hufftable;
+
+ isal_deflate_init(stream);
+ stream->end_of_stream = 0;
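+	/* SYNC_FLUSH byte aligns each segment but, unlike the FULL_FLUSH used
+	 * in the stateless path, keeps the history window across segments */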
+ stream->flush = SYNC_FLUSH;
+ stream->next_in = inbuf;
+ stream->next_out = outbuf;
+ stream->avail_out = outbuf_size;
+ int remaining = infile_size;
+ int chunk_size = segment_size;
+
+ while (remaining > 0) {
+ // Generate custom hufftables on sample
+ memset(&histogram, 0, sizeof(struct isal_huff_histogram));
+ if (remaining < segment_size * 2) {
+ chunk_size = remaining;
+ stream->end_of_stream = 1;
+ }
+ int hist_rem = (hist_size > chunk_size) ? chunk_size : hist_size;
+ isal_update_histogram(stream->next_in, hist_rem, &histogram);
+
+ if (hist_rem == chunk_size)
+ isal_create_hufftables_subset(&hufftable, &histogram);
+ else
+ isal_create_hufftables(&hufftable, &histogram);
+
+ // Compress with custom table
+ stream->avail_in = chunk_size;
+ stream->hufftables = &hufftable;
+ remaining -= chunk_size;
+ isal_deflate(stream);
+ if (stream->internal_state.state != ZSTATE_NEW_HDR)
+ break;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *in = stdin, *out = NULL;
+ unsigned char *inbuf, *outbuf;
+ int i = 0, c;
+ uint64_t infile_size, outbuf_size;
+ int segment_size = DEFAULT_SEG_SIZE;
+ int sample_size = DEFAULT_SAMPLE_SIZE;
+ int check_output = 1;
+ int do_stateless = 0, do_stateful = 1;
+ int ret = 0;
+ char *out_file_name = NULL;
+ struct isal_zstream stream;
+
+ while ((c = getopt(argc, argv, "vht:c:s:o:")) != -1) {
+ switch (c) {
+ case 'v':
+ check_output ^= 1;
+ break;
+ case 't':
+ if (atoi(optarg) == 1) {
+ do_stateful = 0;
+ do_stateless = 1;
+ }
+ break;
+ case 'c':
+ segment_size = str_to_i(optarg);
+ break;
+ case 's':
+ sample_size = str_to_i(optarg);
+ break;
+ case 'o':
+ out_file_name = optarg;
+ break;
+ case 'h':
+ default:
+ usage();
+ break;
+ }
+ }
+
+ // Open input file
+ if (optind < argc) {
+ if (!(in = fopen(argv[optind], "rb"))) {
+ fprintf(stderr, "Can't open %s for reading\n", argv[optind]);
+ exit(1);
+ }
+ } else
+ usage();
+
+ // Optionally open output file
+ if (out_file_name != NULL) {
+ if (!(out = fopen(out_file_name, "wb"))) {
+ fprintf(stderr, "Can't open %s for writing\n", out_file_name);
+ exit(1);
+ }
+ }
+
+ printf("Window Size: %d K\n", IGZIP_HIST_SIZE / 1024);
+
+ /*
+ * Allocate space for entire input file and output
+ * (assuming some possible expansion on output size)
+ */
+ infile_size = get_filesize(in);
+ if (infile_size == 0) {
+ printf("Input file has zero length\n");
+ usage();
+ }
+
+ outbuf_size = infile_size * 1.30 > MIN_BUF_SIZE ? infile_size * 1.30 : MIN_BUF_SIZE;
+
+	if (NULL == (inbuf = malloc(infile_size))) {
+		fprintf(stderr, "Can't allocate input buffer memory\n");
+		exit(1);
+	}
+	if (NULL == (outbuf = malloc(outbuf_size))) {
+		fprintf(stderr, "Can't allocate output buffer memory\n");
+		exit(1);
+	}
+
+ int hist_size = sample_size > segment_size ? segment_size : sample_size;
+
+ printf("semi-dynamic sample=%d segment=%d %s\n", hist_size, segment_size,
+ do_stateful ? "stateful" : "stateless");
+ printf("igzip_file_perf: %s\n", argv[optind]);
+
+ // Read complete input file into buffer
+ stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in);
+	if (stream.avail_in != infile_size) {
+		fprintf(stderr, "Couldn't fit all of input file into buffer\n");
+		exit(1);
+	}
+
+ struct perf start;
+
+ if (do_stateful) {
+ BENCHMARK(&start, BENCHMARK_TIME,
+ semi_dyn_stateful_perf(&stream, inbuf, infile_size, outbuf,
+ outbuf_size, segment_size, hist_size)
+ );
+ }
+
+ if (do_stateless) {
+ BENCHMARK(&start, BENCHMARK_TIME,
+ semi_dyn_stateless_perf(&stream, inbuf, infile_size, outbuf,
+ outbuf_size, segment_size, hist_size));
+ }
+
+ if (stream.avail_in != 0) {
+ printf("Could not compress all of inbuf\n");
+ ret = 1;
+ }
+
+ printf(" file %s - in_size=%lu out_size=%d iter=%d ratio=%3.1f%%\n", argv[optind],
+ infile_size, stream.total_out, i, 100.0 * stream.total_out / infile_size);
+
+ printf("igzip_semi_dyn_file: ");
+ perf_print(start, (long long)infile_size);
+
+ if (out != NULL) {
+ printf("writing %s\n", out_file_name);
+ fwrite(outbuf, 1, stream.total_out, out);
+ fclose(out);
+ }
+
+ fclose(in);
+
+ if (check_output) {
+ unsigned char *inflate_buf;
+ struct inflate_state istate;
+
+		if (NULL == (inflate_buf = malloc(infile_size))) {
+			fprintf(stderr, "Can't allocate reconstruct buffer memory\n");
+			exit(1);
+		}
+ isal_inflate_init(&istate);
+ istate.next_in = outbuf;
+ istate.avail_in = stream.total_out;
+ istate.next_out = inflate_buf;
+ istate.avail_out = infile_size;
+ int check = isal_inflate(&istate);
+
+ if (memcmp(inflate_buf, inbuf, infile_size)) {
+ printf("inflate check Fail\n");
+ printf(" ret %d total_inflate=%d\n", check, istate.total_out);
+ for (i = 0; i < infile_size; i++) {
+ if (inbuf[i] != inflate_buf[i]) {
+ printf(" first diff at offset=%d\n", i);
+ break;
+ }
+ }
+ ret = 1;
+ } else
+ printf("inflate check Pass\n");
+ free(inflate_buf);
+ }
+
+ printf("End of igzip_semi_dyn_file_perf\n\n");
+ return ret;
+}
diff --git a/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm b/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm
new file mode 100644
index 000000000..09fcb6468
--- /dev/null
+++ b/src/isa-l/igzip/igzip_set_long_icf_fg_04.asm
@@ -0,0 +1,300 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "igzip_compare_types.asm"
+%define NEQ 4
+
+default rel
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define arg4 r9
+%define len rdi
+%define tmp2 rdi
+%define dist rsi
+%else
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define arg4 rcx
+%define len r8
+%define tmp2 r8
+%define dist r9
+%endif
+
+%define next_in arg1
+%define end_processed arg2
+%define end_in arg3
+%define match_lookup arg4
+%define match_in rax
+%define match_offset r10
+%define tmp1 r11
+%define end_processed_orig r12
+%define dist_code r13
+%define tmp3 r13
+
+%define ymatch_lookup ymm0
+%define ymatch_lookup2 ymm1
+%define ylens ymm2
+%define ycmp2 ymm3
+%define ylens1 ymm4
+%define ylens2 ymm5
+%define ycmp ymm6
+%define ytmp1 ymm7
+%define ytmp2 ymm8
+%define yvect_size ymm9
+%define ymax_len ymm10
+%define ytwofiftysix ymm11
+%define ynlen_mask ymm12
+%define ydists_mask ymm13
+%define ylong_lens ymm14
+%define ylens_mask ymm15
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define stack_size 10*16 + 4 * 8 + 8
+%define func(x) proc_frame x
+%macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ vmovdqa [rsp + 8*16], xmm14
+ vmovdqa [rsp + 9*16], xmm15
+ save_reg rsi, 10*16 + 0*8
+ save_reg rdi, 10*16 + 1*8
+ save_reg r12, 10*16 + 2*8
+ save_reg r13, 10*16 + 3*8
+ end_prolog
+%endm
+
+%macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+ vmovdqa xmm14, [rsp + 8*16]
+ vmovdqa xmm15, [rsp + 9*16]
+
+ mov rsi, [rsp + 10*16 + 0*8]
+ mov rdi, [rsp + 10*16 + 1*8]
+ mov r12, [rsp + 10*16 + 2*8]
+ mov r13, [rsp + 10*16 + 3*8]
+ add rsp, stack_size
+%endm
+%else
+%define func(x) x: endbranch
+%macro FUNC_SAVE 0
+ push r12
+ push r13
+%endm
+
+%macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+%endm
+%endif
+%define VECT_SIZE 8
+
+[bits 64]
+default rel
+section .text
+
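+; set_long_icf_fg_04: scan the ICF match_lookup array (one 32-bit code per
+; input byte) for long-match candidates, re-measure each candidate by
+; comparing next_in against next_in - dist, and write the widened length
+; code back over the following lookup entries. Processes 8 entries per
+; AVX2 iteration.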
+global set_long_icf_fg_04
+func(set_long_icf_fg_04)
+ endbranch
+ FUNC_SAVE
+
+ lea end_in, [next_in + arg3]
+ add end_processed, next_in
+ mov end_processed_orig, end_processed
+ lea tmp1, [end_processed + LA_STATELESS]
+ cmp end_in, tmp1
+ cmovg end_in, tmp1
+ sub end_processed, VECT_SIZE - 1
+ vmovdqu ylong_lens, [long_len]
+ vmovdqu ylens_mask, [len_mask]
+ vmovdqu ydists_mask, [dists_mask]
+ vmovdqu ynlen_mask, [nlen_mask]
+ vmovdqu yvect_size, [vect_size]
+ vmovdqu ymax_len, [max_len]
+ vmovdqu ytwofiftysix, [twofiftysix]
+ vmovdqu ymatch_lookup, [match_lookup]
+
+.fill_loop: ; Tahiti is a magical place
+ vmovdqu ymatch_lookup2, ymatch_lookup
+ vmovdqu ymatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]
+
+ cmp next_in, end_processed
+ jae .end_fill
+
+.finish_entry:
+ vpand ylens, ymatch_lookup2, ylens_mask
+ vpcmpgtd ycmp, ylens, ylong_lens
+ vpmovmskb tmp1, ycmp
+
+;; Speculatively increment
+ add next_in, VECT_SIZE
+ add match_lookup, ICF_CODE_BYTES * VECT_SIZE
+
+ test tmp1, tmp1
+ jz .fill_loop
+
+ tzcnt match_offset, tmp1
+ shr match_offset, 2
+
+ lea next_in, [next_in + match_offset - VECT_SIZE]
+ lea match_lookup, [match_lookup + ICF_CODE_BYTES * (match_offset - VECT_SIZE)]
+ mov dist %+ d, [match_lookup]
+ vmovd ymatch_lookup2 %+ x, dist %+ d
+
+ mov tmp1, dist
+ shr dist, DIST_OFFSET
+ and dist, LIT_DIST_MASK
+ shr tmp1, EXTRA_BITS_OFFSET
+ lea tmp2, [dist_start]
+ mov dist %+ w, [tmp2 + 2 * dist]
+ add dist, tmp1
+
+ mov match_in, next_in
+ sub match_in, dist
+
+ mov len, 8
+ mov tmp3, end_in
+ sub tmp3, next_in
+
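+	; Measure the true match length: compare the bytes at next_in and
+	; match_in, up to tmp3 = end_in - next_in bytes; result is in len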
+ compare_y next_in, match_in, len, tmp3, tmp1, ytmp1, ytmp2
+
+ vmovd ylens1 %+ x, len %+ d
+ vpbroadcastd ylens1, ylens1 %+ x
+ vpsubd ylens1, ylens1, [increment]
+ vpaddd ylens1, ylens1, [twofiftyfour]
+
+ mov tmp3, end_processed
+ sub tmp3, next_in
+ cmp len, tmp3
+ cmovg len, tmp3
+
+ add next_in, len
+ lea match_lookup, [match_lookup + ICF_CODE_BYTES * len]
+ vmovdqu ymatch_lookup, [match_lookup]
+
+ vpbroadcastd ymatch_lookup2, ymatch_lookup2 %+ x
+ vpand ymatch_lookup2, ymatch_lookup2, ynlen_mask
+
+ neg len
+
+.update_match_lookup:
+ vpand ylens2, ylens_mask, [match_lookup + ICF_CODE_BYTES * len]
+
+ vpcmpgtd ycmp, ylens1, ylens2
+ vpcmpgtd ytmp1, ylens1, ytwofiftysix
+ vpand ycmp, ycmp, ytmp1
+ vpmovmskb tmp1, ycmp
+
+ vpcmpgtd ycmp2, ylens1, ymax_len
+ vpandn ylens, ycmp2, ylens1
+ vpand ycmp2, ymax_len, ycmp2
+ vpor ylens, ycmp2
+
+ vpaddd ylens2, ylens, ymatch_lookup2
+ vpand ylens2, ylens2, ycmp
+
+ vpmaskmovd [match_lookup + ICF_CODE_BYTES * len], ycmp, ylens2
+
+ test tmp1 %+ d, tmp1 %+ d
+ jz .fill_loop
+
+ add len, VECT_SIZE
+ vpsubd ylens1, ylens1, yvect_size
+
+ jmp .update_match_lookup
+
+.end_fill:
+ mov end_processed, end_processed_orig
+ cmp next_in, end_processed
+ jge .finish
+
+ mov tmp1, end_processed
+ sub tmp1, next_in
+ vmovd ytmp1 %+ x, tmp1 %+ d
+ vpbroadcastd ytmp1, ytmp1 %+ x
+ vpcmpgtd ytmp1, ytmp1, [increment]
+ vpand ymatch_lookup2, ymatch_lookup2, ytmp1
+ jmp .finish_entry
+
+.finish:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+align 64
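+; Base match distances for the 30 DEFLATE distance codes (RFC 1951);
+; the last two entries are padding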
+dist_start:
+ dw 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
+ dw 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+ dw 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+ dw 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
+len_mask:
+ dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
+ dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK
+dists_mask:
+ dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
+ dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK
+long_len:
+ dd 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105
+increment:
+ dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+vect_size:
+ dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
+ dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE
+twofiftyfour:
+ dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe
+twofiftysix:
+ dd 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100
+nlen_mask:
+ dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
+ dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00
+max_len:
+ dd 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102
+ dd 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102, 0xfe + 0x102
diff --git a/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm b/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm
new file mode 100644
index 000000000..3152ef427
--- /dev/null
+++ b/src/isa-l/igzip/igzip_set_long_icf_fg_06.asm
@@ -0,0 +1,372 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "igzip_compare_types.asm"
+%define NEQ 4
+
+%ifdef HAVE_AS_KNOWS_AVX512
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define arg4 r9
+%define len rdi
+%define dist rsi
+%else
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define arg4 rcx
+%define len r8
+%define dist r9
+%endif
+
+%define next_in arg1
+%define end_processed arg2
+%define end_in arg3
+%define match_lookup arg4
+%define match_in rax
+%define match_offset r10
+%define tmp1 r11
+%define end_processed_orig r12
+%define dist_code r13
+%define tmp2 r13
+
+%define zmatch_lookup zmm0
+%define zmatch_lookup2 zmm1
+%define zlens zmm2
+%define zdist_codes zmm3
+%define zdist_extras zmm4
+%define zdists zmm5
+%define zdists2 zmm6
+%define zlens1 zmm7
+%define zlens2 zmm8
+%define zlookup zmm9
+%define zlookup2 zmm10
+%define datas zmm11
+%define ztmp1 zmm12
+%define ztmp2 zmm13
+%define zvect_size zmm16
+%define zmax_len zmm17
+%define ztwofiftyfour zmm18
+%define ztwofiftysix zmm19
+%define ztwosixtytwo zmm20
+%define znlen_mask zmm21
+%define zbswap zmm22
+%define zqword_shuf zmm23
+%define zdatas_perm3 zmm24
+%define zdatas_perm2 zmm25
+%define zincrement zmm26
+%define zdists_mask zmm27
+%define zdists_start zmm28
+%define zlong_lens2 zmm29
+%define zlong_lens zmm30
+%define zlens_mask zmm31
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define stack_size 8*16 + 4 * 8 + 8
+%define func(x) proc_frame x
+%macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm12
+ vmovdqa [rsp + 7*16], xmm13
+ save_reg rsi, 8*16 + 0*8
+ save_reg rdi, 8*16 + 1*8
+ save_reg r12, 8*16 + 2*8
+ save_reg r13, 8*16 + 3*8
+ end_prolog
+%endm
+
+%macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm12, [rsp + 6*16]
+ vmovdqa xmm13, [rsp + 7*16]
+
+ mov rsi, [rsp + 8*16 + 0*8]
+ mov rdi, [rsp + 8*16 + 1*8]
+ mov r12, [rsp + 8*16 + 2*8]
+ mov r13, [rsp + 8*16 + 3*8]
+ add rsp, stack_size
+%endm
+%else
+%define func(x) x: endbranch
+%macro FUNC_SAVE 0
+ push r12
+ push r13
+%endm
+
+%macro FUNC_RESTORE 0
+ pop r13
+ pop r12
+%endm
+%endif
+%define VECT_SIZE 16
+
+[bits 64]
+default rel
+section .text
+
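+; AVX512 variant of set_long_icf_fg: same flow as the AVX2 version above but
+; re-measuring and updating 16 lookup entries per iteration with masked
+; stores.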
+global set_long_icf_fg_06
+func(set_long_icf_fg_06)
+ endbranch
+ FUNC_SAVE
+
+ lea end_in, [next_in + arg3]
+ add end_processed, next_in
+ mov end_processed_orig, end_processed
+ lea tmp1, [end_processed + LA_STATELESS]
+ cmp end_in, tmp1
+ cmovg end_in, tmp1
+ sub end_processed, 15
+ vpbroadcastd zlong_lens, [long_len]
+ vpbroadcastd zlong_lens2, [long_len2]
+ vpbroadcastd zlens_mask, [len_mask]
+ vmovdqu16 zdists_start, [dist_start]
+ vpbroadcastd zdists_mask, [dists_mask]
+ vmovdqu32 zincrement, [increment]
+ vbroadcasti64x2 zdatas_perm2, [datas_perm2]
+ vbroadcasti64x2 zdatas_perm3, [datas_perm3]
+ vmovdqu64 zqword_shuf, [qword_shuf]
+ vbroadcasti64x2 zbswap, [bswap_shuf]
+ vpbroadcastd znlen_mask, [nlen_mask]
+ vpbroadcastd zvect_size, [vect_size]
+ vpbroadcastd zmax_len, [max_len]
+ vpbroadcastd ztwofiftyfour, [twofiftyfour]
+ vpbroadcastd ztwofiftysix, [twofiftysix]
+ vpbroadcastd ztwosixtytwo, [twosixtytwo]
+ vmovdqu32 zmatch_lookup, [match_lookup]
+
+.fill_loop: ; Tahiti is a magical place
+ vmovdqu32 zmatch_lookup2, zmatch_lookup
+ vmovdqu32 zmatch_lookup, [match_lookup + ICF_CODE_BYTES * VECT_SIZE]
+
+ cmp next_in, end_processed
+ jae .end_fill
+
+.finish_entry:
+ vpandd zlens, zmatch_lookup2, zlens_mask
+ vpcmpgtd k3, zlens, zlong_lens
+
+;; Speculatively increment
+ add next_in, VECT_SIZE
+ add match_lookup, ICF_CODE_BYTES * VECT_SIZE
+
+ ktestw k3, k3
+ jz .fill_loop
+
+ vpsrld zdist_codes, zmatch_lookup2, DIST_OFFSET
+ vpmovdw zdists %+ y, zdist_codes ; Relies on perm working mod 32
+ vpermw zdists, zdists, zdists_start
+ vpmovzxwd zdists, zdists %+ y
+
+ vpsrld zdist_extras, zmatch_lookup2, EXTRA_BITS_OFFSET
+ vpsubd zdist_extras, zincrement, zdist_extras
+
+ vpsubd zdists, zdist_extras, zdists
+ vextracti32x8 zdists2 %+ y, zdists, 1
+ kmovb k6, k3
+ kshiftrw k7, k3, 8
+ vpgatherdq zlens1 {k6}, [next_in + zdists %+ y - 8]
+ vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y - 8]
+
+ vmovdqu8 datas %+ y, [next_in - 8]
+ vpermq zlookup, zdatas_perm2, datas
+ vpshufb zlookup, zlookup, zqword_shuf
+ vpermq zlookup2, zdatas_perm3, datas
+ vpshufb zlookup2, zlookup2, zqword_shuf
+
+ vpxorq zlens1, zlens1, zlookup
+ vpxorq zlens2, zlens2, zlookup2
+
+ vpshufb zlens1, zlens1, zbswap
+ vpshufb zlens2, zlens2, zbswap
+ vplzcntq zlens1, zlens1
+ vplzcntq zlens2, zlens2
+ vpmovqd zlens1 %+ y, zlens1
+ vpmovqd zlens2 %+ y, zlens2
+ vinserti32x8 zlens1, zlens2 %+ y, 1
+ vpsrld zlens1 {k3}{z}, zlens1, 3
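+;; The xor above zeroes every byte that still matches; after the byte
+;; swap, vplzcntq counts that matching prefix in bits, and the shift by
+;; 3 converts it to whole bytes.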
+
+ vpandd zmatch_lookup2 {k3}{z}, zmatch_lookup2, znlen_mask
+ vpaddd zmatch_lookup2 {k3}{z}, zmatch_lookup2, ztwosixtytwo
+ vpaddd zmatch_lookup2 {k3}{z}, zmatch_lookup2, zlens1
+
+ vmovdqu32 [match_lookup - ICF_CODE_BYTES * VECT_SIZE] {k3}, zmatch_lookup2
+
+ vpcmpgtd k3, zlens1, zlong_lens2
+ ktestw k3, k3
+ jz .fill_loop
+
+ vpsubd zdists, zincrement, zdists
+
+ vpcompressd zdists2 {k3}, zdists
+ vpcompressd zmatch_lookup2 {k3}, zmatch_lookup2
+ kmovq match_offset, k3
+ tzcnt match_offset, match_offset
+
+ vmovd dist %+ d, zdists2 %+ x
+ lea next_in, [next_in + match_offset - VECT_SIZE]
+ lea match_lookup, [match_lookup + ICF_CODE_BYTES * (match_offset - VECT_SIZE)]
+ mov match_in, next_in
+ sub match_in, dist
+
+ mov len, 16
+ mov tmp2, end_in
+ sub tmp2, next_in
+
+ compare_z next_in, match_in, len, tmp2, tmp1, k3, ztmp1, ztmp2
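+;; The compare_z helper extends the match length in len, comparing the
+;; data at next_in with the history at match_in, bounded by the
+;; remaining input bytes in tmp2.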
+
+ vpbroadcastd zlens1, len %+ d
+ vpsubd zlens1, zlens1, zincrement
+ vpaddd zlens1, zlens1, ztwofiftyfour
+
+ mov tmp2, end_processed
+ sub tmp2, next_in
+ cmp len, tmp2
+ cmovg len, tmp2
+
+ add next_in, len
+ lea match_lookup, [match_lookup + ICF_CODE_BYTES * len]
+ vmovdqu32 zmatch_lookup, [match_lookup]
+
+ vpbroadcastd zmatch_lookup2, zmatch_lookup2 %+ x
+ vpandd zmatch_lookup2, zmatch_lookup2, znlen_mask
+
+ neg len
+
+.update_match_lookup:
+ vpandd zlens2, zlens_mask, [match_lookup + ICF_CODE_BYTES * len]
+ vpcmpgtd k3, zlens1, zlens2
+ vpcmpgtd k4, zlens1, ztwofiftysix
+ kandw k3, k3, k4
+
+ vpcmpgtd k4, zlens1, zmax_len
+ vmovdqu32 zlens, zlens1
+ vmovdqu32 zlens {k4}, zmax_len
+
+ vpaddd zlens2 {k3}{z}, zlens, zmatch_lookup2
+
+ vmovdqu32 [match_lookup + ICF_CODE_BYTES * len] {k3}, zlens2
+
+ knotw k3, k3
+ ktestw k3, k3
+ jnz .fill_loop
+
+ add len, VECT_SIZE
+ vpsubd zlens1, zlens1, zvect_size
+
+ jmp .update_match_lookup
+
+.end_fill:
+ mov end_processed, end_processed_orig
+ cmp next_in, end_processed
+ jge .finish
+
+ mov tmp1, end_processed
+ sub tmp1, next_in
+ vpbroadcastd ztmp1, tmp1 %+ d
+ vpcmpd k3, ztmp1, zincrement, 6
+ vmovdqu32 zmatch_lookup2 {k3}{z}, zmatch_lookup2
+ jmp .finish_entry
+
+.finish:
+
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+align 64
+;; 64 byte data
+dist_start:
+ dw 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
+ dw 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+ dw 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+ dw 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
+qword_shuf:
+ db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8
+ db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9
+ db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa
+ db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb
+ db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc
+ db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd
+ db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe
+ db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+
+;; 16 byte data
+increment:
+ dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+
+datas_perm2:
+ dq 0x0, 0x1
+datas_perm3:
+ dq 0x1, 0x2
+bswap_shuf:
+ db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+ db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+
+;; 4 byte data
+len_mask:
+ dd LIT_LEN_MASK
+dists_mask:
+ dd LIT_DIST_MASK
+long_len:
+ dd 0x105
+long_len2:
+ dd 0x7
+max_len:
+ dd 0xfe + 0x102
+vect_size:
+ dd VECT_SIZE
+twofiftyfour:
+ dd 0xfe
+twofiftysix:
+ dd 0x100
+twosixtytwo:
+ dd 0x106
+nlen_mask:
+ dd 0xfffffc00
+%endif
diff --git a/src/isa-l/igzip/igzip_sync_flush_example.c b/src/isa-l/igzip/igzip_sync_flush_example.c
new file mode 100644
index 000000000..0351d5c01
--- /dev/null
+++ b/src/isa-l/igzip/igzip_sync_flush_example.c
@@ -0,0 +1,86 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "igzip_lib.h"
+
+#define BUF_SIZE (8 * 1024)
+
+struct isal_zstream stream;
+
+int main(int argc, char *argv[])
+{
+ uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE];
+ FILE *in, *out;
+
+ if (argc != 3) {
+ fprintf(stderr, "Usage: igzip_sync_flush_example infile outfile\n");
+ exit(1);
+ }
+ in = fopen(argv[1], "rb");
+ if (!in) {
+ fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+ exit(1);
+ }
+ out = fopen(argv[2], "wb");
+ if (!out) {
+ fprintf(stderr, "Can't open %s for writing\n", argv[2]);
+ exit(1);
+ }
+
+ printf("igzip_sync_flush_example\nWindow Size: %d K\n", IGZIP_HIST_SIZE / 1024);
+ fflush(NULL);
+
+ isal_deflate_init(&stream);
+ stream.end_of_stream = 0;
+ stream.flush = SYNC_FLUSH;
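+ /* With SYNC_FLUSH, isal_deflate ends each call on a byte boundary by
+ * emitting an empty stored block, so every byte written so far forms a
+ * decodable prefix of the deflate stream. */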
+
+ do {
+ if (stream.internal_state.state == ZSTATE_NEW_HDR) {
+ stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in);
+ stream.end_of_stream = feof(in) ? 1 : 0;
+ stream.next_in = inbuf;
+ }
+ do {
+ stream.avail_out = BUF_SIZE;
+ stream.next_out = outbuf;
+ isal_deflate(&stream);
+ fwrite(outbuf, 1, BUF_SIZE - stream.avail_out, out);
+ } while (stream.avail_out == 0);
+
+ } while (stream.internal_state.state != ZSTATE_END);
+
+ fclose(out);
+ fclose(in);
+
+ printf("End of igzip_sync_flush_example\n\n");
+ return 0;
+}
diff --git a/src/isa-l/igzip/igzip_update_histogram.asm b/src/isa-l/igzip/igzip_update_histogram.asm
new file mode 100644
index 000000000..698c8be99
--- /dev/null
+++ b/src/isa-l/igzip/igzip_update_histogram.asm
@@ -0,0 +1,579 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+
+%include "lz0a_const.asm"
+%include "data_struct2.asm"
+%include "bitbuf2.asm"
+%include "huffman.asm"
+%include "igzip_compare_types.asm"
+%include "reg_sizes.asm"
+
+%include "stdmac.asm"
+
+extern rfc1951_lookup_table
+_len_to_code_offset equ 0
+
+%define LAST_BYTES_COUNT 3 ; Bytes to prevent reading out of array bounds
+%define LA_STATELESS 280 ; Max number of bytes read in loop2 rounded up to 8 byte boundary
+%define LIT_LEN 286
+%define DIST_LEN 30
+%define HIST_ELEM_SIZE 8
+
+%ifdef DEBUG
+%macro MARK 1
+global %1
+%1:
+%endm
+%else
+%macro MARK 1
+%endm
+%endif
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define file_start rdi
+%define file_length rsi
+%define histogram rdx
+%define rfc_lookup r9
+%define f_i r10
+
+%define curr_data rax
+
+%define tmp2 rcx
+
+%define dist rbx
+%define dist_code2 rbx
+
+%define dist2 r12
+%define dist_code r12
+
+%define len rbp
+%define len_code rbp
+%define hash3 rbp
+
+%define curr_data2 r8
+%define len2 r8
+%define tmp4 r8
+
+%define tmp1 r11
+
+%define tmp3 r13
+
+%define hash r14
+
+%define hash2 r15
+
+%define xtmp0 xmm0
+%define xtmp1 xmm1
+%define xdata xmm2
+
+%define ytmp0 ymm0
+%define ytmp1 ymm1
+
+%if(ARCH == 01)
+%define vtmp0 xtmp0
+%define vtmp1 xtmp1
+%define V_LENGTH 16
+%else
+%define vtmp0 ytmp0
+%define vtmp1 ytmp1
+%define V_LENGTH 32
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+_eob_count_offset equ 0 ; local variable (8 bytes)
+f_end_i_mem_offset equ 8
+gpr_save_mem_offset equ 16 ; gpr save area (8*8 bytes)
+xmm_save_mem_offset equ 16 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned)
+stack_size equ 2*8 + 8*8 + 4*16 + 8
+;;; The extra 8 is because the stack pointer is an odd multiple of 8 right
+;;; after a function call, and we want it aligned to 16 bytes
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define arg0 rdi
+%define arg1 rsi
+%define arg2 rdx
+
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+ push rbp
+ mov rbp, rsp
+ sub rsp, stack_size
+ and rsp, ~15
+%else
+ sub rsp, stack_size
+%endif
+
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rbp
+ mov [rsp + gpr_save_mem_offset + 2*8], r12
+ mov [rsp + gpr_save_mem_offset + 3*8], r13
+ mov [rsp + gpr_save_mem_offset + 4*8], r14
+ mov [rsp + gpr_save_mem_offset + 5*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 1*8]
+ mov r12, [rsp + gpr_save_mem_offset + 2*8]
+ mov r13, [rsp + gpr_save_mem_offset + 3*8]
+ mov r14, [rsp + gpr_save_mem_offset + 4*8]
+ mov r15, [rsp + gpr_save_mem_offset + 5*8]
+
+%ifndef ALIGN_STACK
+ add rsp, stack_size
+%else
+ mov rsp, rbp
+ pop rbp
+%endif
+%endm
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define arg0 rcx
+%define arg1 rdx
+%define arg2 r8
+
+%macro FUNC_SAVE 0
+%ifdef ALIGN_STACK
+ push rbp
+ mov rbp, rsp
+ sub rsp, stack_size
+ and rsp, ~15
+%else
+ sub rsp, stack_size
+%endif
+
+ mov [rsp + gpr_save_mem_offset + 0*8], rbx
+ mov [rsp + gpr_save_mem_offset + 1*8], rsi
+ mov [rsp + gpr_save_mem_offset + 2*8], rdi
+ mov [rsp + gpr_save_mem_offset + 3*8], rbp
+ mov [rsp + gpr_save_mem_offset + 4*8], r12
+ mov [rsp + gpr_save_mem_offset + 5*8], r13
+ mov [rsp + gpr_save_mem_offset + 6*8], r14
+ mov [rsp + gpr_save_mem_offset + 7*8], r15
+%endm
+
+%macro FUNC_RESTORE 0
+ mov rbx, [rsp + gpr_save_mem_offset + 0*8]
+ mov rsi, [rsp + gpr_save_mem_offset + 1*8]
+ mov rdi, [rsp + gpr_save_mem_offset + 2*8]
+ mov rbp, [rsp + gpr_save_mem_offset + 3*8]
+ mov r12, [rsp + gpr_save_mem_offset + 4*8]
+ mov r13, [rsp + gpr_save_mem_offset + 5*8]
+ mov r14, [rsp + gpr_save_mem_offset + 6*8]
+ mov r15, [rsp + gpr_save_mem_offset + 7*8]
+
+%ifndef ALIGN_STACK
+ add rsp, stack_size
+%else
+ mov rsp, rbp
+ pop rbp
+%endif
+%endm
+%endif
+
+
+_lit_len_offset equ 0
+_dist_offset equ (8 * LIT_LEN)
+_hash_offset equ (_dist_offset + 8 * DIST_LEN)
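+
+;;; These offsets index a single isal_huff_histogram allocation: 286
+;;; 64-bit lit/len counts, then 30 64-bit distance counts, then the
+;;; level-0 hash table of 16-bit entries.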
+
+
+%macro len_to_len_code 3
+%define %%len_code %1 ; Output
+%define %%len %2 ; Input
+%define %%rfc_lookup %3
+ movzx %%len_code, byte [%%rfc_lookup + _len_to_code_offset + %%len]
+ or %%len_code, 0x100
+%endm
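+
+;;; DEFLATE length codes occupy 257-285 (0x101-0x11d) in the lit/len
+;;; alphabet, so the lookup table stores only the low byte of each code
+;;; and the "or 0x100" restores the full value.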
+
+;;; Clobbers rcx and dist
+%macro dist_to_dist_code 2
+%define %%dist_code %1 ; Output code associated with dist
+%define %%dist_coded %1d
+%define %%dist %2d ; Input dist
+ dec %%dist
+ mov %%dist_coded, %%dist
+ bsr ecx, %%dist_coded
+ dec ecx
+ SHRX %%dist_code, %%dist_code, rcx
+ lea %%dist_coded, [%%dist_coded + 2*ecx]
+
+ cmp %%dist, 1
+ cmovle %%dist_coded, %%dist
+%endm
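+
+;;; Worked example: dist = 5 becomes %%dist = 4 after the dec; bsr finds
+;;; the msb at bit 2, (4 >> 1) = 2 supplies the top two bits, and
+;;; 2 + 2*1 = 4 is the RFC 1951 code for distances 5-6. Distances 1 and
+;;; 2 (codes 0 and 1) are handled by the trailing cmovle instead.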
+
+;;; Clobbers rcx and dist
+%macro dist_to_dist_code2 2
+%define %%dist_code %1 ; Output code associated with dist
+%define %%dist_coded %1d
+%define %%dist %2d ; Input -(dist - 1)
+ neg %%dist
+ mov %%dist_coded, %%dist
+ bsr ecx, %%dist_coded
+ dec ecx
+ SHRX %%dist_code, %%dist_code, rcx
+ lea %%dist_coded, [%%dist_coded + 2*ecx]
+
+ cmp %%dist, 1
+ cmovle %%dist_coded, %%dist
+%endm
+
+[bits 64]
+default rel
+section .text
+
+; void isal_update_histogram(uint8_t *in_stream, int length, struct isal_huff_histogram *histogram)
+global isal_update_histogram_ %+ ARCH
+isal_update_histogram_ %+ ARCH %+ :
+ endbranch
+ FUNC_SAVE
+
+%ifnidn file_start, arg0
+ mov file_start, arg0
+%endif
+%ifnidn file_length, arg1
+ mov file_length, arg1
+%endif
+%ifnidn histogram, arg2
+ mov histogram, arg2
+%endif
+ mov f_i, 0
+ cmp file_length, 0
+ je exit_ret ; If nothing to do then exit
+
+ mov tmp1, qword [histogram + _lit_len_offset + 8*256]
+ inc tmp1
+ mov [rsp + _eob_count_offset], tmp1
+
+ lea rfc_lookup, [rfc1951_lookup_table]
+
+ ;; Init hash_table
+ PXOR vtmp0, vtmp0, vtmp0
+ mov rcx, (IGZIP_LVL0_HASH_SIZE - V_LENGTH)
+init_hash_table:
+ MOVDQU [histogram + _hash_offset + 2 * rcx], vtmp0
+ MOVDQU [histogram + _hash_offset + 2 * (rcx + V_LENGTH / 2)], vtmp0
+ sub rcx, V_LENGTH
+ jge init_hash_table
+
+ sub file_length, LA_STATELESS
+ cmp file_length, 0
+ jle end_loop_2
+
+
+ ;; Load first literal into histogram
+ mov curr_data, [file_start + f_i]
+ compute_hash hash, curr_data
+ and hash %+ d, LVL0_HASH_MASK
+ mov [histogram + _hash_offset + 2 * hash], f_i %+ w
+ and curr_data, 0xff
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
+ inc f_i
+
+ ;; Setup to begin loop 2
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ mov curr_data2, curr_data
+ compute_hash hash, curr_data
+ shr curr_data2, 8
+ compute_hash hash2, curr_data2
+
+ and hash2 %+ d, LVL0_HASH_MASK
+ and hash, LVL0_HASH_MASK
+loop2:
+ xor dist, dist
+ xor dist2, dist2
+ xor tmp3, tmp3
+
+ lea tmp1, [file_start + f_i]
+
+ MOVQ curr_data, xdata
+ PSRLDQ xdata, 1
+
+ ;; Load possible look back distances and update hash data
+ mov dist %+ w, f_i %+ w
+ sub dist, 1
+ sub dist %+ w, word [histogram + _hash_offset + 2 * hash]
+ mov [histogram + _hash_offset + 2 * hash], f_i %+ w
+
+ add f_i, 1
+
+ mov dist2 %+ w, f_i %+ w
+ sub dist2, 1
+ sub dist2 %+ w, word [histogram + _hash_offset + 2 * hash2]
+ mov [histogram + _hash_offset + 2 * hash2], f_i %+ w
+
+ ;; Start computing hashes to be used in either the next loop or
+ ;; for updating the hash if a match is found
+ MOVQ curr_data2, xdata
+ MOVQ tmp2, xdata
+ shr curr_data2, 8
+ compute_hash hash, curr_data2
+
+ ;; Check if look back distances are valid. Load a junk distance of 1
+ ;; if the look back distance is too long for speculative lookups.
+ and dist %+ d, (D-1)
+ neg dist
+
+ and dist2 %+ d, (D-1)
+ neg dist2
+
+ shr tmp2, 16
+ compute_hash hash2, tmp2
+
+ ;; Check for long len/dist matches (>7)
+ mov len, curr_data
+ xor len, [tmp1 + dist - 1]
+ jz compare_loop
+
+ and hash %+ d, LVL0_HASH_MASK
+ and hash2 %+ d, LVL0_HASH_MASK
+
+ MOVQ len2, xdata
+ xor len2, [tmp1 + dist2]
+ jz compare_loop2
+
+ ;; Speculatively load the code for the first literal
+ movzx tmp1, curr_data %+ b
+ shr curr_data, 8
+
+ lea tmp3, [f_i + 1]
+
+ ;; Check for len/dist match for first literal
+ test len %+ d, 0xFFFFFFFF
+ jz len_dist_huffman_pre
+
+ ;; Store first literal
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * tmp1]
+
+ ;; Check for len/dist match for second literal
+ test len2 %+ d, 0xFFFFFFFF
+ jnz lit_lit_huffman
+len_dist_lit_huffman_pre:
+ ;; Calculate repeat length
+ tzcnt len2, len2
+ shr len2, 3
+
+len_dist_lit_huffman:
+ MOVQ curr_data, xdata
+ shr curr_data, 24
+ compute_hash hash3, curr_data
+
+ ;; Store updated hashes
+ mov [histogram + _hash_offset + 2 * hash], tmp3 %+ w
+ add tmp3, 1
+ mov [histogram + _hash_offset + 2 * hash2], tmp3 %+ w
+ add tmp3, 1
+
+ add f_i, len2
+
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ mov tmp1, curr_data
+ compute_hash hash, curr_data
+
+ and hash3, LVL0_HASH_MASK
+ mov [histogram + _hash_offset + 2 * hash3], tmp3 %+ w
+
+ dist_to_dist_code2 dist_code2, dist2
+
+ len_to_len_code len_code, len2, rfc_lookup
+
+ shr tmp1, 8
+ compute_hash hash2, tmp1
+
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
+ inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code2]
+
+ and hash2 %+ d, LVL0_HASH_MASK
+ and hash, LVL0_HASH_MASK
+
+ cmp f_i, file_length
+ jl loop2
+ jmp end_loop_2
+ ;; encode as dist/len
+
+len_dist_huffman_pre:
+ tzcnt len, len
+ shr len, 3
+
+len_dist_huffman:
+ mov [histogram + _hash_offset + 2 * hash], tmp3 %+ w
+ add tmp3, 1
+ mov [histogram + _hash_offset + 2 * hash2], tmp3 %+ w
+
+ dec f_i
+ add f_i, len
+
+ MOVDQU xdata, [file_start + f_i]
+ mov curr_data, [file_start + f_i]
+ mov tmp1, curr_data
+ compute_hash hash, curr_data
+
+ dist_to_dist_code2 dist_code, dist
+
+ len_to_len_code len_code, len, rfc_lookup
+
+ shr tmp1, 8
+ compute_hash hash2, tmp1
+
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
+ inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
+
+ and hash2 %+ d, LVL0_HASH_MASK
+ and hash, LVL0_HASH_MASK
+
+ cmp f_i, file_length
+ jl loop2
+ jmp end_loop_2
+
+lit_lit_huffman:
+ MOVDQU xdata, [file_start + f_i + 1]
+ and curr_data, 0xff
+ add f_i, 1
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
+
+ cmp f_i, file_length
+ jl loop2
+
+end_loop_2:
+ add file_length, LA_STATELESS - LAST_BYTES_COUNT
+ cmp f_i, file_length
+ jge final_bytes
+
+loop2_finish:
+ mov curr_data %+ d, dword [file_start + f_i]
+ compute_hash hash, curr_data
+ and hash %+ d, LVL0_HASH_MASK
+
+ ;; Calculate possible distance for length/dist pair.
+ xor dist, dist
+ mov dist %+ w, f_i %+ w
+ sub dist %+ w, word [histogram + _hash_offset + 2 * hash]
+ mov [histogram + _hash_offset + 2 * hash], f_i %+ w
+
+ ;; Check if look back distance is valid (the dec is to handle when dist = 0)
+ dec dist
+ cmp dist %+ d, (D-1)
+ jae encode_literal_finish
+ inc dist
+
+ ;; Check if look back distance is a match
+ lea tmp4, [file_length + LAST_BYTES_COUNT]
+ sub tmp4, f_i
+ lea tmp1, [file_start + f_i]
+ mov tmp2, tmp1
+ sub tmp2, dist
+ compare tmp4, tmp1, tmp2, len, tmp3
+
+ ;; Limit len to maximum value of 258
+ mov tmp2, 258
+ cmp len, 258
+ cmova len, tmp2
+ cmp len, SHORTEST_MATCH
+ jb encode_literal_finish
+
+ add f_i, len
+
+ len_to_len_code len_code, len, rfc_lookup
+ dist_to_dist_code dist_code, dist
+
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code]
+ inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code]
+
+ cmp f_i, file_length
+ jl loop2_finish
+ jmp final_bytes
+
+encode_literal_finish:
+ ;; Encode literal
+ and curr_data %+ d, 0xFF
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
+
+ ;; Setup for next loop
+ add f_i, 1
+ cmp f_i, file_length
+ jl loop2_finish
+
+final_bytes:
+ add file_length, LAST_BYTES_COUNT
+final_bytes_loop:
+ cmp f_i, file_length
+ jge end
+ movzx curr_data, byte [file_start + f_i]
+ inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data]
+ inc f_i
+ jmp final_bytes_loop
+
+end:
+ ;; Handle eob at end of stream
+ mov tmp1, [rsp + _eob_count_offset]
+ mov qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * 256], tmp1
+
+exit_ret:
+ FUNC_RESTORE
+ ret
+
+compare_loop:
+ and hash %+ d, LVL0_HASH_MASK
+ and hash2 %+ d, LVL0_HASH_MASK
+ lea tmp2, [tmp1 + dist - 1]
+
+ mov len2, 250
+ mov len, 8
+ compare250 tmp1, tmp2, len, len2, tmp3, ytmp0, ytmp1
+
+ lea tmp3, [f_i + 1]
+ jmp len_dist_huffman
+
+compare_loop2:
+ add tmp1, 1
+ lea tmp2, [tmp1 + dist2 - 1]
+
+ mov len, 250
+ mov len2, 8
+ compare250 tmp1, tmp2, len2, len, tmp3, ytmp0, ytmp1
+
+ and curr_data, 0xff
+ inc qword [histogram + _lit_len_offset + 8 * curr_data]
+ lea tmp3, [f_i + 1]
+ jmp len_dist_lit_huffman
+
+section .data
+ align 32
+D_vector:
+ dw -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF
+ dw -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF
+ dw -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF
+ dw -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF, -(D + 1) & 0xFFFF
diff --git a/src/isa-l/igzip/igzip_update_histogram_01.asm b/src/isa-l/igzip/igzip_update_histogram_01.asm
new file mode 100644
index 000000000..0705a0774
--- /dev/null
+++ b/src/isa-l/igzip/igzip_update_histogram_01.asm
@@ -0,0 +1,7 @@
+%define ARCH 01
+
+%ifndef COMPARE_TYPE
+%define COMPARE_TYPE 2
+%endif
+
+%include "igzip_update_histogram.asm"
diff --git a/src/isa-l/igzip/igzip_update_histogram_04.asm b/src/isa-l/igzip/igzip_update_histogram_04.asm
new file mode 100644
index 000000000..18945b2ac
--- /dev/null
+++ b/src/isa-l/igzip/igzip_update_histogram_04.asm
@@ -0,0 +1,8 @@
+%define ARCH 04
+%define USE_HSWNI
+
+%ifndef COMPARE_TYPE
+%define COMPARE_TYPE 3
+%endif
+
+%include "igzip_update_histogram.asm"
diff --git a/src/isa-l/igzip/igzip_wrapper.h b/src/isa-l/igzip/igzip_wrapper.h
new file mode 100644
index 000000000..f1b4bce4c
--- /dev/null
+++ b/src/isa-l/igzip/igzip_wrapper.h
@@ -0,0 +1,52 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef IGZIP_WRAPPER_H
+#define IGZIP_WRAPPER_H
+
+#define DEFLATE_METHOD 8
+#define ZLIB_DICT_FLAG (1 << 5)
+#define TEXT_FLAG (1 << 0)
+#define HCRC_FLAG (1 << 1)
+#define EXTRA_FLAG (1 << 2)
+#define NAME_FLAG (1 << 3)
+#define COMMENT_FLAG (1 << 4)
+#define UNDEFINED_FLAG (-1)
+
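+/* Gzip framing (RFC 1952): 10 fixed header bytes (magic, CM, FLG, MTIME,
+ * XFL, OS) and an 8-byte trailer holding CRC-32 and ISIZE. */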
+#define GZIP_HDR_BASE 10
+#define GZIP_EXTRA_LEN 2
+#define GZIP_HCRC_LEN 2
+#define GZIP_TRAILER_LEN 8
+
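+/* Zlib framing (RFC 1950): a 2-byte CMF + FLG header, an optional 4-byte
+ * DICTID when FDICT is set, and a 4-byte Adler-32 trailer. */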
+#define ZLIB_HDR_BASE 2
+#define ZLIB_DICT_LEN 4
+#define ZLIB_INFO_OFFSET 4
+#define ZLIB_LEVEL_OFFSET 6
+#define ZLIB_TRAILER_LEN 4
+
+#endif
diff --git a/src/isa-l/igzip/igzip_wrapper_hdr_test.c b/src/isa-l/igzip/igzip_wrapper_hdr_test.c
new file mode 100644
index 000000000..57e099f33
--- /dev/null
+++ b/src/isa-l/igzip/igzip_wrapper_hdr_test.c
@@ -0,0 +1,890 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "checksum_test_ref.h"
+#include "igzip_lib.h"
+#include "igzip_wrapper.h"
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+#ifndef RANDOMS
+# define RANDOMS 0x4000
+#endif
+
+#define EXTRA_SIZE_MAX 256
+#define NAME_SIZE_MAX 256
+#define COMMENT_SIZE_MAX 1024
+
+#define EXTRA_SIZE 10
+#define NAME_SIZE 25
+#define COMMENT_SIZE 192
+
+enum {
+ INVALID_WRAPPER = ISAL_INVALID_WRAPPER,
+ UNSUPPORTED_METHOD = ISAL_UNSUPPORTED_METHOD,
+ INCORRECT_CHECKSUM = ISAL_INCORRECT_CHECKSUM,
+ NO_ERROR = ISAL_DECOMP_OK,
+ END_INPUT = ISAL_END_INPUT,
+ NAME_OVERFLOW = ISAL_NAME_OVERFLOW,
+ COMMENT_OVERFLOW = ISAL_COMMENT_OVERFLOW,
+ EXTRA_OVERFLOW = ISAL_EXTRA_OVERFLOW,
+ INCORRECT_TEXT_FLAG,
+ INCORRECT_TIME,
+ INCORRECT_XFLAGS,
+ INCORRECT_OS,
+ INCORRECT_EXTRA_LEN,
+ INCORRECT_EXTRA_BUF,
+ INCORRECT_NAME,
+ INCORRECT_COMMENT,
+ INCORRECT_INFO,
+ INCORRECT_LEVEL,
+ INCORRECT_DICT_FLAG,
+ INCORRECT_DICT_ID,
+ INCORRECT_HDR_LEN,
+ INSUFFICIENT_BUFFER_SIZE,
+ INCORRECT_WRITE_RETURN,
+ MALLOC_FAILED
+};
+
+void print_error(int32_t error)
+{
+ printf("Error Code %d: ", error);
+ switch (error) {
+ case END_INPUT:
+ printf("End of input reached before header decompressed\n");
+ break;
+ case INVALID_WRAPPER:
+ printf("Invalid gzip wrapper found\n");
+ break;
+ case UNSUPPORTED_METHOD:
+ printf("Unsupported decompression method found\n");
+ break;
+ case INCORRECT_CHECKSUM:
+ printf("Incorrect header checksum found\n");
+ break;
+ case NAME_OVERFLOW:
+ printf("Name buffer overflow while decompressing\n");
+ break;
+ case COMMENT_OVERFLOW:
+ printf("Comment buffer overflow while decompressing\n");
+ break;
+ case EXTRA_OVERFLOW:
+ printf("Extra buffer overflow while decompressing\n");
+ break;
+ case INCORRECT_TEXT_FLAG:
+ printf("Incorrect text field found\n");
+ break;
+ case INCORRECT_TIME:
+ printf("Incorrect time field found\n");
+ break;
+ case INCORRECT_XFLAGS:
+ printf("Incorrect xflags field found\n");
+ break;
+ case INCORRECT_OS:
+ printf("Incorrect os field found\n");
+ break;
+ case INCORRECT_EXTRA_LEN:
+ printf("Incorrect extra_len field found\n");
+ break;
+ case INCORRECT_EXTRA_BUF:
+ printf("Incorrect extra buffer found\n");
+ break;
+ case INCORRECT_NAME:
+ printf("Incorrect name found\n");
+ break;
+ case INCORRECT_COMMENT:
+ printf("Incorrect comment found\n");
+ break;
+ case INCORRECT_INFO:
+ printf("Incorrect info found\n");
+ break;
+ case INCORRECT_LEVEL:
+ printf("Incorrect level found\n");
+ break;
+ case INCORRECT_DICT_FLAG:
+ printf("Incorrect dictionary flag found\n");
+ break;
+ case INCORRECT_DICT_ID:
+ printf("Incorrect dictionary id found\n");
+ break;
+ case INCORRECT_HDR_LEN:
+ printf("Incorrect header length found\n");
+ break;
+ case INSUFFICIENT_BUFFER_SIZE:
+ printf("Insufficient buffer size to write header\n");
+ break;
+ case INCORRECT_WRITE_RETURN:
+ printf("Header write returned an incorrect value\n");
+ break;
+ case MALLOC_FAILED:
+ printf("Failed to allocate buffers\n");
+ break;
+ case NO_ERROR:
+ printf("No error found\n");
+ }
+}
+
+void print_uint8_t(uint8_t * array, uint64_t length, char *prepend)
+{
+ const int line_size = 16;
+ int i;
+
+ if (array == NULL)
+ printf("%s(NULL)", prepend);
+ else if (length == 0)
+ printf("%s(Empty)", prepend);
+
+ for (i = 0; i < length; i++) {
+ if (i == 0)
+ printf("%s0x%04x\t", prepend, i);
+ else if ((i % line_size) == 0)
+ printf("\n%s0x%04x\t", prepend, i);
+ else
+ printf(" ");
+ printf("0x%02x,", array[i]);
+ }
+ printf("\n");
+}
+
+void print_string(char *str, uint32_t str_max_len, char *prepend)
+{
+ const int line_size = 64;
+ uint32_t i = 0;
+
+ while (i < str_max_len && str[i] != 0) {
+ if (i == 0)
+ printf("%s0x%04x\t", prepend, i);
+ else if ((i % line_size) == 0)
+ printf("\n%s0x%04x\t", prepend, i);
+
+ printf("%c", str[i]);
+ i++;
+ }
+ printf("\n");
+}
+
+void print_gzip_header(struct isal_gzip_header *gz_hdr, char *prepend1, char *prepend2)
+{
+ printf("%sText: %d, Time: 0x%08x, Xflags: 0x%x, OS: 0x%x\n", prepend1,
+ gz_hdr->text, gz_hdr->time, gz_hdr->xflags, gz_hdr->os);
+
+ printf("%sExtra: Extra_len = 0x%x\n", prepend1, gz_hdr->extra_len);
+ if (gz_hdr->extra_len < EXTRA_SIZE_MAX)
+ print_uint8_t(gz_hdr->extra, gz_hdr->extra_len, prepend2);
+ else
+ printf("%sExtra field larger than EXTRA_SIZE_MAX\n", prepend2);
+
+ printf("%sName:\n", prepend1);
+ if (gz_hdr->name_buf_len < NAME_SIZE_MAX)
+ print_string(gz_hdr->name, gz_hdr->name_buf_len, prepend2);
+ else
+ printf("%sName field larger than NAME_SIZE_MAX\n", prepend2);
+
+ printf("%sComment:\n", prepend1);
+ if (gz_hdr->comment_buf_len < COMMENT_SIZE_MAX)
+ print_string(gz_hdr->comment, gz_hdr->comment_buf_len, prepend2);
+ else
+ printf("%sComment field larger than COMMENT_SIZE_MAX\n", prepend2);
+}
+
+void print_zlib_header(struct isal_zlib_header *z_hdr, char *prepend)
+{
+ printf("%sInfo: 0x%x\n", prepend, z_hdr->info);
+ printf("%sLevel: 0x%x\n", prepend, z_hdr->level);
+ printf("%sDictionary: Flag = 0x%x, Id = 0x%x\n", prepend, z_hdr->dict_flag,
+ z_hdr->dict_id);
+}
+
+void print_gzip_final_verbose(uint8_t * hdr_buf, uint32_t hdr_buf_len,
+ struct isal_gzip_header *gz_hdr_orig,
+ struct isal_gzip_header *gz_hdr)
+{
+#ifdef VERBOSE
+ printf("\n");
+ if (gz_hdr_orig != NULL) {
+ printf("Original Gzip Header Struct:\n");
+ print_gzip_header(gz_hdr_orig, "\t", "\t\t");
+ printf("\n");
+ }
+
+ if (gz_hdr != NULL) {
+ printf("Parsed Gzip Header Struct:\n");
+ print_gzip_header(gz_hdr, "\t", "\t\t");
+ printf("\n");
+ }
+
+ if (hdr_buf != NULL) {
+ printf("Serialized Gzip Header:\n");
+ print_uint8_t(hdr_buf, hdr_buf_len, "\t");
+ printf("\n");
+ }
+#endif
+ return;
+}
+
+void print_zlib_final_verbose(uint8_t * hdr_buf, uint32_t hdr_buf_len,
+ struct isal_zlib_header *z_hdr_orig,
+ struct isal_zlib_header *z_hdr)
+{
+#ifdef VERBOSE
+ printf("\n");
+ if (z_hdr_orig != NULL) {
+ printf("Original Zlib Header Struct:\n");
+ print_zlib_header(z_hdr_orig, "\t");
+ printf("\n");
+ }
+
+ if (z_hdr != NULL) {
+ printf("Parsed Zlib Header Struct:\n");
+ print_zlib_header(z_hdr, "\t");
+ printf("\n");
+ }
+
+ if (hdr_buf != NULL) {
+ printf("Serialized Zlib Header:\n");
+ print_uint8_t(hdr_buf, hdr_buf_len, "\t");
+ printf("\n");
+ }
+#endif
+ return;
+}
+
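+/* Serialized size per RFC 1952: the 10 fixed bytes, plus 2 + extra_len
+ * for the EXTRA field, the NUL-terminated NAME and COMMENT strings, and
+ * 2 bytes of header CRC when hcrc is set. */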
+int gzip_header_size(struct isal_gzip_header *gz_hdr)
+{
+ int hdr_size = 10;
+ if (gz_hdr->extra != NULL) {
+ hdr_size += 2 + gz_hdr->extra_len;
+ }
+ if (gz_hdr->name != NULL) {
+ hdr_size += strnlen(gz_hdr->name, gz_hdr->name_buf_len) + 1;
+ }
+ if (gz_hdr->comment != NULL) {
+ hdr_size += strnlen(gz_hdr->comment, gz_hdr->comment_buf_len) + 1;
+ }
+
+ if (gz_hdr->hcrc) {
+ hdr_size += 2;
+ }
+
+ return hdr_size;
+}
+
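+/* Serialized size per RFC 1950: CMF + FLG (2 bytes), plus the 4-byte
+ * DICTID when the dictionary flag is set. */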
+int zlib_header_size(struct isal_zlib_header *z_hdr)
+{
+ if (z_hdr->dict_flag)
+ return 6;
+ else
+ return 2;
+}
+
+void rand_string(char *string, uint32_t str_len)
+{
+ int i;
+
+ if (str_len == 0 || string == NULL)
+ return;
+ for (i = 0; i < str_len - 1; i++) {
+ string[i] = rand() % 26 + 65;
+ }
+ string[str_len - 1] = 0;
+}
+
+void rand_buf(uint8_t * buf, uint32_t buf_len)
+{
+ int i;
+
+ if (buf_len == 0 || buf == NULL)
+ return;
+
+ for (i = 0; i < buf_len; i++) {
+ buf[i] = rand();
+ }
+}
+
+int malloc_gzip_header(struct isal_gzip_header *gz_hdr)
+{
+ gz_hdr->extra = NULL;
+ if (gz_hdr->extra_buf_len) {
+ gz_hdr->extra = malloc(gz_hdr->extra_buf_len);
+ if (gz_hdr->extra == NULL)
+ return MALLOC_FAILED;
+ }
+
+ gz_hdr->name = NULL;
+ if (gz_hdr->name_buf_len) {
+ gz_hdr->name = malloc(gz_hdr->name_buf_len);
+ if (gz_hdr->name == NULL)
+ return MALLOC_FAILED;
+ }
+
+ gz_hdr->comment = NULL;
+ if (gz_hdr->comment_buf_len) {
+ gz_hdr->comment = malloc(gz_hdr->comment_buf_len);
+ if (gz_hdr->comment == NULL)
+ return MALLOC_FAILED;
+ }
+
+ return 0;
+}
+
+void free_gzip_header(struct isal_gzip_header *gz_hdr)
+{
+ if (gz_hdr->extra != NULL) {
+ free(gz_hdr->extra);
+ gz_hdr->extra = NULL;
+ }
+
+ if (gz_hdr->name != NULL) {
+ free(gz_hdr->name);
+ gz_hdr->name = NULL;
+ }
+
+ if (gz_hdr->comment != NULL) {
+ free(gz_hdr->comment);
+ gz_hdr->comment = NULL;
+ }
+
+}
+
+int gen_rand_gzip_header(struct isal_gzip_header *gz_hdr)
+{
+ int ret = 0;
+ int field_set_space = 8;
+
+ isal_gzip_header_init(gz_hdr);
+
+ if (rand() % field_set_space != 0)
+ gz_hdr->text = rand() % 2;
+ if (rand() % field_set_space != 0)
+ gz_hdr->time = rand();
+ if (rand() % field_set_space != 0)
+ gz_hdr->xflags = rand() % 256;
+ if (rand() % field_set_space != 0)
+ gz_hdr->os = rand() % 256;
+
+ if (rand() % field_set_space != 0) {
+ gz_hdr->extra_buf_len = rand() % EXTRA_SIZE_MAX;
+ gz_hdr->extra_len = gz_hdr->extra_buf_len;
+ }
+
+ if (rand() % field_set_space != 0)
+ gz_hdr->name_buf_len = rand() % NAME_SIZE_MAX;
+
+ if (rand() % field_set_space != 0)
+ gz_hdr->comment_buf_len = rand() % COMMENT_SIZE_MAX;
+
+ gz_hdr->hcrc = rand() % 2;
+
+ ret = malloc_gzip_header(gz_hdr);
+ if (ret)
+ return ret;
+
+ rand_buf(gz_hdr->extra, gz_hdr->extra_len);
+ rand_string(gz_hdr->name, gz_hdr->name_buf_len);
+ rand_string(gz_hdr->comment, gz_hdr->comment_buf_len);
+
+ return ret;
+}
+
+void gen_rand_zlib_header(struct isal_zlib_header *z_hdr)
+{
+ z_hdr->info = rand() % 16;
+ z_hdr->level = rand() % 4;
+ z_hdr->dict_flag = rand() % 2;
+ z_hdr->dict_id = rand();
+}
+
+int write_gzip_header(uint8_t * hdr_buf, uint32_t hdr_buf_len, struct isal_gzip_header *gz_hdr)
+{
+
+ struct isal_zstream stream;
+ uint32_t hdr_len = gzip_header_size(gz_hdr);
+ uint32_t len;
+
+ isal_deflate_init(&stream);
+ stream.next_out = hdr_buf;
+ stream.avail_out = rand() % hdr_len;
+ len = isal_write_gzip_header(&stream, gz_hdr);
+
+ if (len != hdr_len) {
+ printf("len = %d, hdr_buf_len = %d\n", len, hdr_len);
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr, NULL);
+ print_error(INCORRECT_HDR_LEN);
+ return INCORRECT_HDR_LEN;
+ }
+
+ if (hdr_buf_len < hdr_len) {
+ print_gzip_final_verbose(NULL, 0, gz_hdr, NULL);
+ print_error(INSUFFICIENT_BUFFER_SIZE);
+ return INSUFFICIENT_BUFFER_SIZE;
+ }
+
+ stream.avail_out = hdr_buf_len;
+
+ len = isal_write_gzip_header(&stream, gz_hdr);
+
+ if (len) {
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr, NULL);
+ print_error(INCORRECT_WRITE_RETURN);
+ return INCORRECT_WRITE_RETURN;
+ }
+
+ return 0;
+}
+
+int write_zlib_header(uint8_t * hdr_buf, uint32_t hdr_buf_len, struct isal_zlib_header *z_hdr)
+{
+ struct isal_zstream stream;
+ uint32_t hdr_len = zlib_header_size(z_hdr);
+ uint32_t len;
+
+ isal_deflate_init(&stream);
+ stream.next_out = hdr_buf;
+ stream.avail_out = rand() % hdr_len;
+ len = isal_write_zlib_header(&stream, z_hdr);
+
+ if (len != hdr_len) {
+ print_zlib_final_verbose(hdr_buf, hdr_buf_len, z_hdr, NULL);
+ print_error(INCORRECT_HDR_LEN);
+ return INCORRECT_HDR_LEN;
+ }
+
+ if (hdr_buf_len < hdr_len) {
+ print_zlib_final_verbose(NULL, 0, z_hdr, NULL);
+ print_error(INSUFFICIENT_BUFFER_SIZE);
+ return INSUFFICIENT_BUFFER_SIZE;
+ }
+
+ stream.avail_out = hdr_buf_len;
+
+ len = isal_write_zlib_header(&stream, z_hdr);
+
+ if (len) {
+ print_zlib_final_verbose(hdr_buf, hdr_buf_len, z_hdr, NULL);
+ print_error(INCORRECT_WRITE_RETURN);
+ return INCORRECT_WRITE_RETURN;
+ }
+
+ return 0;
+}
+
+int compare_gzip_headers(struct isal_gzip_header *gz_hdr1, struct isal_gzip_header *gz_hdr2)
+{
+ int ret = 0;
+ uint32_t max_len;
+
+ if (gz_hdr1->text != gz_hdr2->text)
+ return INCORRECT_TEXT_FLAG;
+
+ if (gz_hdr1->time != gz_hdr2->time)
+ return INCORRECT_TIME;
+
+ if (gz_hdr1->xflags != gz_hdr2->xflags)
+ return INCORRECT_XFLAGS;
+
+ if (gz_hdr1->os != gz_hdr2->os)
+ return INCORRECT_OS;
+
+ if (gz_hdr1->extra_len != gz_hdr2->extra_len)
+ return INCORRECT_EXTRA_LEN;
+
+ if (gz_hdr1->extra != NULL && gz_hdr2->extra != NULL) {
+ ret = memcmp(gz_hdr1->extra, gz_hdr2->extra, gz_hdr1->extra_len);
+ if (ret)
+ return INCORRECT_EXTRA_BUF;
+ }
+
+ if (gz_hdr1->name != NULL && gz_hdr2->name != NULL) {
+ max_len = gz_hdr1->name_buf_len;
+ if (gz_hdr1->name_buf_len < gz_hdr2->name_buf_len)
+ max_len = gz_hdr2->name_buf_len;
+
+ ret = strncmp(gz_hdr1->name, gz_hdr2->name, max_len);
+ if (ret)
+ return INCORRECT_NAME;
+ }
+
+ if (gz_hdr1->comment != NULL && gz_hdr2->comment != NULL) {
+ max_len = gz_hdr1->comment_buf_len;
+ if (gz_hdr1->comment_buf_len < gz_hdr2->comment_buf_len)
+ max_len = gz_hdr2->comment_buf_len;
+
+ ret = strncmp(gz_hdr1->comment, gz_hdr2->comment, max_len);
+ if (ret)
+ return INCORRECT_COMMENT;
+ }
+ return ret;
+}
+
+int compare_zlib_headers(struct isal_zlib_header *z_hdr1, struct isal_zlib_header *z_hdr2)
+{
+ if (z_hdr1->info != z_hdr2->info)
+ return INCORRECT_INFO;
+
+ if (z_hdr1->level != z_hdr2->level)
+ return INCORRECT_LEVEL;
+
+ if (z_hdr1->dict_flag != z_hdr2->dict_flag)
+ return INCORRECT_DICT_FLAG;
+
+ if (z_hdr1->dict_flag && z_hdr1->dict_id != z_hdr2->dict_id)
+ return INCORRECT_DICT_ID;
+
+ return 0;
+}
+
+int read_gzip_header_simple(uint8_t * hdr_buf, uint32_t hdr_buf_len,
+ struct isal_gzip_header *gz_hdr_orig)
+{
+
+ int ret = 0;
+ struct inflate_state state;
+ struct isal_gzip_header gz_hdr;
+
+ rand_buf((uint8_t *) & gz_hdr, sizeof(gz_hdr));
+ gz_hdr.extra_buf_len = gz_hdr_orig->extra_buf_len;
+ gz_hdr.name_buf_len = gz_hdr_orig->name_buf_len;
+ gz_hdr.comment_buf_len = gz_hdr_orig->comment_buf_len;
+
+ ret = malloc_gzip_header(&gz_hdr);
+ if (ret) {
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr_orig, NULL);
+ print_error(ret);
+ free_gzip_header(&gz_hdr);
+ return ret;
+ }
+
+ isal_inflate_init(&state);
+ state.next_in = hdr_buf;
+ state.avail_in = hdr_buf_len;
+ ret = isal_read_gzip_header(&state, &gz_hdr);
+
+ if (ret) {
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr_orig, &gz_hdr);
+ print_error(ret);
+ free_gzip_header(&gz_hdr);
+ return ret;
+ }
+
+ ret = compare_gzip_headers(gz_hdr_orig, &gz_hdr);
+
+ if (ret) {
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr_orig, &gz_hdr);
+ print_error(ret);
+ }
+
+ free_gzip_header(&gz_hdr);
+ return ret;
+}
+
+int read_zlib_header_simple(uint8_t * hdr_buf, uint32_t hdr_buf_len,
+ struct isal_zlib_header *z_hdr_orig)
+{
+
+ int ret = 0;
+ struct inflate_state state;
+ struct isal_zlib_header z_hdr;
+
+ rand_buf((uint8_t *) & z_hdr, sizeof(z_hdr));
+
+ if (ret) {
+ print_zlib_final_verbose(hdr_buf, hdr_buf_len, z_hdr_orig, NULL);
+ print_error(ret);
+ return ret;
+ }
+
+ isal_inflate_init(&state);
+ state.next_in = hdr_buf;
+ state.avail_in = hdr_buf_len;
+ ret = isal_read_zlib_header(&state, &z_hdr);
+
+ if (ret) {
+ print_zlib_final_verbose(hdr_buf, hdr_buf_len, z_hdr_orig, &z_hdr);
+ print_error(ret);
+ return ret;
+ }
+
+ ret = compare_zlib_headers(z_hdr_orig, &z_hdr);
+
+ if (ret) {
+ print_zlib_final_verbose(hdr_buf, hdr_buf_len, z_hdr_orig, &z_hdr);
+ print_error(ret);
+ }
+
+ return ret;
+}
+
+int read_gzip_header_streaming(uint8_t * hdr_buf, uint32_t hdr_buf_len,
+ struct isal_gzip_header *gz_hdr_orig)
+{
+ int ret = 0;
+ uint32_t max_dec_size, dec_size, max_extra_len, extra_len;
+ uint32_t max_name_len, name_len, max_comment_len, comment_len;
+ struct inflate_state state;
+ struct isal_gzip_header gz_hdr;
+ void *tmp_ptr;
+
+ rand_buf((uint8_t *) & gz_hdr, sizeof(gz_hdr));
+
+ max_dec_size = (rand() % hdr_buf_len) + 2;
+
+ max_extra_len = 2;
+ max_name_len = 2;
+ max_comment_len = 2;
+ if (gz_hdr_orig->extra_buf_len)
+ max_extra_len = (rand() % gz_hdr_orig->extra_buf_len) + 2;
+ if (gz_hdr_orig->name_buf_len)
+ max_name_len = (rand() % gz_hdr_orig->name_buf_len) + 2;
+ if (gz_hdr_orig->comment_buf_len)
+ max_comment_len = (rand() % gz_hdr_orig->comment_buf_len) + 2;
+
+ extra_len = rand() % max_extra_len;
+ name_len = rand() % max_name_len;
+ comment_len = rand() % max_comment_len;
+
+ if (extra_len == 0)
+ extra_len = 1;
+ if (name_len == 0)
+ name_len = 1;
+ if (comment_len == 0)
+ comment_len = 1;
+
+ gz_hdr.extra_buf_len = extra_len;
+ gz_hdr.name_buf_len = name_len;
+ gz_hdr.comment_buf_len = comment_len;
+
+ ret = malloc_gzip_header(&gz_hdr);
+
+ if (ret) {
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr_orig, NULL);
+ print_error(ret);
+ free_gzip_header(&gz_hdr);
+ return ret;
+ }
+
+ isal_inflate_init(&state);
+
+ state.next_in = hdr_buf;
+ dec_size = rand() % max_dec_size;
+ if (dec_size > hdr_buf_len)
+ dec_size = hdr_buf_len;
+
+ state.avail_in = dec_size;
+ hdr_buf_len -= dec_size;
+
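+ /* Feed the header in random-sized chunks: on a *_OVERFLOW return grow
+ * the corresponding buffer and retry, on ISAL_END_INPUT supply more
+ * input, and leave the loop on success or any other error. */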
+ while (1) {
+ ret = isal_read_gzip_header(&state, &gz_hdr);
+
+ switch (ret) {
+ case ISAL_NAME_OVERFLOW:
+ if (name_len >= NAME_SIZE_MAX)
+ break;
+
+ name_len += rand() % max_name_len;
+ tmp_ptr = realloc(gz_hdr.name, name_len);
+ if (tmp_ptr == NULL) {
+ ret = MALLOC_FAILED;
+ break;
+ }
+ gz_hdr.name = tmp_ptr;
+ gz_hdr.name_buf_len = name_len;
+ continue;
+ case ISAL_COMMENT_OVERFLOW:
+ if (comment_len >= COMMENT_SIZE_MAX)
+ break;
+
+ comment_len += rand() % max_comment_len;
+ tmp_ptr = realloc(gz_hdr.comment, comment_len);
+ if (tmp_ptr == NULL) {
+ ret = MALLOC_FAILED;
+ break;
+ }
+ gz_hdr.comment = tmp_ptr;
+ gz_hdr.comment_buf_len = comment_len;
+ continue;
+ case ISAL_EXTRA_OVERFLOW:
+ if (extra_len >= EXTRA_SIZE_MAX)
+ break;
+
+ extra_len += rand() % max_extra_len;
+ tmp_ptr = realloc(gz_hdr.extra, extra_len);
+ if (tmp_ptr == NULL) {
+ ret = MALLOC_FAILED;
+ break;
+ }
+ gz_hdr.extra = tmp_ptr;
+ gz_hdr.extra_buf_len = extra_len;
+ continue;
+ case ISAL_END_INPUT:
+ if (hdr_buf_len == 0)
+ break;
+
+ dec_size = rand() % max_dec_size;
+ if (dec_size > hdr_buf_len)
+ dec_size = hdr_buf_len;
+
+ state.avail_in = dec_size;
+ hdr_buf_len -= dec_size;
+ continue;
+ }
+
+ break;
+ }
+
+ if (ret) {
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr_orig, &gz_hdr);
+ print_error(ret);
+ free_gzip_header(&gz_hdr);
+ return ret;
+ }
+
+ ret = compare_gzip_headers(gz_hdr_orig, &gz_hdr);
+
+ if (ret) {
+ print_gzip_final_verbose(hdr_buf, hdr_buf_len, gz_hdr_orig, &gz_hdr);
+ print_error(ret);
+ }
+
+ free_gzip_header(&gz_hdr);
+ return ret;
+}
+
+int read_zlib_header_streaming(uint8_t * hdr_buf, uint32_t hdr_buf_len,
+ struct isal_zlib_header *z_hdr_orig)
+{
+ int ret = ISAL_END_INPUT;
+ uint32_t max_dec_size, dec_size;
+ struct inflate_state state;
+ struct isal_zlib_header z_hdr;
+
+ rand_buf((uint8_t *) & z_hdr, sizeof(z_hdr));
+
+ max_dec_size = (rand() % hdr_buf_len) + 2;
+
+ isal_inflate_init(&state);
+
+ state.next_in = hdr_buf;
+ while (ret == ISAL_END_INPUT && hdr_buf_len > 0) {
+ dec_size = rand() % max_dec_size;
+ if (dec_size > hdr_buf_len)
+ dec_size = hdr_buf_len;
+
+ state.avail_in = dec_size;
+ hdr_buf_len -= dec_size;
+
+ ret = isal_read_zlib_header(&state, &z_hdr);
+ }
+
+ if (ret) {
+ print_zlib_final_verbose(hdr_buf, hdr_buf_len, z_hdr_orig, &z_hdr);
+ print_error(ret);
+ return ret;
+ }
+
+ ret = compare_zlib_headers(z_hdr_orig, &z_hdr);
+
+ if (ret) {
+ print_zlib_final_verbose(hdr_buf, hdr_buf_len, z_hdr_orig, &z_hdr);
+ print_error(ret);
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ uint8_t *hdr_buf;
+ uint32_t hdr_buf_len;
+ int ret = 0, fin_ret = 0;
+ struct isal_gzip_header gz_hdr_orig;
+ struct isal_zlib_header z_hdr_orig;
+ int i;
+
+#ifndef VERBOSE
+ setbuf(stdout, NULL);
+#endif
+ printf("Test Seed : %d\n", TEST_SEED);
+ printf("Randoms : %d\n", RANDOMS);
+ srand(TEST_SEED);
+
+ printf("gzip wrapper test: ");
+ for (i = 0; i < RANDOMS; i++) {
+ rand_buf((uint8_t *) & gz_hdr_orig, sizeof(gz_hdr_orig));
+
+ ret = gen_rand_gzip_header(&gz_hdr_orig);
+ if (ret) {
+ print_error(ret);
+ return ret;
+ }
+
+ hdr_buf_len = gzip_header_size(&gz_hdr_orig);
+ hdr_buf = malloc(hdr_buf_len);
+
+ ret = write_gzip_header(hdr_buf, hdr_buf_len, &gz_hdr_orig);
+
+ fin_ret |= ret;
+ if (ret)
+ return ret;
+
+ ret = read_gzip_header_simple(hdr_buf, hdr_buf_len, &gz_hdr_orig);
+
+ fin_ret |= ret;
+ if (ret)
+ return ret;
+
+ ret = read_gzip_header_streaming(hdr_buf, hdr_buf_len, &gz_hdr_orig);
+
+ fin_ret |= ret;
+ if (ret)
+ return ret;
+
+ free_gzip_header(&gz_hdr_orig);
+ if (hdr_buf != NULL)
+ free(hdr_buf);
+
+ if (i % (RANDOMS / 16) == 0)
+ printf(".");
+ }
+ printf("Pass \n");
+
+ printf("zlib wrapper test: ");
+ for (i = 0; i < RANDOMS; i++) {
+ memset(&z_hdr_orig, 0, sizeof(z_hdr_orig));
+
+ gen_rand_zlib_header(&z_hdr_orig);
+
+ hdr_buf_len = zlib_header_size(&z_hdr_orig);
+ hdr_buf = malloc(hdr_buf_len);
+
+ ret = write_zlib_header(hdr_buf, hdr_buf_len, &z_hdr_orig);
+
+ fin_ret |= ret;
+ if (ret)
+ return ret;
+
+ ret = read_zlib_header_simple(hdr_buf, hdr_buf_len, &z_hdr_orig);
+
+ fin_ret |= ret;
+ if (ret)
+ return ret;
+
+ ret = read_zlib_header_streaming(hdr_buf, hdr_buf_len, &z_hdr_orig);
+
+ fin_ret |= ret;
+ if (ret)
+ return ret;
+
+ if (hdr_buf != NULL)
+ free(hdr_buf);
+
+ if (i % (RANDOMS / 16) == 0)
+ printf(".");
+ }
+ printf("Pass \n");
+
+ printf("igzip wrapper_hdr test finished:%s \n",
+ fin_ret ? " Some tests failed " : " All tests passed");
+
+ return 0;
+}
diff --git a/src/isa-l/igzip/inflate_data_structs.asm b/src/isa-l/igzip/inflate_data_structs.asm
new file mode 100644
index 000000000..bfdb6d5f0
--- /dev/null
+++ b/src/isa-l/igzip/inflate_data_structs.asm
@@ -0,0 +1,146 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; START_FIELDS
+%macro START_FIELDS 0
+%assign _FIELD_OFFSET 0
+%assign _STRUCT_ALIGN 0
+%endm
+
+;; FIELD name size align
+%macro FIELD 3
+%define %%name %1
+%define %%size %2
+%define %%align %3
+
+%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1))
+%%name equ _FIELD_OFFSET
+%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size)
+%if (%%align > _STRUCT_ALIGN)
+%assign _STRUCT_ALIGN %%align
+%endif
+%endm
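+
+;; Example: after START_FIELDS, "FIELD _avail_out, 4, 4" rounds
+;; _FIELD_OFFSET up to a multiple of 4, names that offset _avail_out,
+;; advances _FIELD_OFFSET by 4, and raises _STRUCT_ALIGN to 4 if needed.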
+
+;; See the inflate_huff_code structure declaration in igzip_lib.h for an
+;; explanation of the calculations below
+%define L_REM (21 - ISAL_DECODE_LONG_BITS)
+%define S_REM (15 - ISAL_DECODE_SHORT_BITS)
+
+%define L_DUP ((1 << L_REM) - (L_REM + 1))
+%define S_DUP ((1 << S_REM) - (S_REM + 1))
+
+%define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1)
+%define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1)
+
+%define L_SIZE (286 + L_DUP + L_UNUSED)
+%define S_SIZE (30 + S_DUP + S_UNUSED)
+
+%define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf))
+%define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf))
+
+%define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf))
+%define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf))
+
+%define LARGE_SHORT_CODE_SIZE 4
+%define LARGE_LONG_CODE_SIZE 2
+
+%define SMALL_SHORT_CODE_SIZE 2
+%define SMALL_LONG_CODE_SIZE 2
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; inflate huff code
+
+;; name size align
+FIELD _short_code_lookup_large, LARGE_SHORT_CODE_SIZE * (1 << (ISAL_DECODE_LONG_BITS)), LARGE_LONG_CODE_SIZE
+FIELD _long_code_lookup_large, LARGE_LONG_CODE_SIZE * MAX_LONG_CODE_LARGE, LARGE_SHORT_CODE_SIZE
+
+%assign _inflate_huff_code_large_size _FIELD_OFFSET
+%assign _inflate_huff_code_large_align _STRUCT_ALIGN
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; inflate huff code
+
+;; name size align
+FIELD _short_code_lookup_small, SMALL_SHORT_CODE_SIZE * (1 << (ISAL_DECODE_SHORT_BITS)), SMALL_LONG_CODE_SIZE
+FIELD _long_code_lookup_small, SMALL_LONG_CODE_SIZE * MAX_LONG_CODE_SMALL, SMALL_SHORT_CODE_SIZE
+
+%assign _inflate_huff_code_small_size _FIELD_OFFSET
+%assign _inflate_huff_code_small_align _STRUCT_ALIGN
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+START_FIELDS ;; inflate state
+
+;; name size align
+FIELD _next_out, 8, 8
+FIELD _avail_out, 4, 4
+FIELD _total_out, 4, 4
+FIELD _next_in, 8, 8
+FIELD _read_in, 8, 8
+FIELD _avail_in, 4, 4
+FIELD _read_in_length, 4, 4
+FIELD _lit_huff_code, _inflate_huff_code_large_size, _inflate_huff_code_large_align
+FIELD _dist_huff_code, _inflate_huff_code_small_size, _inflate_huff_code_small_align
+FIELD _block_state, 4, 4
+FIELD _dict_length, 4, 4
+FIELD _bfinal, 4, 4
+FIELD _crc_flag, 4, 4
+FIELD _crc, 4, 4
+FIELD _hist_bits, 4, 4
+FIELD _type0_block_len, 4, 4
+FIELD _write_overflow_lits, 4, 4
+FIELD _write_overflow_len, 4, 4
+FIELD _copy_overflow_len, 4, 4
+FIELD _copy_overflow_dist, 4, 4
+
+%assign _inflate_state_size _FIELD_OFFSET
+%assign _inflate_state_align _STRUCT_ALIGN
+
+_lit_huff_code_short_code_lookup equ _lit_huff_code+_short_code_lookup_large
+_lit_huff_code_long_code_lookup equ _lit_huff_code+_long_code_lookup_large
+
+_dist_huff_code_short_code_lookup equ _dist_huff_code+_short_code_lookup_small
+_dist_huff_code_long_code_lookup equ _dist_huff_code+_long_code_lookup_small
+
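+;; Mirror of enum isal_block_state in igzip_lib.h; the two definitions
+;; must stay in sync.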
+ISAL_BLOCK_NEW_HDR equ 0
+ISAL_BLOCK_HDR equ 1
+ISAL_BLOCK_TYPE0 equ 2
+ISAL_BLOCK_CODED equ 3
+ISAL_BLOCK_INPUT_DONE equ 4
+ISAL_BLOCK_FINISH equ 5
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
diff --git a/src/isa-l/igzip/inflate_std_vects.h b/src/isa-l/igzip/inflate_std_vects.h
new file mode 100644
index 000000000..6dccae4d4
--- /dev/null
+++ b/src/isa-l/igzip/inflate_std_vects.h
@@ -0,0 +1,1554 @@
+#include <stdint.h>
+#include "igzip_lib.h"
+
+uint8_t std_vect_0[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0x20, 0x34,
+ 0x20, 0x00, 0xc7, 0x7e, 0x06, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x40, 0xe7,
+ 0xcb, 0x6a, 0xe8, 0x03, 0x00, 0x00, 0x19, 0xff,
+ 0xff, 0xbc, 0xec, 0xd9, 0xb6, 0xf3, 0xb2, 0xcd,
+ 0x4e, 0xcb, 0xb2, 0x2e, 0xc7, 0xb6, 0xad, 0xc7,
+ 0x7e, 0xbc, 0xbf, 0xee, 0xbc, 0xec, 0xfb, 0x7e,
+ 0xec, 0x64, 0x7a, 0xec, 0x2f, 0xcc, 0xeb, 0xc5,
+ 0x1f, 0xbb, 0xfe, 0x72, 0xbc, 0xec, 0xb2, 0x1f
+};
+
+uint8_t std_vect_1[] = {
+ 0xed, 0xfd, 0xdb, 0xbc, 0x2d, 0xf3, 0x34, 0x8d,
+ 0x31, 0xa6, 0x31, 0x7a, 0xf4, 0x18, 0xd3, 0x34,
+ 0x6d, 0x40, 0x85, 0x42, 0x6d, 0xc7, 0xb6, 0x6d,
+ 0xd7, 0x34, 0x3d, 0xef, 0xc7, 0x7e, 0xff, 0xff
+};
+
+uint8_t std_vect_2[] = {
+ 0xed, 0x83, 0x63, 0x61, 0xeb, 0xbb, 0xff, 0x82,
+ 0x66, 0xe0, 0xc5, 0xc2, 0xee, 0xc9, 0x8f, 0xf5,
+ 0xc7, 0xeb, 0x7a, 0x7c, 0xfb, 0x76, 0xec, 0xc7
+};
+
+uint8_t std_vect_3[] = {
+ 0xed, 0xfb, 0xb1, 0x1f, 0x33, 0xee, 0xfb, 0xb1,
+ 0xbf, 0x1e, 0xc7, 0x61, 0x61, 0x61, 0x61, 0x61,
+ 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61,
+ 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61,
+ 0x61, 0x61, 0x61, 0xa7, 0xec, 0xb2, 0xce, 0xeb,
+ 0x7a, 0x6c, 0xfb, 0x02, 0xec, 0xc7, 0x88, 0x6c,
+ 0xcb, 0xb6, 0x6c, 0xc7, 0xb1, 0x6e, 0xeb, 0xb6,
+ 0x6e, 0xdb, 0x00, 0x7f, 0xfb, 0xb6, 0x6e, 0xdb,
+ 0x3f, 0x01
+};
+
+uint8_t std_vect_4[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0x37, 0xac,
+ 0x4b, 0x88, 0x4a, 0x2f, 0xb0, 0xa9, 0x10, 0xfc,
+ 0x31, 0xc8, 0x42, 0xc4, 0x36, 0x50, 0x7b, 0xb2,
+ 0x5f, 0x37, 0x09, 0x17, 0x65, 0x6b, 0x46, 0xa2,
+ 0xdb, 0x35, 0xd7, 0x8e, 0x59, 0xd7, 0x34, 0x3d,
+ 0xef, 0xc7, 0x7e, 0x1c, 0x26, 0xab, 0x48, 0x48,
+ 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb, 0xcb,
+ 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4, 0xce,
+ 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb, 0x9f
+};
+
+uint8_t std_vect_5[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x4d,
+ 0x3d, 0xef, 0xc7, 0x8e, 0x35, 0x35, 0x35, 0x35,
+ 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35,
+ 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35,
+ 0x35, 0x35, 0x35, 0x4c, 0xe5, 0x41, 0x75, 0xab,
+ 0x69, 0xab, 0x0c, 0xaa, 0x55, 0xec, 0xd8, 0x7e
+};
+
+uint8_t std_vect_6[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xfa, 0x00, 0x00, 0xfa,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0x00,
+ 0x40, 0x71, 0xec, 0x64, 0x25, 0x49, 0x1a, 0x27,
+ 0x2f, 0x50, 0xcc, 0x76, 0x8e, 0xbc, 0xec, 0xeb,
+ 0xb2, 0xce, 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec,
+ 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0x6c, 0xc7, 0xeb,
+ 0xb2, 0xce, 0xeb, 0x7a, 0x83, 0xfb, 0x76, 0xec,
+ 0xb1, 0xaf, 0xf3, 0x3c, 0x2d, 0xcb, 0x32, 0x6f,
+ 0xdb, 0xbc, 0xcc, 0xf3, 0xb2, 0xcd, 0x2f, 0xcb,
+ 0xb2, 0x2e, 0xc7, 0xb6, 0xad, 0xc7, 0x7e, 0xbc,
+ 0xbf, 0xee, 0xfb, 0xb1, 0x8f, 0xc2, 0x3f, 0x01
+};
+
+uint8_t std_vect_7[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0xec,
+ 0xfb, 0xf1, 0xb2, 0x1f, 0x58, 0xf7, 0xc2, 0xb8,
+ 0x1c, 0xce, 0xcc, 0xcf, 0x44, 0x04, 0x54, 0x29,
+ 0x34, 0x17, 0xcb, 0xac, 0x36, 0x50, 0x7b, 0xb2,
+ 0xd8, 0x79, 0xf1, 0x9b, 0xd2, 0x3a, 0xdb, 0x5a,
+ 0x33, 0xb3, 0x50, 0xca
+};
+
+uint8_t std_vect_8[] = {
+ 0xed, 0xfd, 0x1c, 0xfb, 0xaa, 0x12, 0xcb, 0x7c,
+ 0xec, 0xfb, 0xf1, 0xb2, 0x6e, 0xeb, 0x7f, 0xca,
+ 0xca, 0xac, 0xca, 0x96, 0x96, 0x96, 0x96, 0xca,
+ 0xca, 0xca, 0xca, 0xca, 0xb6, 0xed, 0xcb, 0x31,
+ 0x36, 0x98, 0x79, 0xa6, 0x48, 0xc6, 0x82, 0x8b
+};
+
+uint8_t std_vect_9[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xc3, 0xc3,
+ 0xbb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3,
+ 0xc3, 0xc3, 0xc3, 0xc3, 0xff, 0x7f, 0xc3, 0xc3
+};
+
+uint8_t std_vect_10[] = {
+ 0xed, 0xfd, 0x6d, 0xed, 0xaa, 0x9e, 0x1d, 0x01,
+ 0x4b, 0x86, 0x10, 0x00, 0xfa, 0xf0, 0xf0, 0xf0,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xd0, 0xf0, 0xf0, 0x6f
+};
+
+uint8_t std_vect_11[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x1e, 0x1e, 0x1e,
+ 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x6d, 0xbb, 0x34,
+ 0xec, 0xeb, 0x2f, 0xb0, 0xa9, 0x11, 0x0c, 0x31,
+ 0xc8, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xe5,
+ 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda,
+ 0xda, 0xda, 0xcc, 0xfb, 0x71, 0x1c, 0xfb, 0x71,
+ 0xec, 0xfb, 0xf1, 0xbe, 0x1f, 0xc7, 0xbe, 0x1f,
+ 0xc7, 0xb1, 0xbf, 0x1d, 0x72, 0xfb, 0x7e, 0xbc,
+ 0xed, 0xaf, 0xc7, 0xb1, 0x05, 0xff, 0xff, 0x05,
+ 0xfb, 0x31, 0xc6, 0x34, 0xcd, 0xf3, 0x32, 0xbf,
+ 0x2c, 0xf3, 0xba, 0x6e, 0xeb, 0x7a, 0x6c, 0xc7
+};
+
+uint8_t std_vect_12[] = {
+ 0xed, 0xfd, 0x55, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb,
+ 0xb2, 0xce, 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec,
+ 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0x6c, 0xc7, 0xb1,
+ 0x6e, 0xeb, 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb,
+ 0xb6, 0x6e, 0xdb, 0xb6, 0xaf, 0xeb, 0xb2, 0x6d,
+ 0xc7, 0x7a, 0xec, 0xdb, 0xb6, 0xed, 0xcb, 0x31,
+ 0xcd, 0x2f, 0xcb, 0xb2
+};
+
+uint8_t std_vect_13[] = {
+ 0xed, 0xfd, 0x6d, 0x14, 0x81, 0x00, 0x00, 0x34,
+ 0x52, 0xef, 0xc7, 0x4e, 0x00, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xf6, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xc7, 0xbe, 0x1f, 0xc7, 0xb4, 0xce, 0xfb, 0xb1,
+ 0x6f, 0xcb, 0xbc, 0xec, 0xeb, 0x1e, 0x0c, 0x31,
+ 0xc8, 0x42, 0xc4, 0x36, 0x50, 0x40, 0x34, 0x8d,
+ 0xaf, 0x85, 0x42, 0x81, 0xf4, 0x1d, 0xd1, 0x80,
+ 0xe8, 0x03, 0x00, 0x00, 0x26, 0xab, 0x75, 0xfe,
+ 0xb6, 0xbd, 0xeb, 0xb2, 0x6d, 0xc7, 0x7a, 0xec,
+ 0x82, 0x66, 0xf3, 0xc5
+};
+
+uint8_t std_vect_14[] = {
+ 0xed, 0xf8, 0x6d, 0xc7, 0x00, 0x3c, 0x2d, 0xcb,
+ 0x32, 0x6f, 0xdb, 0xbc, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0x80, 0xbe, 0x1f, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb
+};
+
+uint8_t std_vect_15[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xa9, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4
+};
+
+uint8_t std_vect_16[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x70, 0xd7, 0x0d,
+ 0x3d, 0x99, 0xc7, 0x7e, 0x1c, 0x20, 0x00, 0x1f,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xc2
+};
+
+uint8_t std_vect_17[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb4, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0xef, 0xef, 0xef, 0xef, 0xef,
+ 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x02, 0x02, 0x02, 0xef, 0xef, 0xef, 0xef,
+ 0xef, 0xb6, 0x6e, 0xdb, 0xb6, 0xaf, 0xeb, 0xb2,
+ 0x6d, 0xc7, 0x7a, 0xec, 0xdb, 0xb6, 0xed, 0x10,
+ 0x31, 0xaf, 0xf3, 0x3c, 0x2d, 0x01, 0x32, 0x6f,
+ 0xdb, 0xbc, 0xcc, 0xf3, 0xb2, 0xcd, 0x2f, 0xcb,
+ 0xb2, 0x2e, 0xc7, 0xb6, 0xad, 0xc7, 0x7e, 0xbc,
+ 0xbf, 0xee, 0xfb, 0xb1, 0xbf, 0x1e, 0xc7, 0xb1,
+ 0x1f, 0xc7, 0xfe, 0x72, 0xbc, 0xec, 0xfb, 0x7e,
+ 0xec, 0xfb, 0x71, 0xec, 0x2f, 0xc7, 0xeb, 0xbe,
+ 0xe0, 0xc5, 0xc2, 0xee, 0x09, 0xc9, 0x8f, 0xf5
+};
+
+uint8_t std_vect_18[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x75, 0xab, 0x69,
+ 0xb5, 0x0c, 0xaa, 0x55, 0x29, 0x79, 0x81, 0xe2,
+ 0xab, 0x27, 0x07, 0x23, 0xf2, 0x1f, 0xfb, 0xeb,
+ 0x7a, 0x6c, 0xfb, 0x76, 0xec, 0xc7, 0x71, 0xb1,
+ 0x6e, 0xeb, 0xb6, 0xcb, 0x4c, 0xec, 0xc7, 0xbe,
+ 0x1f, 0xc7, 0xb4, 0xce, 0xfb, 0xb1, 0x6f, 0xcb,
+ 0xbc, 0xec, 0xeb, 0xb2, 0xce, 0xeb, 0x7a, 0x87,
+ 0xfb, 0x76, 0xec, 0xc7, 0x71, 0xb1, 0x6e, 0xeb,
+ 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb, 0xb6, 0x6e,
+ 0xdb, 0xb6, 0xaf, 0xeb, 0xca, 0x6d, 0xc7, 0x7a,
+ 0x6f, 0xdb, 0xbc, 0xcc
+};
+
+uint8_t std_vect_19[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xfe, 0x65, 0x65, 0x65,
+ 0x65, 0x65, 0x72, 0xbc, 0xec, 0x1f, 0xc7, 0xae,
+ 0x1f, 0x00, 0x00, 0x00, 0x20, 0x71, 0xec, 0xfb,
+ 0xd0, 0xd2, 0x1f, 0xc7, 0xbe, 0x1f, 0xc7, 0xb1,
+ 0xbf, 0x1d, 0x6f, 0xfb, 0x7e, 0xbc, 0xed, 0x80,
+ 0xc7, 0xb1, 0xef, 0xc7, 0xb1, 0x32, 0xf0, 0x11,
+ 0x52, 0xc6, 0x34, 0xcd, 0xf3, 0x32, 0xbf, 0x2c,
+ 0x00, 0x01, 0x6e, 0xeb, 0x7a, 0x6c, 0xc7, 0xb1,
+ 0x6f, 0xe4, 0x7e, 0x1c, 0xfb, 0x00, 0x01, 0x00,
+ 0xfa, 0x2d, 0xfb, 0x5f, 0x7c, 0xf6, 0x47, 0xde
+};
+
+uint8_t std_vect_20[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0x09, 0x64,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xf1, 0xb2, 0x1f, 0xfb, 0xcb, 0x71, 0xec, 0xc7,
+ 0xbe, 0x1f, 0xc7, 0xb4, 0xce, 0xfb, 0xb1, 0x6f,
+ 0xcb, 0xbc, 0xec, 0xeb, 0xb2, 0xce, 0x7a, 0x6c,
+ 0x7b, 0x76, 0xec, 0xc7, 0x71, 0x6c, 0xcb, 0xb6,
+ 0x6c, 0xc7, 0xb1, 0x6e, 0xeb, 0xb6, 0x6e, 0xdb,
+ 0xba, 0x1d, 0xfb, 0xb6
+};
+
+uint8_t std_vect_21[] = {
+ 0xed, 0x0b, 0x84, 0x64, 0x25, 0x49, 0x1a, 0x27,
+ 0x2d, 0x2d, 0x0a, 0xa8, 0x11, 0x0c, 0x27, 0xc8
+};
+
+uint8_t std_vect_22[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0x00, 0x00,
+ 0x80, 0x00, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xf9, 0xeb,
+ 0xb2, 0xce, 0xeb, 0xc7, 0x71, 0x6c, 0xcb, 0xb6,
+ 0x6c, 0xc7, 0xb1, 0x6e, 0xeb, 0xb6, 0x6e, 0xdb,
+ 0x98, 0x79, 0xa6, 0x48, 0xab, 0x8e, 0x8f, 0xc2,
+ 0x3f, 0x01
+};
+
+uint8_t std_vect_23[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6e, 0xd7, 0x50,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0x20, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0xc7, 0xbe, 0x1f, 0xc7, 0x9f, 0xce, 0xfb,
+ 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb, 0xca, 0xce,
+ 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec, 0xc7, 0x5b,
+ 0x00, 0x1e, 0x00, 0x00, 0xe2, 0xb1, 0x6e, 0xeb,
+ 0xb6, 0x6e, 0xf3, 0xba, 0x1d, 0xfb, 0xa6, 0x6e,
+ 0x40, 0x00, 0xaf, 0xeb, 0xb2, 0x6d, 0xc7, 0x7a,
+ 0xec, 0xdb, 0xb6, 0x00, 0xfa, 0x00, 0x00, 0xfa,
+ 0x00, 0x22, 0xff, 0x32, 0x6f, 0xdb, 0x00, 0x00,
+ 0x03, 0xe8, 0xcd, 0x2f, 0xf1, 0xb2, 0x0f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x0e, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0x00, 0x00, 0x03, 0xe8,
+ 0xb2, 0xce, 0xeb, 0x94, 0xab, 0x82, 0x8f, 0xc2
+};
+
+uint8_t std_vect_24[] = {
+ 0xed, 0x44, 0x04, 0x54, 0x29, 0xff, 0xff, 0xff,
+ 0x80, 0xc7, 0x60, 0x30, 0xeb, 0xbb, 0xff, 0x82,
+ 0x5b, 0xe0, 0xc5, 0xff, 0xee, 0xfd, 0x80, 0xc7,
+ 0xff, 0xff, 0xd7, 0x34, 0x3d, 0x00, 0x00, 0x00,
+ 0x40, 0xf1, 0xb2, 0x0b, 0xfb, 0xcb, 0x71, 0xec,
+ 0xc7, 0xbe, 0x1f, 0xc7, 0xb4, 0xce, 0xfb, 0xb1,
+ 0x2f, 0xc7, 0x79, 0xa6, 0x48, 0xab, 0x82, 0x8f,
+ 0xc2, 0x3f, 0x01
+};
+
+uint8_t std_vect_25[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0x1f, 0x21, 0x00, 0x10,
+ 0x21, 0x0d, 0x21, 0x14, 0x21, 0x35, 0x16, 0xec,
+ 0xc7, 0x7d, 0x1f, 0xc7, 0x35, 0xce, 0xfb, 0xb1,
+ 0x6f, 0x7f, 0xbc, 0xec, 0x17, 0x64, 0xbd, 0xeb,
+ 0x7a, 0x6c, 0xfb, 0x76, 0xec, 0x70, 0x8e, 0x37,
+ 0xb9, 0x25, 0x9a, 0x0e, 0x65, 0x32, 0x9c, 0xe5,
+ 0x41, 0x75, 0xab, 0x69, 0x98, 0x0c, 0xc7, 0x71,
+ 0x6c, 0xcb, 0xb6, 0x6c, 0xc7, 0xb1, 0x6e, 0xf6,
+ 0x36, 0xe0, 0x27, 0xa9, 0xa6, 0x48, 0xab, 0x82
+};
+
+uint8_t std_vect_26[] = {
+ 0x1c, 0xfb, 0x91, 0xfb, 0xf1, 0xbe, 0x1f, 0xdd,
+ 0xbe, 0xa7, 0xa7, 0xa7, 0xa7, 0xa7, 0xa7, 0xa7,
+ 0xa7, 0xb9, 0x8f, 0xa7, 0xa7, 0xa7, 0xa7, 0xa7
+};
+
+uint8_t std_vect_27[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0xff, 0x00, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb,
+ 0xb2, 0xce, 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec,
+ 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0x6c, 0xc7, 0xb1,
+ 0x6e, 0xeb, 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb,
+ 0xb6, 0x6e, 0xdb, 0xb6, 0xcd, 0x2f, 0xcb, 0xb2
+};
+
+uint8_t std_vect_28[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0x00, 0x01, 0x00, 0x00, 0xed, 0xcb, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x3e, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xd1, 0xec, 0xeb,
+ 0xb2, 0xdb, 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec,
+ 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0x6c, 0xc7, 0x92,
+ 0x85, 0xeb, 0xa6, 0x6e, 0xdb, 0xba, 0x3c, 0xfb,
+ 0x3f, 0x01
+};
+
+uint8_t std_vect_29[] = {
+ 0xed, 0xfd, 0x6d, 0x00, 0x00, 0xff, 0x34, 0x3d,
+ 0xef, 0xc7, 0x94, 0x1c, 0xfb, 0xb1, 0x1f, 0xcb,
+ 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb, 0xcb,
+ 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4, 0xce,
+ 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb, 0xb2,
+ 0xce, 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec, 0xc7,
+ 0x71, 0x6c, 0xcb, 0xb6, 0x6c, 0xc7, 0xb1, 0x6e,
+ 0xeb, 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb, 0x58,
+ 0xb5, 0x0c, 0xaa, 0x55, 0x29, 0x79, 0x81, 0xe2
+};
+
+uint8_t std_vect_30[] = {
+ 0xed, 0xfd, 0x00, 0x00, 0x7f, 0xff, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f
+};
+
+uint8_t std_vect_31[] = {
+ 0x0c, 0x8b, 0x8b, 0xc7, 0xb6, 0x65, 0x20, 0x40,
+ 0x80, 0x00, 0x00, 0x00, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1e, 0x00,
+ 0x12, 0x10, 0x00, 0xc7, 0xbe, 0x00, 0x01, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x7f, 0xcb, 0xcb, 0x31, 0xff,
+ 0xff, 0xff, 0x80, 0xcb, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x09, 0x3d, 0x01, 0x89,
+ 0x7c, 0x7c, 0x7c, 0x7c, 0x43, 0xbb, 0xca, 0xcd,
+ 0xfa, 0x84, 0x89, 0x89
+};
+
+uint8_t std_vect_32[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7d, 0x1c, 0x00, 0xb1, 0xff,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0x62,
+ 0xcb, 0x1f, 0xc7, 0xb4, 0xd7, 0xfb, 0xb1, 0x6f,
+ 0xcb, 0x2c, 0xf3, 0xba, 0x6e, 0xeb, 0x00, 0x04,
+ 0x00, 0x00, 0x6f, 0xc7, 0x7e, 0x1c, 0xfb, 0xb6,
+ 0xc2, 0xb8, 0x1c, 0x00, 0x00, 0x04, 0x00, 0xec
+};
+
+uint8_t std_vect_33[] = {
+ 0xed, 0xfd, 0xb0, 0xa9, 0x11, 0x0c, 0x31, 0xc8,
+ 0x42, 0xc4, 0x36, 0x50, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0x6c, 0xcb, 0xb6, 0x6c,
+ 0xc7, 0xb1, 0x6e, 0xeb
+};
+
+uint8_t std_vect_34[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0xab, 0x82, 0x8f, 0xc2
+};
+
+uint8_t std_vect_35[] = {
+ 0xed, 0xfd, 0x01, 0x87, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x91, 0xcb, 0x7c, 0xec, 0xfb,
+ 0x80, 0x00, 0x00, 0x00, 0xcb, 0x71, 0xec, 0xc7,
+ 0xbe, 0x1f, 0xc7, 0xb4, 0x34, 0x09, 0x17, 0x61,
+ 0x6b, 0x06, 0x8e, 0x59, 0xdd, 0x45, 0xff, 0xff,
+ 0x00, 0x00, 0xaa, 0x50, 0x92, 0x31, 0xaf, 0xff,
+ 0xf3, 0xb2, 0xcd, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0,
+ 0xd8, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0,
+ 0xd0, 0xd0, 0xd0, 0xd0, 0xfb, 0x71, 0xec, 0xfb,
+ 0xf1, 0xbe, 0x1f, 0xad, 0xdb, 0x99, 0x2c, 0xf3,
+ 0x34, 0x8d, 0x00, 0x01
+};
+
+uint8_t std_vect_36[] = {
+ 0xed, 0xfd, 0x19, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xce, 0xfb, 0xff, 0xff, 0xff, 0x7f, 0x9c,
+ 0xb2, 0xce, 0xeb, 0x7a, 0x6c, 0x91, 0x91, 0x91,
+ 0x91, 0x91, 0x91, 0x91, 0x91, 0x91, 0x91, 0x91,
+ 0x91, 0x91, 0x91, 0x91, 0xdb, 0xba, 0x3b, 0x3b,
+ 0x3b, 0x3b, 0x3b, 0x3b, 0x3b, 0x3b, 0x3b, 0x3b,
+ 0x3b, 0x3b, 0x3b, 0x3b, 0xff, 0x80, 0x3b, 0x3b
+};
+
+uint8_t std_vect_37[] = {
+ 0xed, 0xfd, 0xb3, 0x50, 0xca, 0x94, 0x6d, 0xc7,
+ 0xb6, 0x6d, 0xd7, 0x34, 0x3d, 0xef, 0xc7, 0x7e,
+ 0x1c, 0xfb, 0xb1, 0x1f, 0xcb, 0x7c, 0xec, 0xfb,
+ 0xf1, 0xb2, 0x1f, 0xfb, 0xcb, 0x71, 0xec, 0xc7,
+ 0xbe, 0x1f, 0xc7, 0xb4, 0xce, 0xfb, 0xb1, 0x6f,
+ 0xcb, 0xbc, 0xec, 0xeb, 0xb2, 0xce, 0xeb, 0x7a,
+ 0x6c, 0xfb, 0x76, 0xec, 0xc7, 0x71, 0x6c, 0xcb,
+ 0xb6, 0x6c, 0xc7, 0xb1, 0x6e, 0xeb, 0xb6, 0x6e,
+ 0xdb, 0xba, 0x1d, 0xfb, 0xb6, 0x6e, 0xdb, 0xb6,
+ 0xaf, 0xeb, 0xb2, 0x6d, 0xc7, 0x7a, 0xec, 0xdb,
+ 0xb6, 0xed, 0xcb, 0x31, 0xcb, 0x32, 0x6f, 0xdb,
+ 0xbc, 0xcc, 0xf3, 0xb2
+};
+
+uint8_t std_vect_38[] = {
+ 0xed, 0xfd, 0x4d, 0xc7, 0xb6, 0x70, 0xd7, 0x34,
+ 0x3d, 0xef, 0xd6, 0x7f, 0xff, 0xff, 0xff, 0x10,
+ 0x00, 0x00, 0x64, 0x7e, 0x6c, 0xef, 0xfb, 0x31,
+ 0xc6, 0x34, 0x08, 0x08, 0x08, 0x08, 0x20, 0x08,
+ 0x08, 0x01, 0x00, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0xc7, 0x7e,
+ 0x7c, 0x00, 0x64, 0x00, 0x00, 0xbf, 0x1e, 0xb6,
+ 0xc2, 0x3f, 0x06
+};
+
+uint8_t std_vect_39[] = {
+ 0xed, 0xfd, 0x8c, 0x8c, 0x97, 0x8c, 0x8c, 0x8c,
+ 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c,
+ 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0x8c, 0xb4
+};
+
+uint8_t std_vect_40[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x7c,
+ 0x99, 0x17, 0x34, 0x3d, 0xef, 0xc7, 0x7e, 0x1c,
+ 0xfb, 0xb1, 0x1f, 0xcb, 0x7c, 0xec, 0xfb, 0xf1,
+ 0xb2, 0x1f, 0xfb, 0xcb, 0x71, 0xec, 0xc7, 0xbe,
+ 0x1f, 0xc7, 0xb4, 0xce, 0xfb, 0xb1, 0x6f, 0xcb,
+ 0xbc, 0xec, 0xeb, 0xb2, 0xce, 0xeb, 0x7a, 0x6c,
+ 0xfb, 0x76, 0xec, 0xc7, 0x71, 0x6c, 0xcb, 0xb6
+};
+
+uint8_t std_vect_41[] = {
+ 0xed, 0xfd, 0x6d, 0xff, 0xff, 0x80, 0x00, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb,
+ 0x6c, 0xfb, 0x76, 0xec
+};
+
+uint8_t std_vect_42[] = {
+ 0xed, 0xfd, 0x19, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x56, 0x56, 0x56, 0x56,
+ 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56,
+ 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56,
+ 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x7a,
+ 0x6c, 0xfb, 0x76, 0xec
+};
+
+uint8_t std_vect_43[] = {
+ 0x7c, 0x99, 0x17, 0xed, 0xfd, 0x6d, 0x12, 0x12,
+ 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12,
+ 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0xb0, 0xa9,
+ 0x11, 0x0c, 0x31, 0xc8, 0x42, 0xc4, 0x36, 0x50
+};
+
+uint8_t std_vect_44[] = {
+ 0xed, 0xfd, 0xb1, 0xcb, 0xb6, 0x6c, 0xc7, 0xb1,
+ 0x6e, 0xeb, 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xe9,
+ 0xb6, 0x6e, 0xdb, 0xb4, 0x64, 0x00, 0xb2, 0xb2,
+ 0xb2, 0xb2, 0xb2, 0xb2, 0xb2, 0xb2, 0xb2, 0xb2,
+ 0xb2, 0xb2, 0xb2, 0xb2, 0xb2, 0xb2, 0xb2, 0x6d,
+ 0xc7, 0x7a, 0xec, 0xdb, 0xe3, 0x6d, 0xd3, 0x31,
+ 0xaf, 0xf3, 0x3c, 0x2d, 0xbc, 0xcc, 0xf3, 0xb2,
+ 0xec, 0x2f, 0xcb, 0xb2
+};
+
+uint8_t std_vect_45[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xff, 0x7f, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0x2d, 0x25, 0xd2, 0xf8, 0x0b, 0xf9,
+ 0x4b, 0x4e, 0x20, 0x8d, 0x10, 0x9c, 0x93, 0x6b,
+ 0xac, 0x7a, 0xc3, 0xf2, 0x4c, 0x8d, 0xbc, 0xc0,
+ 0x79, 0x30, 0x17, 0x4a, 0x3a, 0x2f, 0xcc, 0xe1,
+ 0xbc, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb, 0xcb,
+ 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4, 0xce,
+ 0x3f, 0x01
+};
+
+uint8_t std_vect_46[] = {
+ 0xed, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
+ 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4
+};
+
+uint8_t std_vect_47[] = {
+ 0xed, 0xfd, 0x1c, 0xfb, 0x71, 0xec, 0xfb, 0xf1,
+ 0xbe, 0x1f, 0xc7, 0xbe, 0x1f, 0xc7, 0xb1, 0xbf,
+ 0x1d, 0x6f, 0xfb, 0x7e, 0xbc, 0xed, 0xaf, 0xc7,
+ 0xb1, 0xef, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb,
+ 0xb2, 0xce, 0xeb, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d
+};
+
+uint8_t std_vect_48[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xff, 0x80, 0x1f, 0xfb, 0xcb, 0x71, 0xec,
+ 0xbe, 0xbe, 0x1f, 0xc7, 0x7a, 0xec, 0xdb, 0x9b,
+ 0x48, 0xab, 0x2d, 0xb5, 0x6f, 0xb3, 0x63, 0x59,
+ 0x99, 0x59, 0xcb, 0x71, 0xec, 0xbe, 0xbe, 0x1f,
+ 0xc7, 0x7a, 0xec, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd
+};
+
+uint8_t std_vect_49[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x1f, 0xbb, 0xbb,
+ 0xbb, 0xbb, 0xb9, 0xa9, 0xbb, 0xbb, 0xe3, 0x6d,
+ 0xd3, 0xbb, 0xe3, 0x6d, 0x01, 0xbb, 0xbb, 0xbb,
+ 0xbb, 0xaf, 0xf3, 0x3c, 0xb4, 0xc7, 0xb4, 0xb4
+};
+
+uint8_t std_vect_50[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0xff, 0x22, 0x80,
+ 0x00, 0xec, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x1d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d
+};
+
+uint8_t std_vect_51[] = {
+ 0x7a, 0x7a, 0x7a, 0x7a, 0x71, 0xec, 0xc7, 0xbe,
+ 0x00, 0x00, 0x1d, 0xfb, 0xb6, 0x6e, 0xdb, 0xb6,
+ 0xaf, 0xeb, 0xb2, 0x6d, 0xec, 0x00, 0x02, 0xbe
+};
+
+uint8_t std_vect_52[] = {
+ 0xed, 0xf2, 0xe8, 0xe8, 0xe8, 0xe8, 0xf6, 0xe8,
+ 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xcd,
+ 0x9b, 0xd2, 0x3a, 0xdb, 0x5a, 0x33, 0xb3, 0x50
+};
+
+uint8_t std_vect_53[] = {
+ 0xed, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0,
+ 0xa0, 0xa0, 0xa0, 0x1d, 0xd1, 0x47, 0x46, 0x2d,
+ 0xec, 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0x6c, 0xc7
+};
+
+uint8_t std_vect_54[] = {
+ 0xed, 0xfd, 0x1a, 0x1a, 0x2c, 0x1a, 0x1a, 0x16,
+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
+ 0x1f, 0xcb, 0x7c, 0xfd, 0xfb, 0xd0, 0xb2, 0x1f,
+ 0xfb, 0xcb, 0x71, 0xec, 0xf1, 0xbe, 0x1f, 0xc7
+};
+
+uint8_t std_vect_55[] = {
+ 0xed, 0xfd, 0x1c, 0xfb, 0xb1, 0x1f, 0xcb, 0x7c,
+ 0xec, 0xfb, 0xf1, 0x00, 0xb3, 0x50, 0xca, 0x94,
+ 0x01, 0x00, 0x00, 0x71, 0xec, 0xc7, 0xbe, 0x1f,
+ 0xc7, 0xb4, 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc,
+ 0xec, 0xeb, 0xb2, 0xce, 0xeb, 0xe3, 0x6d, 0xd3,
+ 0x7a, 0x6c, 0xfb, 0x76, 0xec, 0xc7, 0x71, 0x6c
+};
+
+uint8_t std_vect_56[] = {
+ 0xf7, 0x4e, 0x00
+};
+
+uint8_t std_vect_57[] = {
+ 0xed, 0xfd, 0x6d, 0x00, 0x00, 0x00, 0x20, 0x39,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x32, 0x4c, 0xc2, 0x9a,
+ 0x75, 0xe4, 0x05, 0x8e, 0x37, 0xd5, 0x25, 0x84,
+ 0x90, 0x9c, 0xfe, 0x47, 0xf2, 0x02, 0x89, 0x7f,
+ 0x11, 0xb1, 0x1f, 0xcb, 0x7c, 0xec, 0x16, 0x17,
+ 0x4a, 0x2d, 0xa3, 0xa9, 0x76, 0x96, 0xd1, 0xc1,
+ 0x3c, 0xbb, 0xca, 0xcd, 0xfa, 0x0a, 0x6c, 0x71,
+ 0xe1, 0xf7, 0xf1, 0xb2, 0x1f, 0xfb, 0xcb, 0x71,
+ 0xec, 0xc7, 0xbe, 0x1f, 0xc6, 0xb4, 0xce, 0x40,
+ 0xb1, 0x6f, 0xcb, 0x99, 0x10, 0xeb, 0x00, 0x00
+};
+
+uint8_t std_vect_58[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0xff, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7f, 0xec, 0xfb, 0xfa, 0x8e, 0x37, 0xb9,
+ 0x25, 0x47, 0xa4, 0x0d, 0xfe, 0x47, 0xf2, 0x10,
+ 0x97, 0x6c, 0x00, 0x00, 0x00, 0xff, 0x00, 0x10,
+ 0xd9, 0xd9, 0xd9, 0xd9
+};
+
+uint8_t std_vect_59[] = {
+ 0xed, 0xfd, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0xf0, 0xf0, 0x6d, 0xdb, 0x1d, 0xfb, 0xb6, 0x6e
+};
+
+uint8_t std_vect_60[] = {
+ 0x64, 0x00, 0x15, 0xbb, 0x2d, 0x18, 0x15, 0xff,
+ 0xbb, 0x2d
+};
+
+uint8_t std_vect_61[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0xfa, 0x00, 0x00,
+ 0xfa, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xbe, 0x1f, 0xc7, 0xb4,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0xec, 0xeb,
+ 0xb2, 0xce, 0xab, 0x7a, 0x6c, 0xfb, 0x76, 0xec,
+ 0x6e, 0xeb, 0xb6, 0x6e, 0xe5, 0xba, 0x1d, 0xfb
+};
+
+uint8_t std_vect_62[] = {
+ 0x6d, 0x2e, 0x98
+};
+
+uint8_t std_vect_63[] = {
+ 0xed, 0xfd, 0x6d, 0xd2, 0xa5, 0x6d, 0x64, 0x7e,
+ 0xb3, 0x50, 0xca, 0x94, 0x1c, 0xfb, 0xce, 0x1f,
+ 0xb7, 0x01, 0x09, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xc7, 0xae, 0x1f, 0xc7, 0xb4
+};
+
+uint8_t std_vect_64[] = {
+ 0xed, 0xfd, 0xc1, 0x79, 0x02, 0x02, 0x02, 0x02,
+ 0x02, 0x7a, 0xf4, 0x18, 0xc1, 0xf7, 0x2a, 0x6d,
+ 0x7f, 0xff, 0xab, 0xee, 0x64, 0x7f, 0x1d, 0xfb,
+ 0x00, 0x28, 0x2d, 0xcb, 0x32, 0x6f, 0xdb, 0xbc
+};
+
+uint8_t std_vect_65[] = {
+ 0xed, 0x7d, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x10,
+ 0x3c, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xfb, 0xf1, 0xb2, 0x1f, 0xfb,
+ 0xcb, 0x71, 0xec, 0xd1, 0xbe, 0x1f, 0xc7, 0xb4
+};
+
+uint8_t std_vect_66[] = {
+ 0xed, 0xfd, 0x1a, 0xfb, 0x19, 0x1a, 0x1a, 0x1a,
+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0xff, 0xff,
+ 0x6f, 0xf6, 0x9c, 0x01, 0xff, 0xff, 0xda, 0x7c,
+ 0x99, 0x17, 0x10, 0x0d, 0xec, 0x9b, 0xce, 0xeb
+};
+
+uint8_t std_vect_67[] = {
+ 0xcc, 0xcc
+};
+
+uint8_t std_vect_68[] = {
+ 0xed, 0x30, 0x17, 0xff, 0x7f, 0xff, 0xff, 0x00,
+ 0x00, 0x00, 0x80, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x7e, 0x1c, 0xfb, 0xe3,
+ 0xdb, 0x7c, 0x59, 0x99
+};
+
+uint8_t std_vect_69[] = {
+ 0xed, 0xb2, 0x1f, 0xee, 0xcb, 0x5f, 0xec, 0xe8,
+ 0x03, 0x00, 0x00, 0xd6, 0xce, 0x55, 0xec, 0xcf,
+ 0xbe, 0x55, 0x55, 0x55, 0x55, 0x01, 0x55, 0x55,
+ 0x55, 0x55, 0x55, 0x55, 0x12, 0x55, 0x55, 0x55,
+ 0x55, 0x50, 0xc7, 0xb1, 0x6a, 0xeb, 0xb6, 0x6e,
+ 0xdb, 0xb6, 0x1d, 0xfb, 0xb6, 0x6e, 0xdb, 0xb6
+};
+
+uint8_t std_vect_70[] = {
+ 0xed, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
+ 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
+ 0xe0, 0xe0, 0xe0, 0xfb, 0xcb, 0x71, 0xec, 0xc7,
+ 0xbe, 0x1f, 0xc7, 0xb4
+};
+
+uint8_t std_vect_71[] = {
+ 0xdd, 0x00, 0x80, 0x01, 0xfa, 0xff
+};
+
+uint8_t std_vect_72[] = {
+ 0x05, 0x20, 0x00, 0xff, 0xe5
+};
+
+uint8_t std_vect_73[] = {
+ 0x04, 0x00, 0x04
+};
+
+uint8_t std_vect_74[] = {
+ 0x34
+};
+
+uint8_t std_vect_75[] = {
+ 0x25, 0x28
+};
+
+uint8_t std_vect_76[] = {
+ 0x0a, 0xee, 0x2d, 0x2d, 0x00, 0x01, 0x00, 0x00,
+ 0x2d, 0xff, 0xff, 0x2d, 0x2d, 0x34
+};
+
+uint8_t std_vect_77[] = {
+ 0x7c, 0x99, 0x17, 0x66, 0x85, 0x17, 0x84, 0x69,
+ 0x69, 0x00, 0x7f
+};
+
+uint8_t std_vect_78[] = {
+ 0x8d, 0x10, 0x7a, 0xf4, 0x18
+};
+
+uint8_t std_vect_79[] = {
+ 0xed, 0xfd, 0x10, 0xae, 0xb6, 0x6c, 0xc7, 0xb1,
+ 0x6e, 0xeb, 0x99, 0x99, 0x99, 0x99, 0x99, 0x9b,
+ 0xba, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
+ 0x94, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
+ 0x88, 0xee, 0x04, 0xbe, 0x03, 0x00, 0x00, 0x10
+};
+
+uint8_t std_vect_80[] = {
+ 0x7c, 0x99, 0x17, 0xfd, 0xfd, 0x6d, 0xc7, 0xb6,
+ 0xe4, 0x88, 0x34, 0x77, 0x6d, 0xd7, 0x34
+};
+
+uint8_t std_vect_81[] = {
+ 0x15, 0xe7, 0xff, 0x00, 0x23, 0x04, 0x00, 0x04,
+ 0x6d, 0xd7, 0x34, 0x55, 0xd7, 0x34
+};
+
+uint8_t std_vect_82[] = {
+ 0x9d, 0x00, 0x00, 0xf4, 0x8b
+};
+
+uint8_t std_vect_83[] = {
+ 0x6d, 0xfd, 0xfd, 0x55, 0xbe, 0xb6, 0x6d, 0xd7,
+ 0x35
+};
+
+uint8_t std_vect_84[] = {
+ 0x64, 0xc3, 0xc3, 0x84, 0x84, 0x10, 0x9c, 0x10,
+ 0x0c, 0x0c, 0x01, 0x9c, 0x20
+};
+
+uint8_t std_vect_85[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0x00, 0x00, 0x00, 0x00,
+ 0x52, 0xef, 0xc7, 0xa3, 0xa3, 0xa3, 0xb3, 0xa3,
+ 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3, 0xa3,
+ 0xa3, 0xa3, 0x45, 0x45, 0x45, 0x4e, 0x45, 0x45,
+ 0x45, 0x45, 0x45, 0x45, 0x45, 0x45, 0x45, 0x45,
+ 0x45, 0xb6, 0xaf, 0xeb, 0xb0, 0x48, 0x4b, 0x69,
+ 0xa5, 0x4c, 0x9b, 0x48, 0xab, 0x2d, 0xb5, 0x6f,
+ 0x50, 0x63, 0x59, 0x8a, 0x8d, 0x48, 0xab, 0x82,
+ 0x8f, 0xc2, 0x3f, 0x01
+};
+
+uint8_t std_vect_86[] = {
+ 0x7c, 0x99, 0x0f, 0x85, 0x42, 0x81, 0xff, 0xff,
+ 0x80, 0x00, 0xf4, 0x34
+};
+
+uint8_t std_vect_87[] = {
+ 0xe4, 0x2d, 0x2d, 0x2d, 0x2d, 0x51, 0x2d, 0x2d,
+ 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x34
+};
+
+uint8_t std_vect_88[] = {
+ 0x65, 0x6c, 0x71, 0xc1, 0x1a, 0x1a, 0x00, 0x6c,
+ 0x71, 0xc1, 0x1a, 0x00, 0x00, 0x27
+};
+
+uint8_t std_vect_89[] = {
+ 0xdd, 0xed, 0xa3
+};
+
+uint8_t std_vect_90[] = {
+ 0x72, 0x10, 0x00, 0x10
+};
+
+uint8_t std_vect_91[] = {
+ 0xed, 0xfd, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
+ 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
+ 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
+ 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
+ 0xff, 0x7f, 0x01
+};
+
+uint8_t std_vect_92[] = {
+ 0xed, 0xfd, 0x6d, 0x00, 0x00, 0x80, 0xff, 0xff,
+ 0x00, 0x04, 0x84, 0x65, 0x75, 0x7b, 0xff
+};
+
+uint8_t std_vect_93[] = {
+ 0x00, 0x00, 0x00, 0xff, 0xff
+};
+
+uint8_t std_vect_94[] = {
+ 0x7a, 0x7f, 0x18, 0x20, 0x20, 0x08, 0x08, 0x7a,
+ 0x7f, 0x18, 0x20, 0x20, 0x08, 0x08, 0x25, 0xfe,
+ 0x25, 0xfe
+};
+
+uint8_t std_vect_95[] = {
+ 0xfa, 0xfd, 0x50, 0xc7, 0x50, 0xc7, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00
+};
+
+uint8_t std_vect_96[] = {
+ 0xed, 0xdd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xcb, 0x7c, 0xec, 0xe4, 0xe0, 0xb2, 0x1f, 0xfb,
+ 0xe4, 0x71, 0xec, 0xc7, 0xa5, 0x42, 0x81, 0x74,
+ 0xce, 0xfb, 0xb1, 0x6f, 0xcb, 0xbc, 0x65, 0x31,
+ 0x08, 0xce, 0xeb, 0x7a, 0x6c, 0xfb, 0x76, 0xec,
+ 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0xb6, 0x6e, 0xd6,
+ 0xb6, 0xaf, 0xf8, 0xb2, 0x6d, 0xc7, 0x7a, 0xec,
+ 0xcb, 0x32, 0x6f, 0xdb, 0xbc, 0xcc, 0xf3, 0xb2,
+ 0xcd, 0x2f, 0xc9, 0xb2, 0x2e, 0xc7, 0xb6, 0xad,
+ 0xc7, 0x7e, 0xbc, 0xbf, 0xee, 0xfb, 0x7d, 0xbf,
+ 0x40, 0xc7, 0xb1, 0x1f, 0xfb, 0x71, 0xec, 0x3d,
+ 0xc7, 0xeb, 0xbe, 0x1f
+};
+
+uint8_t std_vect_97[] = {
+ 0xed, 0xfd, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0,
+ 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0,
+ 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0,
+ 0xd0, 0x6d, 0xc7, 0xb6, 0xd8, 0xd8, 0xd8, 0xd8,
+ 0xf9, 0xd8, 0xd8, 0xb2
+};
+
+uint8_t std_vect_98[] = {
+ 0x0c, 0x00, 0x00, 0x03, 0xdb, 0xf7, 0xff, 0x00
+};
+
+uint8_t std_vect_99[] = {
+ 0xed, 0xfd, 0xa0, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x0c,
+ 0x0c, 0x40, 0x0c, 0x0c
+};
+
+uint8_t std_vect_100[] = {
+ 0x0c, 0x01, 0x80, 0xb6, 0xc7, 0x89
+};
+
+uint8_t std_vect_101[] = {
+ 0xed, 0xfd, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
+ 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
+ 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
+ 0xb0, 0xb0, 0xb0, 0xb0, 0xab, 0x82, 0x8f, 0xc2,
+ 0x3f, 0x00
+};
+
+uint8_t std_vect_102[] = {
+ 0xed, 0x22, 0x1f, 0x00, 0x00, 0x00, 0x01, 0x22,
+ 0x21, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0xfd,
+ 0x4c, 0x4c, 0x4c, 0xec
+};
+
+uint8_t std_vect_103[] = {
+ 0xed, 0xb8, 0xb1, 0xe8, 0x03, 0x08, 0x00, 0xc7,
+ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+ 0x22, 0x00, 0xc7, 0x22, 0x22, 0x22, 0x22, 0x22,
+ 0x80, 0x32, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+ 0x22, 0x22, 0x21, 0x22, 0x22, 0x22, 0x22, 0x22,
+ 0x22, 0x22, 0x22, 0xc9
+};
+
+uint8_t std_vect_104[] = {
+ 0xed, 0xfd, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0xf0, 0x1f, 0xc7, 0xb4
+};
+
+uint8_t std_vect_105[] = {
+ 0xed, 0x02, 0x92, 0xc7, 0xb6, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xc7,
+ 0xdb, 0xba, 0x1d, 0xfb
+};
+
+uint8_t std_vect_106[] = {
+ 0xad, 0xfd, 0xcc, 0xcc, 0xff, 0xcc, 0xcc, 0xcc,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x7e, 0x1c, 0xfb,
+ 0xad, 0xcd, 0xeb, 0xe2
+};
+
+uint8_t std_vect_107[] = {
+ 0x4a, 0xff, 0xff, 0x00, 0x40, 0x00, 0x64, 0x7e,
+ 0xeb, 0xab, 0xeb, 0xab, 0x82, 0x00, 0x00, 0xd2,
+ 0x3a, 0x7f, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x40, 0xeb,
+ 0xd5, 0x82, 0x00, 0x00, 0x39, 0x10
+};
+
+uint8_t std_vect_108[] = {
+ 0x0c, 0x8b, 0x8b, 0xc7, 0xb6, 0x7f, 0x20, 0x40,
+ 0x80, 0x40, 0x00, 0x00, 0x01, 0x0c, 0x8b, 0x8b,
+ 0xc7, 0xa4, 0x7f, 0x20, 0x40, 0x80, 0x40, 0x01,
+ 0x05, 0xff, 0xff, 0x05, 0xcb, 0x31, 0xff, 0xff,
+ 0xff, 0x04, 0xc3, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0xb1, 0x7f, 0xcb, 0xcb, 0x31, 0xff,
+ 0xff, 0xff, 0x04, 0xcb, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0xcb, 0x31, 0xff, 0xff, 0xb4, 0xce,
+ 0xfb, 0xb1, 0x7f, 0x89, 0x7c, 0x7c, 0x7c, 0x7c,
+ 0x43, 0xbb, 0xca, 0xcd, 0xfa, 0x84, 0x89, 0x89
+};
+
+uint8_t std_vect_109[] = {
+ 0x0c, 0x24, 0x8b, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x18, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x46, 0x89
+};
+
+uint8_t std_vect_110[] = {
+ 0xec, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x6e, 0xeb, 0xb6, 0x6e,
+ 0xdb, 0xba, 0x1d, 0xfb, 0xb6, 0x6e, 0x40, 0xb6,
+ 0x6d, 0xb5, 0x7a, 0xec, 0xdb, 0xb6, 0xed, 0xcb,
+ 0x31, 0xe6, 0xbe, 0x1f, 0x2e, 0x26, 0xab, 0x75,
+ 0xe3, 0x6d, 0xd3, 0xe3, 0x6d, 0xd3, 0x55, 0xa6,
+ 0x01, 0x00, 0x21, 0x1f, 0xef, 0xfb, 0xaf, 0xf7,
+ 0x88, 0x34, 0x77, 0x26, 0x6f, 0xdb, 0xbc, 0xcc,
+ 0x72, 0xbc, 0xec, 0xfb, 0x7e, 0x64, 0xfb, 0x57,
+ 0x10, 0x9c, 0xad, 0xdb, 0xbc, 0x2d, 0xf3, 0x34,
+ 0x8d, 0x24, 0xa6, 0x65, 0x31, 0x08, 0x71, 0x31,
+ 0x08, 0x0d, 0xd3, 0x34, 0x6d, 0xa3, 0x85, 0x42,
+ 0x81, 0xf4, 0x1d, 0xe1, 0x47, 0x4a, 0x2d, 0x8d
+};
+
+uint8_t std_vect_111[] = {
+ 0xed, 0xfd, 0x6d, 0xc7, 0xb6, 0x6d, 0x98, 0x79,
+ 0xa6, 0x48, 0xb7, 0x82, 0x8f, 0xfb, 0xb1, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0xb6, 0x6c, 0xc7, 0xb1, 0x6e, 0xeb, 0xb6, 0x6e,
+ 0xdb, 0xba, 0x1d, 0xfb, 0xb6, 0x6e, 0xdb, 0x99,
+ 0xaf, 0xeb, 0xc7, 0xeb, 0xbe, 0x1f, 0xc7, 0xfe
+};
+
+uint8_t std_vect_112[] = {
+ 0x7c, 0x99, 0x17, 0xfd, 0xfd, 0x6d, 0xc2, 0xef,
+ 0xff, 0xff, 0x7f, 0x33, 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33, 0x59, 0xfb, 0xb6, 0x35,
+ 0xab, 0x77, 0x8f, 0xc2
+};
+
+uint8_t std_vect_113[] = {
+ 0x4a, 0xff, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e,
+ 0xbd, 0x16, 0xf9, 0xb7, 0x4a, 0xff, 0xff, 0x00,
+ 0x40, 0x00, 0xe2, 0x7e, 0xbd, 0x16, 0xf9, 0xb7,
+ 0x13, 0x00, 0x40, 0x00, 0xe2, 0x7e, 0xbd, 0x1d,
+ 0x00, 0x00, 0xd2, 0x3a, 0xc1, 0x5a, 0x33, 0x00,
+ 0x00, 0x4f, 0x03, 0xeb, 0xd5, 0x82, 0x00, 0x00
+};
+
+uint8_t std_vect_114[] = {
+ 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7, 0x00, 0xcb,
+ 0xb6, 0x6c, 0x80, 0x00, 0xcb, 0x44, 0x6f, 0xdb,
+ 0x01
+};
+
+uint8_t std_vect_115[] = {
+ 0x0c, 0x8b, 0x8b, 0xc7, 0xb6, 0x65, 0x20, 0x40,
+ 0x80, 0x00, 0x00, 0xfb, 0x1a, 0xdc, 0xb5, 0xff,
+ 0xcb, 0x7a, 0xec, 0xfb, 0xf1, 0xb2, 0x1e, 0x00,
+ 0x12, 0x10, 0x00, 0xc7, 0xbe, 0x00, 0x01, 0xb4,
+ 0xad, 0xfb, 0x00, 0x7f, 0xcb, 0xcb, 0x24, 0xff,
+ 0xff, 0xff, 0x01, 0x01, 0x01, 0x09, 0x01, 0x09,
+ 0x65, 0x20, 0x40, 0x80, 0x00, 0x50, 0x00, 0x1c,
+ 0xdc, 0xb5, 0x3f, 0xcb, 0x7e, 0xec, 0xfb, 0xf1,
+ 0xb2, 0x1e, 0x00, 0x12, 0x10, 0x00, 0xc7, 0xbe,
+ 0x00, 0x3d, 0x80, 0xcb, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x09, 0x01, 0x00, 0x04, 0x01, 0x85,
+ 0x89, 0x89
+};
+
+uint8_t std_vect_116[] = {
+ 0x4a, 0xff, 0x66, 0xfd, 0x00, 0x40, 0x0a, 0x05,
+ 0xff, 0xff, 0x05, 0xf9, 0xff, 0xfb, 0x3f, 0x00,
+ 0x82, 0x00, 0xff, 0xff, 0x00, 0x00, 0xd2, 0x3a,
+ 0xcf, 0x5a, 0x33
+};
+
+uint8_t std_vect_117[] = {
+ 0x0c, 0x8b, 0x8b, 0xcb, 0xb6, 0x12, 0x10, 0x00,
+ 0xc7, 0xbe, 0x00, 0x01, 0xb4, 0xce, 0xfb, 0x00,
+ 0x7f, 0xcb, 0xcb, 0x31, 0xff, 0xe7, 0xff, 0x80,
+ 0xcb, 0x01, 0x65, 0x20, 0x40, 0x96, 0x00, 0x00,
+ 0x00, 0x1c, 0xdc, 0xb1, 0x1f, 0xcb, 0x69, 0xec,
+ 0x16, 0xf1, 0xb2, 0x1e, 0x00, 0x12, 0x10, 0x00,
+ 0xc7, 0xbe, 0x00, 0x01, 0xb4, 0xce, 0xfb, 0x00,
+ 0x7f, 0xcb, 0xcb, 0x31, 0xff, 0xfd, 0xff, 0x80,
+ 0xcb, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x09, 0x3d, 0x01, 0x89, 0x7c, 0x7c, 0x7c,
+ 0xfc, 0x43, 0xbb, 0xca, 0xcd, 0xfa, 0x00, 0x00
+};
+
+uint8_t std_vect_118[] = {
+ 0x4a, 0xff, 0xfd, 0x00, 0x40, 0x00, 0x82, 0x00,
+ 0xbd, 0x1d, 0xf9, 0xff, 0x00, 0x40, 0x00, 0x82,
+ 0x00, 0xbd, 0x1d, 0xf9, 0x00, 0x40, 0x00, 0x82,
+ 0x00, 0xbd, 0x1d, 0xb3
+};
+
+uint8_t std_vect_119[] = {
+ 0x0c, 0x8b, 0x8b, 0xc7, 0xb6, 0x6c, 0x20, 0x40,
+ 0x80, 0x00, 0x00, 0xfb, 0x1a, 0xdc, 0xb5, 0xff,
+ 0xcb, 0x7a, 0xec, 0xfb, 0xf1, 0xb2, 0x1e, 0x00,
+ 0x12, 0x10, 0x00, 0xc7, 0xbe, 0x00, 0x01, 0xb4,
+ 0xad, 0xfb, 0x00, 0x7f, 0xcb, 0xcb, 0x24, 0xff,
+ 0xff, 0xff, 0x01, 0x01, 0x01, 0x09, 0x01, 0x09,
+ 0x65, 0x20, 0x40, 0x80, 0x00, 0x50, 0x00, 0x1c,
+ 0xdc, 0xb5, 0x3f, 0xcb, 0x7e, 0xec, 0xfb, 0x30,
+ 0x17, 0x4a, 0xf1, 0xb2, 0x1e, 0x00, 0x12, 0x10,
+ 0x00, 0xc7, 0xbe, 0x00, 0x3d, 0x80, 0xcb, 0x01,
+ 0x01, 0x2d, 0xf7, 0xa9, 0x01, 0x09, 0x01, 0x00,
+ 0x04, 0x01, 0x85, 0x89, 0x89
+};
+
+uint8_t std_vect_120[] = {
+ 0x0c, 0x8b, 0x8b, 0xc7, 0xb6, 0x7f, 0x20, 0x40,
+ 0x80, 0x40, 0x00, 0x00, 0x01, 0x0c, 0x8b, 0x8b,
+ 0xc7, 0xa4, 0x7f, 0x20, 0x40, 0xb2, 0x1f, 0x62,
+ 0xcb, 0x1f, 0xc7, 0xb4, 0xd7, 0xfb, 0xb1, 0x6f,
+ 0xcb, 0x2c, 0xf3, 0xba, 0x6e, 0xeb, 0x00, 0x1d,
+ 0x00, 0xb3, 0x6f, 0x80, 0xff, 0xff, 0xff, 0xb6,
+ 0xc2, 0xb8, 0x1c, 0x00, 0x00, 0x04, 0x00, 0xec,
+ 0x5d, 0x6c, 0xfb, 0x76, 0x25, 0x49, 0x1a, 0x27,
+ 0x2f, 0x50, 0xcc, 0x10, 0x8e, 0x36, 0xcc, 0x76,
+ 0x8e, 0x20, 0x0d, 0xfe, 0x47, 0x75, 0xab, 0xca,
+ 0xcd, 0xfa, 0x02, 0x6c, 0x71, 0x8f, 0xf5, 0x65,
+ 0x31, 0x08, 0x37, 0x23, 0xfa, 0x86, 0xbc, 0xed,
+ 0xaf, 0xc7, 0xb1, 0xef, 0x31, 0x31, 0x31, 0x31,
+ 0x40, 0x00, 0x80, 0xff, 0x89, 0x7d, 0x7c, 0x99
+};
+
+uint8_t std_vect_121[] = {
+ 0x0c, 0x8b, 0x8b, 0xcb, 0xb6, 0x12, 0x10, 0x00,
+ 0xc7, 0xbe, 0x00, 0x01, 0xb4, 0xce, 0xfb, 0x00,
+ 0x7f, 0xcb, 0xcb, 0x31, 0x19, 0xe8, 0xff, 0x80,
+ 0xcb, 0x01, 0x65, 0x20, 0x40, 0x96, 0x00, 0x00,
+ 0x00, 0x1c, 0xdc, 0xb1, 0x1f, 0xcb, 0x69, 0xec,
+ 0x16, 0xf1, 0xb2, 0x1e, 0x00, 0x12, 0x10, 0x00,
+ 0xc7, 0xbe, 0x00, 0x00, 0x10, 0xce, 0xfb, 0x00,
+ 0x7f, 0xcb, 0xcb, 0x31, 0xff, 0xfd, 0xff, 0x80,
+ 0xcb, 0x6e, 0xdb, 0xb6, 0xaf, 0xeb, 0xb2, 0x6d,
+ 0xc7, 0x7a, 0xec, 0xdb, 0xb6, 0xed, 0xcb, 0x31,
+ 0xaf, 0xf3, 0x3c, 0x2d, 0xcb, 0x32, 0x6f, 0xdb,
+ 0xbc, 0xcc, 0xf3, 0xb2, 0xcd, 0x2f, 0xcb, 0xb2
+};
+
+uint8_t std_vect_122[] = {
+ 0x4a, 0xff, 0x66, 0xfd, 0x00, 0x40, 0x00, 0x82,
+ 0x54, 0xb5, 0x1d, 0xf9, 0xff, 0x00, 0x40, 0x00,
+ 0x82, 0x00, 0xb4, 0xfc, 0xf9, 0x00, 0x40, 0x00,
+ 0x82, 0x00, 0xb5, 0x54, 0x00, 0x40, 0x00, 0x82,
+ 0x00, 0xb4, 0xfc, 0xf9, 0x00, 0x40, 0x00, 0x82,
+ 0x00, 0x40, 0x00, 0xff, 0x7f, 0xbd, 0x1d, 0xfb,
+ 0x10, 0x00, 0xd2, 0x3a
+};
+
+uint8_t std_vect_123[] = {
+ 0x0c, 0x8b, 0x8b, 0xff, 0xff, 0x65, 0x20, 0x40,
+ 0x80, 0x00, 0xcf, 0xff, 0xc9, 0xcf, 0xcf, 0xcf,
+ 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2, 0xc2,
+ 0xb6, 0xc2, 0xc2, 0xc2, 0xa3, 0xc2, 0xc2, 0xc2,
+ 0xc2, 0xc2, 0xc2, 0xc2, 0xaa, 0xef, 0xc7, 0x7e,
+ 0x10, 0xfb, 0xb1, 0x1f, 0xec, 0xeb, 0xb2, 0x04,
+ 0x76, 0xec, 0xc7, 0x71, 0x6c, 0xcb, 0xb6, 0x6c,
+ 0x6e, 0xeb, 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb,
+ 0xb6, 0x6e, 0xdb, 0xb6, 0xaf, 0xeb, 0xb2, 0x6d,
+ 0xc7, 0x7a, 0xec, 0xdb, 0xb6, 0xcb, 0x32, 0xbf,
+ 0x1e, 0xc7, 0xb1, 0x1f, 0xc7, 0xfe, 0x72, 0xbc,
+ 0xec, 0xfb, 0x7e, 0xec, 0xfb, 0x71, 0xec, 0x2f,
+ 0xc7, 0xeb, 0xbe, 0x1f, 0xc7, 0xfe, 0x72, 0xb4
+};
+
+uint8_t std_vect_124[] = {
+ 0xfa, 0xfd, 0x50, 0x08, 0x20, 0xc7, 0x00, 0xcb,
+ 0xb6, 0x6e, 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00,
+ 0x20, 0xc7, 0x00, 0xcb, 0xb6, 0x6c, 0x80, 0x00,
+ 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7, 0x00, 0xcb,
+ 0xff, 0x7f, 0x00, 0x00, 0xcb, 0xcb, 0x44, 0x6f,
+ 0xdb, 0x40, 0x00, 0x00, 0x00, 0xcb, 0xcb, 0x44,
+ 0x6f, 0xdb, 0x01
+};
+
+uint8_t std_vect_125[] = {
+ 0x0c, 0x8b, 0x8b, 0xcb, 0xb6, 0x12, 0x10, 0x00,
+ 0xc7, 0xbe, 0x00, 0x01, 0xb4, 0xce, 0xfb, 0x00,
+ 0x7f, 0xcb, 0xcb, 0x31, 0x00, 0x00, 0x00, 0x80,
+ 0xcb, 0x01, 0x65, 0x20, 0x40, 0x96, 0x00, 0x00,
+ 0x00, 0x1c, 0xdc, 0xb1, 0x1f, 0xcb, 0x69, 0xec,
+ 0x16, 0xf1, 0xb2, 0x1e, 0x00, 0x12, 0x10, 0x00,
+ 0xc7, 0xbe, 0x00, 0x01, 0xb4, 0xce, 0xfb, 0x00,
+ 0x7f, 0xcb, 0xcb, 0x31, 0xff, 0xfd, 0xff, 0x80,
+ 0xcb, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x09, 0x3d, 0x01, 0x81, 0x7c, 0x7c, 0x7c,
+ 0xcd, 0xfa, 0x00, 0x00
+};
+
+uint8_t std_vect_126[] = {
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80,
+ 0x00, 0x00, 0x00, 0x80, 0xff, 0x80, 0x1c
+};
+
+uint8_t std_vect_127[] = {
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0x20, 0x20, 0x80, 0x00,
+ 0xff, 0xff, 0xff, 0x7f, 0x00, 0x00, 0x1c
+};
+
+uint8_t std_vect_128[] = {
+ 0x34, 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xe8,
+ 0x17, 0x00, 0x00, 0xf2, 0xfb, 0xc7, 0x7e, 0xbc,
+ 0xbf, 0xee, 0xfb, 0xb1, 0xbf, 0x1e, 0xc7, 0xb1,
+ 0x1f, 0xfb, 0x76, 0xec, 0x0e, 0x0e, 0x0e, 0x0e,
+ 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e,
+ 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e,
+ 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e,
+ 0x0e, 0x0e, 0x0e, 0xc7, 0x71, 0x6c, 0xcb, 0xb6,
+ 0x6c, 0xc7, 0xb1, 0x6e, 0xeb, 0xb6, 0x6e, 0xdb,
+ 0xba, 0x1d, 0xbc, 0xcc, 0xf3, 0xb2, 0xe3, 0x2f,
+ 0xcb, 0x84, 0x11, 0x9c, 0xb2, 0x2e, 0xc7, 0xb6,
+ 0xad, 0xc7, 0x7e, 0xbc, 0xbf, 0xee, 0xfb, 0xb1,
+ 0xbf, 0x1e, 0xc7, 0xb1, 0x1f, 0xfb, 0x76, 0xec,
+ 0x2f, 0xc7, 0xeb, 0xd6, 0x1f
+};
+
+uint8_t std_vect_129[] = {
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x39, 0x39, 0x52, 0x39, 0x39, 0x39,
+ 0x39, 0x39, 0x39, 0x7c, 0xb1, 0x17, 0x39, 0xff,
+ 0x7f, 0xf9
+};
+
+uint8_t std_vect_130[] = {
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0x80, 0x1c, 0x7f, 0xff
+};
+
+uint8_t std_vect_131[] = {
+ 0x0c, 0x8b, 0x8b, 0xc7, 0xb6, 0x41, 0x20, 0x4a,
+ 0x80, 0x00, 0x23, 0xeb, 0x20, 0xd3, 0x55, 0xa6,
+ 0x16, 0x7a, 0x2e, 0xab, 0xeb, 0x7a, 0x6c, 0xc7,
+ 0xb1, 0x6f, 0xc7, 0x7e, 0x1c, 0xfb, 0x5e, 0xb7,
+ 0x1e, 0x00, 0x12, 0x10, 0x00, 0xc7, 0xbe, 0x00,
+ 0x01, 0xb4, 0xce, 0xfb, 0x00, 0x80, 0xff, 0xff,
+ 0x31, 0xff, 0xff, 0xa3, 0xa9, 0x76, 0x96, 0xd1,
+ 0xbe, 0x41, 0xbb, 0xca, 0xcd, 0xfa, 0x02, 0x6c,
+ 0x00, 0x89, 0x88, 0x00, 0x00, 0x10, 0xff, 0xff,
+ 0x00, 0xd9, 0x20, 0x00, 0x00, 0x04, 0x00, 0x00,
+ 0x00, 0xfb, 0x91, 0xec, 0xfb, 0xd3, 0xbe, 0x1f,
+ 0x00, 0x00, 0x80, 0x00, 0x63, 0x2e, 0xc7, 0xa6,
+ 0xad, 0xc7, 0x7e, 0xbc, 0xbf, 0xeb, 0x04, 0x00,
+ 0x63, 0x2e, 0xc7, 0xa6, 0xad, 0xc7, 0x7e, 0xbc,
+ 0xbf, 0xeb, 0xf5, 0xfb, 0x72, 0xbc, 0xec, 0xfb
+};
+
+uint8_t std_vect_132[] = {
+ 0xfa, 0xfd, 0x50, 0x08, 0x20, 0xc7, 0x00, 0xcb,
+ 0xb6, 0x6c, 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00,
+ 0x20, 0xc7, 0x00, 0xcb, 0xb6, 0xb6, 0x6c, 0x80,
+ 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7, 0x00,
+ 0xcb, 0xb6, 0x6c, 0xb6, 0x6c, 0x80, 0x00, 0xfa,
+ 0xfd, 0x50, 0x00, 0x20, 0x80, 0x00, 0xfa, 0xfd,
+ 0x50, 0x00, 0x20, 0xc7, 0x00, 0xcd, 0xb6, 0x6c,
+ 0x80, 0xf6, 0x80, 0xff
+};
+
+uint8_t std_vect_133[] = {
+ 0x4a, 0xff, 0x66, 0xfd, 0x00, 0x40, 0x00, 0x82,
+ 0x54, 0xb5, 0x1d, 0xf9, 0xf8, 0x00, 0x40, 0x00,
+ 0x82, 0x00, 0xb4, 0xfc, 0x00, 0x40, 0x00, 0x82,
+ 0x00, 0xb4, 0xfc, 0xf9, 0xff, 0x66, 0xfd, 0x00,
+ 0x40, 0x00, 0x82, 0x54, 0xb5, 0x1d, 0xf9, 0xf8,
+ 0x00, 0x40, 0x00, 0x82, 0x00, 0xb4, 0xf9, 0xf8,
+ 0x00, 0x40, 0x00, 0x82, 0x00, 0xb4, 0xfc, 0xf9,
+ 0x00, 0xb5, 0x54, 0x00, 0x40, 0x00, 0x82, 0x00,
+ 0xb4, 0xfc, 0xf9, 0xff, 0x66, 0xfd, 0x00, 0x40,
+ 0x00, 0x82, 0x54, 0xb5, 0x1d, 0xf9, 0xf8, 0x00,
+ 0x40, 0x00, 0x82, 0x00, 0xb4, 0xfc, 0xf9, 0x00,
+ 0x40, 0x00, 0x82, 0x00, 0xb5, 0x54, 0x00, 0x40,
+ 0x00, 0x82, 0x00, 0x40, 0xfc, 0xf9, 0x00, 0x40,
+ 0x00, 0x82, 0x00, 0xb5, 0x54, 0x00, 0x40, 0x00,
+ 0x82, 0x00, 0x40, 0x00, 0x82, 0x00, 0x40, 0x00,
+ 0xff, 0x7f, 0xbd, 0x1d, 0xfb, 0x10
+};
+
+uint8_t std_vect_134[] = {
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff,
+ 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00,
+ 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x20, 0xff, 0xff, 0x7f, 0x18, 0xff
+};
+
+uint8_t std_vect_135[] = {
+ 0x4a, 0xff, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e,
+ 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e, 0xff, 0xff,
+ 0x00, 0x40, 0x00, 0xe2, 0x7f, 0xfd, 0x00, 0x40,
+ 0x00, 0xe2, 0x7e, 0xbd, 0x00, 0x40, 0x00, 0xe2,
+ 0x7e, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e, 0xff,
+ 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7f, 0xfd, 0x00,
+ 0x40, 0x00, 0xe2, 0x7e, 0xbd, 0x1d, 0x1d, 0xff,
+ 0x82, 0x00, 0x00, 0xd2, 0x3a, 0x39, 0xd2, 0x3a,
+ 0x7f, 0x10
+};
+
+uint8_t std_vect_136[] = {
+ 0xed, 0xfd, 0x51, 0xc7, 0xb6, 0x6d, 0xdb, 0x5a,
+ 0x33, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f
+};
+
+uint8_t std_vect_137[] = {
+ 0xfa, 0xfd, 0x50, 0x08, 0x20, 0xc7, 0x00, 0xcb,
+ 0xb6, 0x6c, 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00,
+ 0x20, 0xc7, 0x00, 0xcb, 0xb6, 0xb6, 0x6c, 0x80,
+ 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7, 0x00,
+ 0xcb, 0xb6, 0x6c, 0xb6, 0x6c, 0x80, 0x00, 0xfa,
+ 0xfd, 0x50, 0x00, 0x20, 0x80, 0x00, 0xfa, 0xfd,
+ 0x50, 0x08, 0x20, 0xc7, 0x00, 0xcb, 0xb6, 0x6c,
+ 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7,
+ 0x20, 0xc7, 0x00, 0x00, 0x00, 0x10, 0x00, 0x0d,
+ 0x80, 0xff
+};
+
+uint8_t std_vect_138[] = {
+ 0xfa, 0xfd, 0x50, 0x08, 0x20, 0xc7, 0x00, 0xcb,
+ 0xb6, 0xb6, 0x6c, 0x80, 0x00, 0xfa, 0xfd, 0x50,
+ 0x00, 0x20, 0xc7, 0x00, 0x80, 0x00, 0xfa, 0xfd,
+ 0x50, 0x00, 0x20, 0xc7, 0xb6, 0x6c, 0x80, 0x00,
+ 0xfa, 0xfd, 0x50, 0x00, 0x20, 0x80, 0x00, 0xfa,
+ 0xfd, 0x50, 0x00, 0x20, 0xc7, 0x00, 0x6c, 0x80,
+ 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7, 0x00,
+ 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7,
+ 0xb6, 0x6c, 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00,
+ 0x20, 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20,
+ 0xc7, 0x00, 0xcd, 0xb6, 0x6c, 0x80, 0xf6, 0x80,
+ 0xff
+};
+
+uint8_t std_vect_139[] = {
+ 0xfa, 0xfd, 0x50, 0x08, 0x20, 0xc7, 0x00, 0xcb,
+ 0xb6, 0x6c, 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00,
+ 0x20, 0xd6, 0x00, 0xcb, 0xb6, 0xb6, 0x6c, 0x80,
+ 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7, 0x00,
+ 0xcb, 0xb6, 0x6c, 0xb6, 0x6c, 0x80, 0x00, 0xfa,
+ 0xfd, 0x50, 0x00, 0x20, 0x80, 0x00, 0xfa, 0xfd,
+ 0x50, 0x00, 0x20, 0xc7, 0x00, 0xcd, 0xb6, 0x6c,
+ 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00, 0x20, 0xc7,
+ 0xb6, 0x6c, 0x80, 0x00, 0xfa, 0xfd, 0x50, 0x00,
+ 0x20, 0x80, 0x00, 0xfa, 0x80, 0xf6, 0x80, 0xff
+};
+
+uint8_t std_vect_140[] = {
+ 0x4a, 0xff, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e,
+ 0xff, 0x00, 0x40, 0x00, 0xe2, 0xb6, 0xff, 0x00,
+ 0x40, 0x00, 0xe2, 0xc7, 0xb1, 0x6e, 0xeb, 0x32,
+ 0x7e, 0xff, 0x00, 0x40, 0x00, 0xe2, 0xb6, 0xff,
+ 0x00, 0x40, 0x00, 0xe2, 0xc7, 0xb1, 0x6e, 0xeb,
+ 0xff, 0x00, 0x40, 0x00, 0xe2, 0xb6, 0xff, 0x00,
+ 0x40, 0x00, 0xe2, 0xc7, 0xb1, 0x6e, 0xeb, 0x32,
+ 0x7e, 0xff, 0x00, 0x40, 0x00, 0xe2, 0xb6, 0xff,
+ 0x00, 0x40, 0x00, 0x00, 0x10, 0xb1, 0x6e, 0xeb,
+ 0xb6, 0xed, 0x85, 0x84, 0x84, 0x84, 0x84, 0x84
+};
+
+uint8_t std_vect_141[] = {
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff,
+ 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00,
+ 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff,
+ 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00,
+ 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff,
+ 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff
+};
+
+uint8_t std_vect_142[] = {
+ 0x4a, 0xff, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e,
+ 0xbd, 0x1d, 0xf9, 0xff, 0x00, 0x40, 0x00, 0xe2,
+ 0xb6, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e, 0xf9,
+ 0xff, 0x00, 0x40, 0x00, 0xe2, 0xd0, 0xff, 0x00,
+ 0x40, 0x00, 0xe2, 0x7e, 0xf9, 0xff, 0x00, 0x40,
+ 0x00, 0xe2, 0xd0, 0xff, 0x00, 0x40, 0x00, 0xe2,
+ 0x7e, 0x00, 0xe2, 0xb6, 0x7f, 0xc5, 0xa4, 0xee,
+ 0x11, 0xff, 0x8f, 0xf5
+};
+
+uint8_t std_vect_143[] = {
+ 0x1c, 0xa7, 0x51, 0x20, 0xf8, 0xf8, 0xf8, 0xf8,
+ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0xf8, 0xf8,
+ 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb, 0xb6, 0x6f,
+ 0x10, 0x00, 0x6f, 0xdb, 0x00, 0x01, 0x00, 0x00
+};
+
+uint8_t std_vect_144[] = {
+ 0x1c, 0xa7, 0x51, 0x20, 0xf8, 0xf8, 0xf8, 0xf8,
+ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0x96,
+ 0x96, 0x96, 0x96, 0x96, 0x96, 0x96, 0xf8, 0xf8,
+ 0xb6, 0x6e, 0xdb, 0xba, 0x1d, 0xfb, 0xb6, 0x6f,
+ 0x10, 0x00, 0x6f, 0xdb, 0x00, 0x01, 0x00, 0xdb,
+ 0x00, 0x01, 0x00
+};
+
+uint8_t std_vect_145[] = {
+ 0x4a, 0xff, 0x66, 0xfd, 0x01, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80,
+ 0xff
+};
+
+uint8_t std_vect_146[] = {
+ 0x4a, 0xff, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e,
+ 0xbd, 0x1d, 0x40, 0x00, 0xf9, 0xff, 0x00, 0x40,
+ 0x00, 0xe2, 0xb6, 0xff, 0x00, 0x40, 0x00, 0xe2,
+ 0x7e, 0xf9, 0xff, 0x00, 0x40, 0x00, 0xe2, 0xd0,
+ 0xff, 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e, 0xbd,
+ 0x1d, 0xf9, 0xff, 0x00, 0x40, 0x00, 0xe2, 0xb6,
+ 0xff, 0x00, 0x40, 0x00, 0xe2, 0x7e, 0xf9, 0xff,
+ 0x00, 0x40, 0x00, 0xe2, 0xd0, 0xff, 0x00, 0x40,
+ 0x00, 0xe2, 0x7e, 0xf9, 0xff, 0x00, 0x40, 0x00,
+ 0xe2, 0xd0, 0xff, 0x00, 0xff, 0x00, 0x40, 0x00,
+ 0xe2, 0x7e, 0xf9, 0xff, 0x00, 0x40, 0x00, 0xe2,
+ 0x7e, 0x00, 0xe2, 0xb6, 0x7f, 0xc5, 0xa4, 0xee,
+ 0x11, 0xff, 0xa9, 0xf5
+};
+
+uint8_t std_vect_147[] = {
+ 0x32, 0x6c, 0x71, 0xb3, 0x00, 0x10, 0xd7, 0x34,
+ 0x3d, 0xef, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0x1f,
+ 0xb3, 0x00, 0x10, 0xd7, 0x34, 0x3d, 0xef, 0xc7,
+ 0x7e, 0x1c, 0xfb, 0xb1, 0xb3, 0x00, 0x10, 0xd7,
+ 0x34, 0x3d, 0x1f, 0xc7, 0x6c, 0x71, 0xb3, 0x00,
+ 0x10, 0xd7, 0x34, 0x3d, 0xef, 0xc7, 0x7e, 0x1c
+};
+
+uint8_t std_vect_148[] = {
+ 0x32, 0x6c, 0x71, 0xb3, 0x00, 0x10, 0xd7, 0x34,
+ 0x7e, 0x1c, 0xef, 0xb1, 0x1f, 0x9f, 0x00, 0x10,
+ 0xd7, 0x34, 0x3d, 0xe2, 0xc7, 0x7e, 0x1c, 0x1f,
+ 0xb3, 0x00, 0x10, 0xd7, 0x34, 0x3d, 0xe2, 0xc7,
+ 0x7e, 0x1c, 0x1f, 0xb3, 0x00, 0x10, 0xd7, 0x34,
+ 0x3d, 0xe2, 0xc7, 0x7e, 0x1c, 0xfb, 0xb1, 0xb3,
+ 0x00, 0x96, 0x96, 0x96
+};
+
+uint8_t std_vect_149[] = {
+ 0x4a, 0xff, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff,
+ 0x02, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x80,
+ 0xff
+};
+
+uint8_t std_vect_150[] = {
+ 0x1c, 0xe5, 0x03, 0x97, 0x00, 0x80, 0xff, 0xff,
+ 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0x10,
+ 0xf3, 0x1d, 0x1d, 0x09, 0x1d, 0x09, 0xa5, 0x00,
+ 0x00, 0x08, 0xa5, 0x30, 0x11, 0x11, 0x11, 0xa5,
+ 0xa5, 0xa5, 0xa5, 0xa5, 0x80, 0xa8, 0x7a, 0x11,
+ 0x11, 0x09, 0x01, 0x09, 0x1d, 0x1d, 0x09, 0x05,
+ 0x1d, 0xa7, 0x11, 0x11, 0x11, 0x11, 0x6d, 0x10,
+ 0x6d, 0x6d, 0xa5, 0xa5, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x30, 0x30
+};
+
+struct vect_result {
+ uint8_t *vector;
+ int vector_length;
+ int expected_error;
+};
+
+struct vect_result std_vect_array[] = {
+ {std_vect_0, sizeof(std_vect_0), ISAL_INVALID_BLOCK},
+ {std_vect_1, sizeof(std_vect_1), ISAL_INVALID_BLOCK},
+ {std_vect_2, sizeof(std_vect_2), ISAL_INVALID_BLOCK},
+ {std_vect_3, sizeof(std_vect_3), ISAL_INVALID_BLOCK},
+ {std_vect_4, sizeof(std_vect_4), ISAL_INVALID_BLOCK},
+ {std_vect_5, sizeof(std_vect_5), ISAL_INVALID_BLOCK},
+ {std_vect_6, sizeof(std_vect_6), ISAL_INVALID_BLOCK},
+ {std_vect_7, sizeof(std_vect_7), ISAL_INVALID_BLOCK},
+ {std_vect_8, sizeof(std_vect_8), ISAL_INVALID_BLOCK},
+ {std_vect_9, sizeof(std_vect_9), ISAL_INVALID_BLOCK},
+ {std_vect_10, sizeof(std_vect_10), ISAL_INVALID_BLOCK},
+ {std_vect_11, sizeof(std_vect_11), ISAL_INVALID_BLOCK},
+ {std_vect_12, sizeof(std_vect_12), ISAL_INVALID_BLOCK},
+ {std_vect_13, sizeof(std_vect_13), ISAL_INVALID_BLOCK},
+ {std_vect_14, sizeof(std_vect_14), ISAL_INVALID_BLOCK},
+ {std_vect_15, sizeof(std_vect_15), ISAL_INVALID_BLOCK},
+ {std_vect_16, sizeof(std_vect_16), ISAL_INVALID_BLOCK},
+ {std_vect_17, sizeof(std_vect_17), ISAL_INVALID_BLOCK},
+ {std_vect_18, sizeof(std_vect_18), ISAL_INVALID_BLOCK},
+ {std_vect_19, sizeof(std_vect_19), ISAL_INVALID_BLOCK},
+ {std_vect_20, sizeof(std_vect_20), ISAL_INVALID_BLOCK},
+ {std_vect_21, sizeof(std_vect_21), ISAL_INVALID_BLOCK},
+ {std_vect_22, sizeof(std_vect_22), ISAL_INVALID_BLOCK},
+ {std_vect_23, sizeof(std_vect_23), ISAL_INVALID_BLOCK},
+ {std_vect_24, sizeof(std_vect_24), ISAL_INVALID_BLOCK},
+ {std_vect_25, sizeof(std_vect_25), ISAL_INVALID_BLOCK},
+ {std_vect_26, sizeof(std_vect_26), ISAL_INVALID_BLOCK},
+ {std_vect_27, sizeof(std_vect_27), ISAL_INVALID_BLOCK},
+ {std_vect_28, sizeof(std_vect_28), ISAL_INVALID_BLOCK},
+ {std_vect_29, sizeof(std_vect_29), ISAL_INVALID_BLOCK},
+ {std_vect_30, sizeof(std_vect_30), ISAL_INVALID_BLOCK},
+ {std_vect_31, sizeof(std_vect_31), ISAL_INVALID_BLOCK},
+ {std_vect_32, sizeof(std_vect_32), ISAL_INVALID_BLOCK},
+ {std_vect_33, sizeof(std_vect_33), ISAL_INVALID_BLOCK},
+ {std_vect_34, sizeof(std_vect_34), ISAL_INVALID_BLOCK},
+ {std_vect_35, sizeof(std_vect_35), ISAL_INVALID_BLOCK},
+ {std_vect_36, sizeof(std_vect_36), ISAL_INVALID_BLOCK},
+ {std_vect_37, sizeof(std_vect_37), ISAL_INVALID_BLOCK},
+ {std_vect_38, sizeof(std_vect_38), ISAL_INVALID_BLOCK},
+ {std_vect_39, sizeof(std_vect_39), ISAL_INVALID_BLOCK},
+ {std_vect_40, sizeof(std_vect_40), ISAL_INVALID_BLOCK},
+ {std_vect_41, sizeof(std_vect_41), ISAL_INVALID_BLOCK},
+ {std_vect_42, sizeof(std_vect_42), ISAL_INVALID_BLOCK},
+ {std_vect_43, sizeof(std_vect_43), ISAL_INVALID_BLOCK},
+ {std_vect_44, sizeof(std_vect_44), ISAL_INVALID_BLOCK},
+ {std_vect_45, sizeof(std_vect_45), ISAL_INVALID_BLOCK},
+ {std_vect_46, sizeof(std_vect_46), ISAL_INVALID_BLOCK},
+ {std_vect_47, sizeof(std_vect_47), ISAL_INVALID_BLOCK},
+ {std_vect_48, sizeof(std_vect_48), ISAL_INVALID_BLOCK},
+ {std_vect_49, sizeof(std_vect_49), ISAL_INVALID_BLOCK},
+ {std_vect_50, sizeof(std_vect_50), ISAL_INVALID_BLOCK},
+ {std_vect_51, sizeof(std_vect_51), ISAL_INVALID_BLOCK},
+ {std_vect_52, sizeof(std_vect_52), ISAL_INVALID_BLOCK},
+ {std_vect_53, sizeof(std_vect_53), ISAL_INVALID_BLOCK},
+ {std_vect_54, sizeof(std_vect_54), ISAL_INVALID_BLOCK},
+ {std_vect_55, sizeof(std_vect_55), ISAL_INVALID_BLOCK},
+ {std_vect_56, sizeof(std_vect_56), ISAL_INVALID_BLOCK},
+ {std_vect_57, sizeof(std_vect_57), ISAL_INVALID_BLOCK},
+ {std_vect_58, sizeof(std_vect_58), ISAL_INVALID_BLOCK},
+ {std_vect_59, sizeof(std_vect_59), ISAL_INVALID_BLOCK},
+ {std_vect_60, sizeof(std_vect_60), ISAL_INVALID_BLOCK},
+ {std_vect_61, sizeof(std_vect_61), ISAL_INVALID_BLOCK},
+ {std_vect_62, sizeof(std_vect_62), ISAL_END_INPUT},
+ {std_vect_63, sizeof(std_vect_63), ISAL_INVALID_BLOCK},
+ {std_vect_64, sizeof(std_vect_64), ISAL_INVALID_BLOCK},
+ {std_vect_65, sizeof(std_vect_65), ISAL_INVALID_BLOCK},
+ {std_vect_66, sizeof(std_vect_66), ISAL_INVALID_BLOCK},
+ {std_vect_67, sizeof(std_vect_67), ISAL_END_INPUT},
+ {std_vect_68, sizeof(std_vect_68), ISAL_INVALID_BLOCK},
+ {std_vect_69, sizeof(std_vect_69), ISAL_INVALID_BLOCK},
+ {std_vect_70, sizeof(std_vect_70), ISAL_INVALID_BLOCK},
+ {std_vect_71, sizeof(std_vect_71), ISAL_INVALID_BLOCK},
+ {std_vect_72, sizeof(std_vect_72), ISAL_INVALID_BLOCK},
+ {std_vect_73, sizeof(std_vect_73), ISAL_END_INPUT},
+ {std_vect_74, sizeof(std_vect_74), ISAL_END_INPUT},
+ {std_vect_75, sizeof(std_vect_75), ISAL_END_INPUT},
+ {std_vect_76, sizeof(std_vect_76), ISAL_INVALID_BLOCK},
+ {std_vect_77, sizeof(std_vect_77), ISAL_INVALID_BLOCK},
+ {std_vect_78, sizeof(std_vect_78), ISAL_INVALID_BLOCK},
+ {std_vect_79, sizeof(std_vect_79), ISAL_INVALID_BLOCK},
+ {std_vect_80, sizeof(std_vect_80), ISAL_INVALID_BLOCK},
+ {std_vect_81, sizeof(std_vect_81), ISAL_INVALID_BLOCK},
+ {std_vect_82, sizeof(std_vect_82), ISAL_INVALID_BLOCK},
+ {std_vect_83, sizeof(std_vect_83), ISAL_END_INPUT},
+ {std_vect_84, sizeof(std_vect_84), ISAL_INVALID_BLOCK},
+ {std_vect_85, sizeof(std_vect_85), ISAL_INVALID_BLOCK},
+ {std_vect_86, sizeof(std_vect_86), ISAL_INVALID_BLOCK},
+ {std_vect_87, sizeof(std_vect_87), ISAL_INVALID_BLOCK},
+ {std_vect_88, sizeof(std_vect_88), ISAL_INVALID_BLOCK},
+ {std_vect_89, sizeof(std_vect_89), ISAL_END_INPUT},
+ {std_vect_90, sizeof(std_vect_90), ISAL_END_INPUT},
+ {std_vect_91, sizeof(std_vect_91), ISAL_INVALID_BLOCK},
+ {std_vect_92, sizeof(std_vect_92), ISAL_INVALID_BLOCK},
+ {std_vect_93, sizeof(std_vect_93), ISAL_END_INPUT},
+ {std_vect_94, sizeof(std_vect_94), ISAL_INVALID_SYMBOL},
+ {std_vect_95, sizeof(std_vect_95), ISAL_END_INPUT},
+ {std_vect_96, sizeof(std_vect_96), ISAL_END_INPUT},
+ {std_vect_97, sizeof(std_vect_97), ISAL_INVALID_BLOCK},
+ {std_vect_98, sizeof(std_vect_98), ISAL_INVALID_BLOCK},
+ {std_vect_99, sizeof(std_vect_99), ISAL_INVALID_BLOCK},
+ {std_vect_100, sizeof(std_vect_100), ISAL_INVALID_BLOCK},
+ {std_vect_101, sizeof(std_vect_101), ISAL_INVALID_BLOCK},
+ {std_vect_102, sizeof(std_vect_102), ISAL_INVALID_BLOCK},
+ {std_vect_103, sizeof(std_vect_103), ISAL_INVALID_BLOCK},
+ {std_vect_104, sizeof(std_vect_104), ISAL_INVALID_BLOCK},
+ {std_vect_105, sizeof(std_vect_105), ISAL_INVALID_BLOCK},
+ {std_vect_106, sizeof(std_vect_106), ISAL_INVALID_BLOCK},
+ {std_vect_107, sizeof(std_vect_107), ISAL_INVALID_BLOCK},
+ {std_vect_108, sizeof(std_vect_108), ISAL_INVALID_BLOCK},
+ {std_vect_109, sizeof(std_vect_109), ISAL_INVALID_BLOCK},
+ {std_vect_110, sizeof(std_vect_110), ISAL_INVALID_BLOCK},
+ {std_vect_111, sizeof(std_vect_111), ISAL_INVALID_BLOCK},
+ {std_vect_112, sizeof(std_vect_112), ISAL_INVALID_BLOCK},
+ {std_vect_113, sizeof(std_vect_113), ISAL_INVALID_BLOCK},
+ {std_vect_114, sizeof(std_vect_114), ISAL_INVALID_LOOKBACK},
+ {std_vect_115, sizeof(std_vect_115), ISAL_INVALID_BLOCK},
+ {std_vect_116, sizeof(std_vect_116), ISAL_INVALID_BLOCK},
+ {std_vect_117, sizeof(std_vect_117), ISAL_INVALID_BLOCK},
+ {std_vect_118, sizeof(std_vect_118), ISAL_INVALID_BLOCK},
+ {std_vect_119, sizeof(std_vect_119), ISAL_INVALID_BLOCK},
+ {std_vect_120, sizeof(std_vect_120), ISAL_INVALID_BLOCK},
+ {std_vect_121, sizeof(std_vect_121), ISAL_INVALID_BLOCK},
+ {std_vect_122, sizeof(std_vect_122), ISAL_INVALID_BLOCK},
+ {std_vect_123, sizeof(std_vect_123), ISAL_INVALID_BLOCK},
+ {std_vect_124, sizeof(std_vect_124), ISAL_INVALID_BLOCK},
+ {std_vect_125, sizeof(std_vect_125), ISAL_INVALID_BLOCK},
+ {std_vect_126, sizeof(std_vect_126), ISAL_INVALID_BLOCK},
+ {std_vect_127, sizeof(std_vect_127), ISAL_INVALID_BLOCK},
+ {std_vect_128, sizeof(std_vect_128), ISAL_INVALID_BLOCK},
+ {std_vect_129, sizeof(std_vect_129), ISAL_INVALID_BLOCK},
+ {std_vect_130, sizeof(std_vect_130), ISAL_END_INPUT},
+ {std_vect_131, sizeof(std_vect_131), ISAL_INVALID_BLOCK},
+ {std_vect_132, sizeof(std_vect_132), ISAL_INVALID_SYMBOL},
+ {std_vect_133, sizeof(std_vect_133), ISAL_INVALID_BLOCK},
+ {std_vect_134, sizeof(std_vect_134), ISAL_INVALID_BLOCK},
+ {std_vect_135, sizeof(std_vect_135), ISAL_INVALID_BLOCK},
+ {std_vect_136, sizeof(std_vect_136), ISAL_INVALID_BLOCK},
+ {std_vect_137, sizeof(std_vect_137), ISAL_INVALID_BLOCK},
+ {std_vect_138, sizeof(std_vect_138), ISAL_INVALID_SYMBOL},
+ {std_vect_139, sizeof(std_vect_139), ISAL_INVALID_SYMBOL},
+ {std_vect_140, sizeof(std_vect_140), ISAL_INVALID_BLOCK},
+ {std_vect_141, sizeof(std_vect_141), ISAL_END_INPUT},
+ {std_vect_142, sizeof(std_vect_142), ISAL_INVALID_BLOCK},
+ {std_vect_143, sizeof(std_vect_143), ISAL_INVALID_BLOCK},
+ {std_vect_144, sizeof(std_vect_144), ISAL_INVALID_BLOCK},
+ {std_vect_145, sizeof(std_vect_145), ISAL_END_INPUT},
+ {std_vect_146, sizeof(std_vect_146), ISAL_INVALID_BLOCK},
+ {std_vect_147, sizeof(std_vect_147), ISAL_INVALID_BLOCK},
+ {std_vect_148, sizeof(std_vect_148), ISAL_INVALID_BLOCK},
+ {std_vect_149, sizeof(std_vect_149), ISAL_INVALID_BLOCK},
+ {std_vect_150, sizeof(std_vect_150), ISAL_INVALID_BLOCK}
+};
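+
+/*
+ * Illustrative usage sketch, not part of the upstream vectors: each array
+ * above is a deliberately malformed DEFLATE stream, and std_vect_array pairs
+ * it with the status isal_inflate() is expected to return. Assuming the
+ * public igzip_lib.h API (struct inflate_state, isal_inflate_init()), a
+ * minimal checker over the table could look like the function below;
+ * check_std_vects() is a hypothetical name for this sketch.
+ */
+#if 0				/* sketch only, not compiled */
+#include <igzip_lib.h>
+
+static int check_std_vects(void)
+{
+	uint8_t out[4096];
+	size_t i, n = sizeof(std_vect_array) / sizeof(std_vect_array[0]);
+
+	for (i = 0; i < n; i++) {
+		struct inflate_state state;
+
+		isal_inflate_init(&state);
+		state.next_in = std_vect_array[i].vector;
+		state.avail_in = std_vect_array[i].vector_length;
+		state.next_out = out;
+		state.avail_out = sizeof(out);
+		/* every vector is expected to fail with its recorded error */
+		if (isal_inflate(&state) != std_vect_array[i].expected_error)
+			return -1;
+	}
+	return 0;
+}
+#endif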
diff --git a/src/isa-l/igzip/lz0a_const.asm b/src/isa-l/igzip/lz0a_const.asm
new file mode 100644
index 000000000..deb6d232e
--- /dev/null
+++ b/src/isa-l/igzip/lz0a_const.asm
@@ -0,0 +1,65 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+
+%assign K 1024
+%assign D IGZIP_HIST_SIZE ;; Amount of history
+%assign LA 18 * 16 ;; Max look-ahead, rounded up to 32 byte boundary
+%assign BSIZE 2*IGZIP_HIST_SIZE + LA ;; Nominal buffer size
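+;; e.g. with the default 32K IGZIP_HIST_SIZE: BSIZE = 2*32768 + 18*16 = 65824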
+
+;; Constants for stateless compression
+%define LAST_BYTES_COUNT 3 ;; Bytes to prevent reading out of array bounds
+%define LA_STATELESS 258 ;; No round up since no data is copied to a buffer
+
+%assign IGZIP_LVL0_HASH_SIZE (8 * K)
+%assign IGZIP_HASH8K_HASH_SIZE (8 * K)
+%assign IGZIP_HASH_HIST_HASH_SIZE IGZIP_HIST_SIZE
+%assign IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE
+
+%xdefine LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
+%xdefine HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1)
+%xdefine HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_HASH_SIZE - 1)
+%xdefine HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1)
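+;; the (size - 1) masks above assume each hash table size is a power of two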
+
+%assign MIN_DEF_MATCH 3 ; Minimum length of a match in deflate
+%assign SHORTEST_MATCH 4
+
+%assign SLOP 8
+
+%define ICF_CODE_BYTES 4
+%define LIT_LEN_BIT_COUNT 10
+%define DIST_LIT_BIT_COUNT 9
+
+%define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
+%define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
+
+%define DIST_OFFSET LIT_LEN_BIT_COUNT
+%define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT)
+%define LIT (0x1E << DIST_OFFSET)
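
The ICF constants above describe a packed 32-bit match token: the low LIT_LEN_BIT_COUNT (10) bits hold the literal/length symbol, the next DIST_LIT_BIT_COUNT (9) bits the distance symbol, and the bits from EXTRA_BITS_OFFSET (19) upward the extra distance bits. A minimal C sketch of that layout; the icf_* helpers are hypothetical illustrations, not isa-l API:

#include <stdint.h>

#define LIT_LEN_BIT_COUNT 10	/* mirrors the asm define */
#define DIST_LIT_BIT_COUNT 9	/* mirrors the asm define */
#define DIST_OFFSET LIT_LEN_BIT_COUNT
#define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT)

/* Hypothetical helpers packing and unpacking one ICF token. */
static inline uint32_t icf_pack(uint32_t lit_len, uint32_t dist, uint32_t extra)
{
	return (lit_len & ((1u << LIT_LEN_BIT_COUNT) - 1))
	    | ((dist & ((1u << DIST_LIT_BIT_COUNT) - 1)) << DIST_OFFSET)
	    | (extra << EXTRA_BITS_OFFSET);
}

static inline uint32_t icf_lit_len(uint32_t c)
{
	return c & ((1u << LIT_LEN_BIT_COUNT) - 1);
}

static inline uint32_t icf_dist(uint32_t c)
{
	return (c >> DIST_OFFSET) & ((1u << DIST_LIT_BIT_COUNT) - 1);
}

Under this layout, LIT (0x1E << DIST_OFFSET) places 30 in the distance field; since valid distance codes run 0 through 29, an out-of-range distance appears to be how a plain literal is distinguished from a match.
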
diff --git a/src/isa-l/igzip/options.asm b/src/isa-l/igzip/options.asm
new file mode 100644
index 000000000..afbb586ea
--- /dev/null
+++ b/src/isa-l/igzip/options.asm
@@ -0,0 +1,77 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+default rel
+
+%ifndef __OPTIONS_ASM__
+%define __OPTIONS_ASM__
+
+; Options:
+; m - reschedule mem reads
+; e b - bitbuff style
+; t s x - compare style
+; h - limit hash updates
+; l - use longer huffman table
+; f - fix cache read
+
+%ifndef IGZIP_HIST_SIZE
+%define IGZIP_HIST_SIZE (32 * 1024)
+%endif
+
+%if (IGZIP_HIST_SIZE > (32 * 1024))
+%undef IGZIP_HIST_SIZE
+%define IGZIP_HIST_SIZE (32 * 1024)
+%endif
+
+%ifdef LONGER_HUFFTABLE
+%if (IGZIP_HIST_SIZE > 8 * 1024)
+%undef IGZIP_HIST_SIZE
+%define IGZIP_HIST_SIZE (8 * 1024)
+%endif
+%endif
+
+; (h) limit hash update
+%define LIMIT_HASH_UPDATE
+
+; (f) fix cache read problem
+%define FIX_CACHE_READ
+
+%define ISAL_DEF_MAX_HDR_SIZE 328
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%ifndef __NASM_VER__
+%define WRT_OPT wrt ..sym
+%else
+%define WRT_OPT
+%endif
+%else
+%define WRT_OPT
+%endif
+
+%endif ; ifndef __OPTIONS_ASM__
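
Net effect of the history-size logic above: IGZIP_HIST_SIZE defaults to 32 KiB, is clamped to a 32 KiB maximum (the deflate window limit), and is further capped at 8 KiB when LONGER_HUFFTABLE is defined. An equivalent C-preprocessor sketch of the same clamping:

#ifndef IGZIP_HIST_SIZE
# define IGZIP_HIST_SIZE (32 * 1024)	/* default history window */
#endif

#if IGZIP_HIST_SIZE > (32 * 1024)	/* never exceed the deflate window */
# undef IGZIP_HIST_SIZE
# define IGZIP_HIST_SIZE (32 * 1024)
#endif

#ifdef LONGER_HUFFTABLE			/* longer table implies a smaller window */
# if IGZIP_HIST_SIZE > (8 * 1024)
#  undef IGZIP_HIST_SIZE
#  define IGZIP_HIST_SIZE (8 * 1024)
# endif
#endif
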
diff --git a/src/isa-l/igzip/proc_heap.asm b/src/isa-l/igzip/proc_heap.asm
new file mode 100644
index 000000000..ea9365a5d
--- /dev/null
+++ b/src/isa-l/igzip/proc_heap.asm
@@ -0,0 +1,132 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Huffman-tree heap helpers; C reference implementations in proc_heap_base.c.
+; Returns the modified node_ptr:
+; uint32_t build_huff_tree(struct heap_tree *heap_space, uint64_t heap_size, uint64_t node_ptr);
+
+%include "reg_sizes.asm"
+%include "heap_macros.asm"
+
+%ifidn __OUTPUT_FORMAT__, win64
+%define heap rcx ; pointer, 64-bit
+%define heap_size rdx
+%define arg3 r8
+%define child rsi
+%define tmp32 rdi
+%else
+%define heap rdi
+%define heap_size rsi
+%define arg3 rdx
+%define child rcx
+%define tmp32 rdx
+%endif
+
+%define node_ptr rax
+%define h1 r8
+%define h2 r9
+%define h3 r10
+%define i r11
+%define tmp2 r12
+
+[bits 64]
+default rel
+section .text
+
+ global build_huff_tree
+build_huff_tree:
+ endbranch
+%ifidn __OUTPUT_FORMAT__, win64
+ push rsi
+ push rdi
+%endif
+ push r12
+
+ mov node_ptr, arg3
+.main_loop:
+ ; REMOVE_MIN64(heap, heap_size, h1);
+ mov h2, [heap + heap_size*8]
+ mov h1, [heap + 1*8]
+ mov qword [heap + heap_size*8], -1
+ dec heap_size
+ mov [heap + 1*8], h2
+
+ mov i, 1
+ heapify heap, heap_size, i, child, h2, h3, tmp32, tmp2
+
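+	; Heap entries pack the frequency sum in the upper 48 bits and a
+	; 16-bit tree index in the low word; the word-sized stores below
+	; (h1 %+ w, h2 %+ w) record the two children's indices in the tree.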
+ mov h2, [heap + 1*8]
+ lea h3, [h1 + h2]
+ mov [heap + node_ptr*8], h1 %+ w
+ mov [heap + node_ptr*8 - 8], h2 %+ w
+
+ and h3, ~0xffff
+ or h3, node_ptr
+ sub node_ptr, 2
+
+ ; replace_min64(heap, heap_size, h3)
+ mov [heap + 1*8], h3
+ mov i, 1
+ heapify heap, heap_size, i, child, h2, h3, tmp32, tmp2
+
+ cmp heap_size, 1
+ ja .main_loop
+
+ mov h1, [heap + 1*8]
+ mov [heap + node_ptr*8], h1 %+ w
+
+ pop r12
+%ifidn __OUTPUT_FORMAT__, win64
+ pop rdi
+ pop rsi
+%endif
+ ret
+
+align 32
+ global build_heap
+build_heap:
+ endbranch
+%ifidn __OUTPUT_FORMAT__, win64
+ push rsi
+ push rdi
+%endif
+ push r12
+ mov qword [heap + heap_size*8 + 8], -1
+ mov i, heap_size
+ shr i, 1
+.loop:
+ mov h1, i
+ heapify heap, heap_size, h1, child, h2, h3, tmp32, tmp2
+ dec i
+ jnz .loop
+
+ pop r12
+%ifidn __OUTPUT_FORMAT__, win64
+ pop rdi
+ pop rsi
+%endif
+ ret
diff --git a/src/isa-l/igzip/proc_heap_base.c b/src/isa-l/igzip/proc_heap_base.c
new file mode 100644
index 000000000..777bd0f09
--- /dev/null
+++ b/src/isa-l/igzip/proc_heap_base.c
@@ -0,0 +1,85 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "igzip_lib.h"
+#include "huff_codes.h"
+#include "unaligned.h"
+
+static inline void heapify(uint64_t * heap, uint64_t heap_size, uint64_t index)
+{
+ uint64_t child = 2 * index, tmp;
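+	/* build_heap() plants an all-ones sentinel at heap[heap_size + 1]
+	 * (and build_huff_tree() refills freed slots with -1), so reading
+	 * heap[child + 1] is safe even when child == heap_size. */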
+ while (child <= heap_size) {
+ child = (heap[child] <= heap[child + 1]) ? child : child + 1;
+
+ if (heap[index] > heap[child]) {
+ tmp = heap[index];
+ heap[index] = heap[child];
+ heap[child] = tmp;
+ index = child;
+ child = 2 * index;
+ } else
+ break;
+ }
+}
+
+void build_heap(uint64_t * heap, uint64_t heap_size)
+{
+ uint64_t i;
+ heap[heap_size + 1] = -1;
+ for (i = heap_size / 2; i > 0; i--)
+ heapify(heap, heap_size, i);
+}
+
+uint32_t build_huff_tree(struct heap_tree *heap_space, uint64_t heap_size, uint64_t node_ptr)
+{
+ uint64_t *heap = (uint64_t *) heap_space;
+ uint64_t h1, h2;
+
+ while (heap_size > 1) {
+ h1 = heap[1];
+ heap[1] = heap[heap_size];
+ heap[heap_size--] = -1;
+
+ heapify(heap, heap_size, 1);
+
+ h2 = heap[1];
+ heap[1] = ((h1 + h2) & ~0xFFFFull) | node_ptr;
+
+ heapify(heap, heap_size, 1);
+
+ store_u16((uint8_t *) & heap[node_ptr], h1);
+ store_u16((uint8_t *) & heap[node_ptr - 1], h2);
+ node_ptr -= 2;
+	}
+ h1 = heap[1];
+ store_u16((uint8_t *) & heap[node_ptr], h1);
+ return node_ptr;
+}
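
proc_heap_base.c is the portable counterpart of the assembly in proc_heap.asm. Each heap entry is a single uint64_t with the symbol frequency in the upper 48 bits and a 16-bit node index in the low word: build_huff_tree() pops the two smallest entries, records their indices as children via store_u16(), and reinserts the summed frequency tagged with the parent slot. Because entries are compared as whole 64-bit values, the low word also acts as a deterministic tie-breaker for equal frequencies. A self-contained sketch of the entry format; make_entry() is a hypothetical helper (real entry construction lives in huff_codes.c, outside this file):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: frequency in the upper 48 bits, node index in the
 * low 16 bits, matching what build_huff_tree() expects. */
static uint64_t make_entry(uint64_t freq, uint16_t node_idx)
{
	return (freq << 16) | node_idx;
}

int main(void)
{
	uint64_t e = make_entry(42, 7);

	/* The low word is what store_u16() writes into a parent slot; masking
	 * it off, as in ((h1 + h2) & ~0xFFFFull) | node_ptr, keeps summed
	 * frequencies comparable as plain 64-bit keys. */
	printf("freq=%llu idx=%u\n",
	       (unsigned long long)(e >> 16), (unsigned)(e & 0xFFFFu));
	return 0;
}
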
diff --git a/src/isa-l/igzip/repeated_char_result.h b/src/isa-l/igzip/repeated_char_result.h
new file mode 100644
index 000000000..60a5fc197
--- /dev/null
+++ b/src/isa-l/igzip/repeated_char_result.h
@@ -0,0 +1,68 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#ifndef _IGZIP_REPEATED_8K_CHAR_RESULT_H_
+#define _IGZIP_REPEATED_8K_CHAR_RESULT_H_
+
+/* The code for the literal being encoded */
+#define CODE_LIT 0x1
+#define CODE_LIT_LENGTH 0x2
+
+/* The code for repeat 10. The length includes the distance code length. */
+#define CODE_10 0x3
+#define CODE_10_LENGTH 0x4
+
+/* The code for repeat 115-130. The length includes the distance code length. */
+#define CODE_280 0x0f
+#define CODE_280_LENGTH 0x4
+#define CODE_280_TOTAL_LENGTH (CODE_280_LENGTH + 4 + 1)
+
+/* Code representing the end of block. */
+#define END_OF_BLOCK 0x7
+#define END_OF_BLOCK_LEN 0x4
+
+/* MIN_REPEAT_LEN currently optimizes storage space; another possibility is to
+ * find the size which optimizes speed instead. */
+#define MIN_REPEAT_LEN (4 * 1024)
+
+#define HEADER_LENGTH 16
+
+/* Maximum length of the portion of the header represented by repeat lengths
+ * smaller than 258 */
+#define MAX_FIXUP_CODE_LENGTH 8
+
+
+/* Headers for constant 0x00 and 0xFF blocks
+ * This also contains the first literal character. */
+const uint32_t repeated_char_header[2][5] = {
+ { 0x0121c0ec, 0xc30c0000, 0x7d57fab0, 0x49270938}, /* Deflate header for 0x00 */
+ { 0x0121c0ec, 0xc30c0000, 0x7baaff30, 0x49270938} /* Deflate header for 0xFF */
+};
+
+#endif /*_IGZIP_REPEATED_8K_CHAR_RESULT_H_*/
diff --git a/src/isa-l/igzip/rfc1951_lookup.asm b/src/isa-l/igzip/rfc1951_lookup.asm
new file mode 100644
index 000000000..07014028a
--- /dev/null
+++ b/src/isa-l/igzip/rfc1951_lookup.asm
@@ -0,0 +1,118 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+%ifndef RFC1951_LOOKUP
+%define RFC1951_LOOKUP
+
+section .data
+
+ align 8
+
+;; /* Structure containing lookup data based on RFC 1951 */
+;; struct rfc1951_tables {
+;; uint8_t len_to_code[264];
+;; uint8_t dist_extra_bit_count[32];
+;; uint32_t dist_start[32];
+;; uint8_t len_extra_bit_count[32];
+;; uint16_t len_start[32];
+;; };
+
+mk_global rfc1951_lookup_table, data, internal
+rfc1951_lookup_table:
+len_to_code:
+ db 0x00, 0x00, 0x00
+ db 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
+ db 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
+ db 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
+ db 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
+ db 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
+ db 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
+ db 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
+ db 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
+ db 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+ db 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+ db 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+ db 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+ db 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+ db 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+ db 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+ db 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+ db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ db 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ db 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ db 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ db 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
+ db 0x00, 0x00, 0x00, 0x00, 0x00
+
+dist_extra_bit_count:
+ db 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02
+ db 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06
+ db 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a
+ db 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00
+
+dist_start:
+ dd 0x00000001, 0x00000002, 0x00000003, 0x00000004
+ dd 0x00000005, 0x00000007, 0x00000009, 0x0000000d
+ dd 0x00000011, 0x00000019, 0x00000021, 0x00000031
+ dd 0x00000041, 0x00000061, 0x00000081, 0x000000c1
+ dd 0x00000101, 0x00000181, 0x00000201, 0x00000301
+ dd 0x00000401, 0x00000601, 0x00000801, 0x00000c01
+ dd 0x00001001, 0x00001801, 0x00002001, 0x00003001
+ dd 0x00004001, 0x00006001, 0x00000000, 0x00000000
+
+len_extra_bit_count:
+ db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ db 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02
+ db 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04
+ db 0x05, 0x05, 0x05, 0x05, 0x00, 0x00, 0x00, 0x00
+
+len_start:
+ dw 0x0003, 0x0004, 0x0005, 0x0006
+	dw 0x0007, 0x0008, 0x0009, 0x000a
+ dw 0x000b, 0x000d, 0x000f, 0x0011
+ dw 0x0013, 0x0017, 0x001b, 0x001f
+ dw 0x0023, 0x002b, 0x0033, 0x003b
+ dw 0x0043, 0x0053, 0x0063, 0x0073
+ dw 0x0083, 0x00a3, 0x00c3, 0x00e3
+ dw 0x0102, 0x0000, 0x0000, 0x0000
+
+%endif ; RFC1951_LOOKUP
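
Read together, these tables encode the RFC 1951 symbol mappings: len_to_code[len] is the length code minus 256 for match lengths 3 through 258 (len_to_code[3] = 0x01 gives code 257; len_to_code[258] = 0x1d gives code 285), while dist_start[c]/dist_extra_bit_count[c] and len_start[c]/len_extra_bit_count[c] give the base value and extra-bit count for each code. A C sketch of lookups over the same layout; the extern declaration is for illustration only, since the symbol is emitted with internal visibility:

#include <stdint.h>

/* Mirrors the struct in the comment above. */
struct rfc1951_tables {
	uint8_t len_to_code[264];
	uint8_t dist_extra_bit_count[32];
	uint32_t dist_start[32];
	uint8_t len_extra_bit_count[32];
	uint16_t len_start[32];
};
extern const struct rfc1951_tables rfc1951_lookup_table;

/* Deflate length code for a match length in [3, 258]. */
static uint32_t length_code(uint32_t len)
{
	return 256 + rfc1951_lookup_table.len_to_code[len];
}

/* Deflate distance code for a distance in [1, 32768]: the largest code
 * whose base distance (dist_start) does not exceed dist. */
static uint32_t distance_code(uint32_t dist)
{
	uint32_t code = 0;

	while (code < 29 && rfc1951_lookup_table.dist_start[code + 1] <= dist)
		code++;
	return code;
}

For example, a match of length 9 at distance 5 maps to length code 263 and distance code 4 with one extra bit, in line with RFC 1951.
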
diff --git a/src/isa-l/igzip/static_inflate.h b/src/isa-l/igzip/static_inflate.h
new file mode 100644
index 000000000..6ed8d3037
--- /dev/null
+++ b/src/isa-l/igzip/static_inflate.h
@@ -0,0 +1,2678 @@
+#ifndef STATIC_HEADER_H
+#define STATIC_HEADER_H
+
+#include "igzip_lib.h"
+
+#define LONG_BITS_CHECK 12
+#define SHORT_BITS_CHECK 10
+#if (LONG_BITS_CHECK == ISAL_DECODE_LONG_BITS) && (SHORT_BITS_CHECK == ISAL_DECODE_SHORT_BITS)
+# define ISAL_STATIC_INFLATE_TABLE
+#else
+# warning "Incompatible compile time defines for optimized static inflate table."
+#endif
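+
+/* The tables below were generated for ISAL_DECODE_LONG_BITS == 12 and
+ * ISAL_DECODE_SHORT_BITS == 10; ISAL_STATIC_INFLATE_TABLE is defined only
+ * when the build uses those sizes, so a mismatched build gets the #warning
+ * above and should fall back to constructing its tables at run time. */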
+
+struct inflate_huff_code_large static_lit_huff_code = {
+ .short_code_lookup = {
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000171,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa4000139, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000060,
+ 0xa4000129, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb4000151, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000020,
+ 0xa4000121, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb4000141, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000131, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb4000161, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000000,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013a, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012a, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb4000152, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000040,
+ 0xa4000122, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb4000142, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000132, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb4000162, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000172,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013b, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000062,
+ 0xa400012b, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb4000153, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000022,
+ 0xa4000123, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb4000143, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000133, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb4000163, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000002,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa400013c, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012c, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb4000154, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000042,
+ 0xa4000124, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb4000144, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000134, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb4000164, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000173,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa400013d, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000064,
+ 0xa400012d, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb4000155, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000024,
+ 0xa4000125, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb4000145, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000135, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb4000165, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000004,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013e, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012e, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb4000156, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000044,
+ 0xa4000126, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb4000146, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000136, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb4000166, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000174,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013f, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000066,
+ 0xa400012f, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb4000157, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000026,
+ 0xa4000127, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb4000147, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000137, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb4000167, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000006,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa4000140, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa4000130, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb4000158, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000046,
+ 0xa4000128, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb4000148, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000138, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb4000168, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000175,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa4000139, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000068,
+ 0xa4000129, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb4000159, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000028,
+ 0xa4000121, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb4000149, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000131, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb4000169, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000008,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013a, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012a, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb400015a, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000048,
+ 0xa4000122, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb400014a, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000132, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb400016a, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000176,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013b, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x3600006a,
+ 0xa400012b, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb400015b, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x3600002a,
+ 0xa4000123, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb400014b, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000133, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb400016b, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x3600000a,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa400013c, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012c, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb400015c, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x3600004a,
+ 0xa4000124, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb400014c, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000134, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb400016c, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000177,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa400013d, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x3600006c,
+ 0xa400012d, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb400015d, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x3600002c,
+ 0xa4000125, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb400014d, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000135, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb400016d, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x3600000c,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013e, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012e, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb400015e, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x3600004c,
+ 0xa4000126, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb400014e, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000136, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb400016e, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000178,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013f, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x3600006e,
+ 0xa400012f, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb400015f, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x3600002e,
+ 0xa4000127, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb400014f, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000137, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb400016f, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x3600000e,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa4000140, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa4000130, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb4000160, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x3600004e,
+ 0xa4000128, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb4000150, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000138, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb4000170, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000179,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa4000139, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000070,
+ 0xa4000129, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb4000151, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000030,
+ 0xa4000121, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb4000141, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000131, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb4000161, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000010,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013a, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012a, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb4000152, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000050,
+ 0xa4000122, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb4000142, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000132, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb4000162, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc400017a,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013b, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000072,
+ 0xa400012b, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb4000153, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000032,
+ 0xa4000123, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb4000143, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000133, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb4000163, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000012,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa400013c, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012c, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb4000154, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000052,
+ 0xa4000124, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb4000144, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000134, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb4000164, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff,
+ 0x74000100, 0x84000050, 0x84000010, 0xc400017b,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa400013d, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000074,
+ 0xa400012d, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb4000155, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000034,
+ 0xa4000125, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb4000145, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000135, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb4000165, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000014,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013e, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012e, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb4000156, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000054,
+ 0xa4000126, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb4000146, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000136, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb4000166, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc400017c,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013f, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000076,
+ 0xa400012f, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb4000157, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000036,
+ 0xa4000127, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb4000147, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000137, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb4000167, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000016,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa4000140, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa4000130, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb4000158, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000056,
+ 0xa4000128, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb4000148, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000138, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb4000168, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff,
+ 0x74000100, 0x84000050, 0x84000010, 0xc400017d,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa4000139, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x36000078,
+ 0xa4000129, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb4000159, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x36000038,
+ 0xa4000121, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb4000149, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000131, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb4000169, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x36000018,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013a, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012a, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb400015a, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x36000058,
+ 0xa4000122, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb400014a, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000132, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb400016a, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc400017e,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013b, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x3600007a,
+ 0xa400012b, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb400015b, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x3600003a,
+ 0xa4000123, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb400014b, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000133, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb400016b, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x3600001a,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa400013c, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012c, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb400015c, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x3600005a,
+ 0xa4000124, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb400014c, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000134, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb400016c, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff,
+ 0x74000100, 0x84000050, 0x84000010, 0xc400017f,
+ 0x9400011d, 0x84000070, 0x84000030, 0x940000c0,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a0,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e0,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000090,
+ 0xa400013d, 0x84000078, 0x84000038, 0x940000d0,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b0,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f0,
+ 0x74000102, 0x84000054, 0x84000014, 0x3600007c,
+ 0xa400012d, 0x84000074, 0x84000034, 0x940000c8,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a8,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e8,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000098,
+ 0xb400015d, 0x8400007c, 0x8400003c, 0x940000d8,
+ 0x94000115, 0x8400006c, 0x8400002c, 0x940000b8,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f8,
+ 0x74000101, 0x84000052, 0x84000012, 0x3600003c,
+ 0xa4000125, 0x84000072, 0x84000032, 0x940000c4,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a4,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e4,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000094,
+ 0xb400014d, 0x8400007a, 0x8400003a, 0x940000d4,
+ 0x94000111, 0x8400006a, 0x8400002a, 0x940000b4,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f4,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000135, 0x84000076, 0x84000036, 0x940000cc,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ac,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ec,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009c,
+ 0xb400016d, 0x8400007e, 0x8400003e, 0x940000dc,
+ 0x94000119, 0x8400006e, 0x8400002e, 0x940000bc,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fc,
+ 0x74000100, 0x84000051, 0x84000011, 0x3600001c,
+ 0x9400011e, 0x84000071, 0x84000031, 0x940000c2,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a2,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e2,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000092,
+ 0xa400013e, 0x84000079, 0x84000039, 0x940000d2,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b2,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f2,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa400012e, 0x84000075, 0x84000035, 0x940000ca,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000aa,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000ea,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009a,
+ 0xb400015e, 0x8400007d, 0x8400003d, 0x940000da,
+ 0x94000116, 0x8400006d, 0x8400002d, 0x940000ba,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fa,
+ 0x74000101, 0x84000053, 0x84000013, 0x3600005c,
+ 0xa4000126, 0x84000073, 0x84000033, 0x940000c6,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a6,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e6,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000096,
+ 0xb400014e, 0x8400007b, 0x8400003b, 0x940000d6,
+ 0x94000112, 0x8400006b, 0x8400002b, 0x940000b6,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f6,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000136, 0x84000077, 0x84000037, 0x940000ce,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000ae,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ee,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009e,
+ 0xb400016e, 0x8400007f, 0x8400003f, 0x940000de,
+ 0x9400011a, 0x8400006f, 0x8400002f, 0x940000be,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000fe,
+ 0x74000100, 0x84000050, 0x84000010, 0xc4000180,
+ 0x9400011f, 0x84000070, 0x84000030, 0x940000c1,
+ 0x74000108, 0x84000060, 0x84000020, 0x940000a1,
+ 0x84000000, 0x84000080, 0x84000040, 0x940000e1,
+ 0x74000104, 0x84000058, 0x84000018, 0x94000091,
+ 0xa400013f, 0x84000078, 0x84000038, 0x940000d1,
+ 0x8400010f, 0x84000068, 0x84000028, 0x940000b1,
+ 0x84000008, 0x84000088, 0x84000048, 0x940000f1,
+ 0x74000102, 0x84000054, 0x84000014, 0x3600007e,
+ 0xa400012f, 0x84000074, 0x84000034, 0x940000c9,
+ 0x8400010b, 0x84000064, 0x84000024, 0x940000a9,
+ 0x84000004, 0x84000084, 0x84000044, 0x940000e9,
+ 0x74000106, 0x8400005c, 0x8400001c, 0x94000099,
+ 0xb400015f, 0x8400007c, 0x8400003c, 0x940000d9,
+ 0x94000117, 0x8400006c, 0x8400002c, 0x940000b9,
+ 0x8400000c, 0x8400008c, 0x8400004c, 0x940000f9,
+ 0x74000101, 0x84000052, 0x84000012, 0x3600003e,
+ 0xa4000127, 0x84000072, 0x84000032, 0x940000c5,
+ 0x84000109, 0x84000062, 0x84000022, 0x940000a5,
+ 0x84000002, 0x84000082, 0x84000042, 0x940000e5,
+ 0x74000105, 0x8400005a, 0x8400001a, 0x94000095,
+ 0xb400014f, 0x8400007a, 0x8400003a, 0x940000d5,
+ 0x94000113, 0x8400006a, 0x8400002a, 0x940000b5,
+ 0x8400000a, 0x8400008a, 0x8400004a, 0x940000f5,
+ 0x74000103, 0x84000056, 0x84000016, 0x00000000,
+ 0xa4000137, 0x84000076, 0x84000036, 0x940000cd,
+ 0x8400010d, 0x84000066, 0x84000026, 0x940000ad,
+ 0x84000006, 0x84000086, 0x84000046, 0x940000ed,
+ 0x74000107, 0x8400005e, 0x8400001e, 0x9400009d,
+ 0xb400016f, 0x8400007e, 0x8400003e, 0x940000dd,
+ 0x9400011b, 0x8400006e, 0x8400002e, 0x940000bd,
+ 0x8400000e, 0x8400008e, 0x8400004e, 0x940000fd,
+ 0x74000100, 0x84000051, 0x84000011, 0x3600001e,
+ 0x94000120, 0x84000071, 0x84000031, 0x940000c3,
+ 0x74000108, 0x84000061, 0x84000021, 0x940000a3,
+ 0x84000001, 0x84000081, 0x84000041, 0x940000e3,
+ 0x74000104, 0x84000059, 0x84000019, 0x94000093,
+ 0xa4000140, 0x84000079, 0x84000039, 0x940000d3,
+ 0x84000110, 0x84000069, 0x84000029, 0x940000b3,
+ 0x84000009, 0x84000089, 0x84000049, 0x940000f3,
+ 0x74000102, 0x84000055, 0x84000015, 0x84000200,
+ 0xa4000130, 0x84000075, 0x84000035, 0x940000cb,
+ 0x8400010c, 0x84000065, 0x84000025, 0x940000ab,
+ 0x84000005, 0x84000085, 0x84000045, 0x940000eb,
+ 0x74000106, 0x8400005d, 0x8400001d, 0x9400009b,
+ 0xb4000160, 0x8400007d, 0x8400003d, 0x940000db,
+ 0x94000118, 0x8400006d, 0x8400002d, 0x940000bb,
+ 0x8400000d, 0x8400008d, 0x8400004d, 0x940000fb,
+ 0x74000101, 0x84000053, 0x84000013, 0x3600005e,
+ 0xa4000128, 0x84000073, 0x84000033, 0x940000c7,
+ 0x8400010a, 0x84000063, 0x84000023, 0x940000a7,
+ 0x84000003, 0x84000083, 0x84000043, 0x940000e7,
+ 0x74000105, 0x8400005b, 0x8400001b, 0x94000097,
+ 0xb4000150, 0x8400007b, 0x8400003b, 0x940000d7,
+ 0x94000114, 0x8400006b, 0x8400002b, 0x940000b7,
+ 0x8400000b, 0x8400008b, 0x8400004b, 0x940000f7,
+ 0x74000103, 0x84000057, 0x84000017, 0x00000000,
+ 0xa4000138, 0x84000077, 0x84000037, 0x940000cf,
+ 0x8400010e, 0x84000067, 0x84000027, 0x940000af,
+ 0x84000007, 0x84000087, 0x84000047, 0x940000ef,
+ 0x74000107, 0x8400005f, 0x8400001f, 0x9400009f,
+ 0xb4000170, 0x8400007f, 0x8400003f, 0x940000df,
+ 0x9400011c, 0x8400006f, 0x8400002f, 0x940000bf,
+ 0x8400000f, 0x8400008f, 0x8400004f, 0x940000ff },
+
+ .long_code_lookup = {
+ 0x3581, 0x3591, 0x3582, 0x3592, 0x3583, 0x3593, 0x3584, 0x3594,
+ 0x3585, 0x3595, 0x3586, 0x3596, 0x3587, 0x3597, 0x3588, 0x3598,
+ 0x3589, 0x3599, 0x358a, 0x359a, 0x358b, 0x359b, 0x358c, 0x359c,
+ 0x358d, 0x359d, 0x358e, 0x359e, 0x358f, 0x359f, 0x3590, 0x35a0,
+ 0x35a1, 0x35b1, 0x35a2, 0x35b2, 0x35a3, 0x35b3, 0x35a4, 0x35b4,
+ 0x35a5, 0x35b5, 0x35a6, 0x35b6, 0x35a7, 0x35b7, 0x35a8, 0x35b8,
+ 0x35a9, 0x35b9, 0x35aa, 0x35ba, 0x35ab, 0x35bb, 0x35ac, 0x35bc,
+ 0x35ad, 0x35bd, 0x35ae, 0x35be, 0x35af, 0x35bf, 0x35b0, 0x35c0,
+ 0x35c1, 0x35d1, 0x35c2, 0x35d2, 0x35c3, 0x35d3, 0x35c4, 0x35d4,
+ 0x35c5, 0x35d5, 0x35c6, 0x35d6, 0x35c7, 0x35d7, 0x35c8, 0x35d8,
+ 0x35c9, 0x35d9, 0x35ca, 0x35da, 0x35cb, 0x35db, 0x35cc, 0x35dc,
+ 0x35cd, 0x35dd, 0x35ce, 0x35de, 0x35cf, 0x35df, 0x35d0, 0x35e0,
+ 0x35e1, 0x35f1, 0x35e2, 0x35f2, 0x35e3, 0x35f3, 0x35e4, 0x35f4,
+ 0x35e5, 0x35f5, 0x35e6, 0x35f6, 0x35e7, 0x35f7, 0x35e8, 0x35f8,
+ 0x35e9, 0x35f9, 0x35ea, 0x35fa, 0x35eb, 0x35fb, 0x35ec, 0x35fc,
+ 0x35ed, 0x35fd, 0x35ee, 0x35fe, 0x35ef, 0x35ff, 0x35f0, 0x3600,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }
+};
+
+struct inflate_huff_code_small static_dist_huff_code = {
+ .short_code_lookup = {
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005,
+ 0x2800, 0x28f0, 0x2868, 0x2978, 0x2824, 0x2934, 0x28ac, 0x29bc,
+ 0x2802, 0x2912, 0x288a, 0x299a, 0x2846, 0x2956, 0x28ce, 0x0005,
+ 0x2801, 0x28f1, 0x2869, 0x2979, 0x2825, 0x2935, 0x28ad, 0x29bd,
+ 0x2803, 0x2913, 0x288b, 0x299b, 0x2847, 0x2957, 0x28cf, 0x0005 },
+
+ .long_code_lookup = {
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }
+};
+
+#endif
+struct inflate_huff_code_large pregen_lit_huff_code = {
+ .short_code_lookup = {
+ 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
+ 0x24000102, 0x98010268, 0x78010220, 0xb80102e0,
+ 0x24000102, 0x88010273, 0x44000104, 0xb8010235,
+ 0x24000102, 0x74000108, 0x64000109, 0xc80102fd,
+ 0x24000102, 0x8801026f, 0x44000103, 0xb8010206,
+ 0x24000102, 0x98010270, 0x54000105, 0xc8010259,
+ 0x24000102, 0x9801020a, 0x44000104, 0xb8010249,
+ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8006520, 0xc8010223,
+ 0x24000102, 0x64000106, 0x44000104, 0xb801023f,
+ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087,
+ 0x24000102, 0x88010272, 0x44000103, 0xb8010210,
+ 0x24000102, 0x98010275, 0x54000105, 0xc80102a6,
+ 0x24000102, 0x98010263, 0x44000104, 0xb8010254,
+ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7,
+ 0x24000102, 0xc8006565, 0x44000103, 0xa80102ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xc8010211,
+ 0x24000102, 0xc8006573, 0x44000104, 0xb8010239,
+ 0x24000102, 0xa8010201, 0x64000109, 0xb4000017,
+ 0x24000102, 0xc800656f, 0x44000103, 0xb801020b,
+ 0x24000102, 0x98010274, 0x54000105, 0xc801027c,
+ 0x24000102, 0x9801022c, 0x44000104, 0xb801024f,
+ 0x24000102, 0xa8010232, 0xc8006500, 0xb40000c4,
+ 0x24000102, 0xc8006569, 0x44000103, 0xa4000111,
+ 0x24000102, 0x9801026e, 0x54000020, 0xc801023d,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010245,
+ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a,
+ 0x24000102, 0xc8006572, 0x44000103, 0xb8010228,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102e3,
+ 0x24000102, 0x98010264, 0x44000104, 0xb8010280,
+ 0x24000102, 0xa8010266, 0xc8006561, 0xb40000eb,
+ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
+ 0x24000102, 0x74000068, 0x78010220, 0xb80102fe,
+ 0x24000102, 0xa8010373, 0x44000104, 0xb8010237,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000008,
+ 0x24000102, 0xa801036f, 0x44000103, 0xb8010208,
+ 0x24000102, 0x74000070, 0x54000105, 0xc8010260,
+ 0x24000102, 0x7400000a, 0x44000104, 0xb801024d,
+ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7,
+ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8006820, 0xc801022a,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010243,
+ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092,
+ 0x24000102, 0xa8010372, 0x44000103, 0xb8010222,
+ 0x24000102, 0x74000075, 0x54000105, 0xc80102c1,
+ 0x24000102, 0x74000063, 0x44000104, 0xb8010276,
+ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8002020, 0xc8010218,
+ 0x24000102, 0x64000073, 0x44000104, 0xb801023b,
+ 0x24000102, 0xa8010203, 0x64000109, 0xb400007b,
+ 0x24000102, 0x6400006f, 0x44000103, 0xb801020e,
+ 0x24000102, 0x74000074, 0x54000105, 0xc801028f,
+ 0x24000102, 0x7400002c, 0x44000104, 0xb8010252,
+ 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd,
+ 0x24000102, 0x64000069, 0x44000103, 0xb8010202,
+ 0x24000102, 0x7400006e, 0x54000020, 0xc8010255,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010247,
+ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3,
+ 0x24000102, 0x64000072, 0x44000103, 0xb8010233,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102f3,
+ 0x24000102, 0x74000064, 0x44000104, 0xb80102c2,
+ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0,
+ 0x24000102, 0x88010273, 0x44000104, 0xb8010236,
+ 0x24000102, 0x74000108, 0x64000109, 0xc400011d,
+ 0x24000102, 0x8801026f, 0x44000103, 0xb8010207,
+ 0x24000102, 0xb8010370, 0x54000105, 0xc801025c,
+ 0x24000102, 0xb801030a, 0x44000104, 0xb801024c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b2,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0xb801036d, 0xb8007320, 0xc8010225,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010240,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008c,
+ 0x24000102, 0x88010272, 0x44000103, 0xb801021f,
+ 0x24000102, 0xb8010375, 0x54000105, 0xc80102b4,
+ 0x24000102, 0xb8010363, 0x44000104, 0xb801026b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000db,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0xb801036c, 0x98010420, 0xc8010213,
+ 0x24000102, 0x64000073, 0x44000104, 0xb801023a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400001d,
+ 0x24000102, 0x6400006f, 0x44000103, 0xb801020c,
+ 0x24000102, 0xb8010374, 0x54000105, 0xc801027f,
+ 0x24000102, 0xb801032c, 0x44000104, 0xb8010250,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000c9,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
+ 0x24000102, 0xb801036e, 0x54000020, 0xc801024b,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010246,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e,
+ 0x24000102, 0x64000072, 0x44000103, 0xb8010229,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102e8,
+ 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000ef,
+ 0x24000102, 0xb8002065, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000115,
+ 0x24000102, 0xb8002073, 0x44000104, 0xb8010238,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000018,
+ 0x24000102, 0xb800206f, 0x44000103, 0xb8010209,
+ 0x24000102, 0x74000070, 0x54000105, 0xc8010271,
+ 0x24000102, 0x7400000a, 0x44000104, 0xb801024e,
+ 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb,
+ 0x24000102, 0xb8002069, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0xc8010820, 0xc801022f,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010244,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096,
+ 0x24000102, 0xb8002072, 0x44000103, 0xb8010227,
+ 0x24000102, 0x74000075, 0x54000105, 0xc80102cf,
+ 0x24000102, 0x74000063, 0x44000104, 0xb8010278,
+ 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8010920, 0xc801021e,
+ 0x24000102, 0x64000073, 0x44000104, 0xb801023e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000083,
+ 0x24000102, 0x6400006f, 0x44000103, 0xb801020f,
+ 0x24000102, 0x74000074, 0x54000105, 0xc80102a0,
+ 0x24000102, 0x7400002c, 0x44000104, 0xb8010253,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d3,
+ 0x24000102, 0x64000069, 0x44000103, 0xb8010205,
+ 0x24000102, 0x7400006e, 0x54000020, 0xc8010257,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010248,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9,
+ 0x24000102, 0x64000072, 0x44000103, 0xb8010234,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102f9,
+ 0x24000102, 0x74000064, 0x44000104, 0xb80102c3,
+ 0x24000102, 0x84000067, 0x64000061, 0x3e000120,
+ 0x24000102, 0x88010265, 0x44000103, 0xc8010377,
+ 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000035,
+ 0x24000102, 0x74000108, 0x64000109, 0xc4000119,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
+ 0x24000102, 0x98010270, 0x54000105, 0xc801025b,
+ 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
+ 0x24000102, 0xc8010330, 0x88010200, 0xb40000af,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8006f20, 0xc8010224,
+ 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
+ 0x24000102, 0xc8010304, 0x6400010a, 0xb4000089,
+ 0x24000102, 0x88010272, 0x44000103, 0x94000010,
+ 0x24000102, 0x98010275, 0x54000105, 0xc80102b0,
+ 0x24000102, 0x98010263, 0x44000104, 0x94000054,
+ 0x24000102, 0xc8010342, 0x88010261, 0xb40000d9,
+ 0x24000102, 0xc8007365, 0x44000103, 0xc80103ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xc8010212,
+ 0x24000102, 0xc8007373, 0x44000104, 0x94000039,
+ 0x24000102, 0xc8010301, 0x64000109, 0xb400001a,
+ 0x24000102, 0xc800736f, 0x44000103, 0x9400000b,
+ 0x24000102, 0x98010274, 0x54000105, 0xc801027e,
+ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
+ 0x24000102, 0xc8010332, 0xc8007300, 0xb40000c6,
+ 0x24000102, 0xc8007369, 0x44000103, 0xa4000113,
+ 0x24000102, 0x9801026e, 0x54000020, 0xc801024a,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000045,
+ 0x24000102, 0xc801032d, 0x6400010a, 0xb400009c,
+ 0x24000102, 0xc8007372, 0x44000103, 0x94000028,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102e7,
+ 0x24000102, 0x98010264, 0x44000104, 0x94000080,
+ 0x24000102, 0xc8010366, 0xc8007361, 0xb40000ed,
+ 0x24000102, 0xa8010465, 0x44000103, 0xc8010379,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
+ 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000010,
+ 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
+ 0x24000102, 0x74000070, 0x54000105, 0xc801026a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
+ 0x24000102, 0xc8010331, 0xa8010400, 0xb40000b9,
+ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8007020, 0xc801022b,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000043,
+ 0x24000102, 0xc801030d, 0x6400010a, 0xb4000094,
+ 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
+ 0x24000102, 0x74000075, 0x54000105, 0xc80102c7,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000076,
+ 0x24000102, 0xc8010362, 0xa8010461, 0xb40000e2,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8010520, 0xc801021c,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
+ 0x24000102, 0xc8010303, 0x64000109, 0xb4000081,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
+ 0x24000102, 0x74000074, 0x54000105, 0xc8010290,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
+ 0x24000102, 0xc8010341, 0x64000000, 0xb40000d1,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000002,
+ 0x24000102, 0x7400006e, 0x54000020, 0xc8010256,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000047,
+ 0x24000102, 0xc801032e, 0x6400010a, 0xb40000a7,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000033,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102f8,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
+ 0x24000102, 0xc8010367, 0x64000061, 0xb40000fa,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0xc8002068, 0x78010220, 0x940000f0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000036,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000000,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
+ 0x24000102, 0xc8002070, 0x54000105, 0xc801025f,
+ 0x24000102, 0xc800200a, 0x44000104, 0x9400004c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b5,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0xc800206d, 0xc8000a20, 0xc8010226,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000040,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008e,
+ 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
+ 0x24000102, 0xc8002075, 0x54000105, 0xc80102bd,
+ 0x24000102, 0xc8002063, 0x44000104, 0x9400006b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000dd,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0xc800206c, 0x98010420, 0xc8010214,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400005d,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
+ 0x24000102, 0xc8002074, 0x54000105, 0xc801028b,
+ 0x24000102, 0xc800202c, 0x44000104, 0x94000050,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000cb,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
+ 0x24000102, 0xc800206e, 0x54000020, 0xc8010251,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000046,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000029,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102f1,
+ 0x24000102, 0xc8002064, 0x44000104, 0x940000c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000f4,
+ 0x24000102, 0xc8010965, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000116,
+ 0x24000102, 0xc8010973, 0x44000104, 0x94000038,
+ 0x24000102, 0x74000108, 0x64000109, 0xb4000015,
+ 0x24000102, 0xc801096f, 0x44000103, 0x94000009,
+ 0x24000102, 0x74000070, 0x54000105, 0xc801027a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
+ 0x24000102, 0x84000031, 0xc8010900, 0xb40000be,
+ 0x24000102, 0xc8010969, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0x54000020, 0xc801023c,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000044,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098,
+ 0x24000102, 0xc8010972, 0x44000103, 0x94000027,
+ 0x24000102, 0x74000075, 0x54000105, 0xc80102d0,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000078,
+ 0x24000102, 0x84000062, 0xc8010961, 0xb40000e9,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8000020, 0xc8010221,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000085,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
+ 0x24000102, 0x74000074, 0x54000105, 0xc80102a4,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d5,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000005,
+ 0x24000102, 0x7400006e, 0x54000020, 0xc8010258,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000048,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000034,
+ 0x24000102, 0x74000107, 0x54000105, 0xc80102fc,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
+ 0x24000102, 0x84000067, 0x64000061, 0x42000130,
+ 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
+ 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000035,
+ 0x24000102, 0x74000108, 0x64000109, 0xa40000fd,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
+ 0x24000102, 0x98010270, 0x54000105, 0xa4000059,
+ 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
+ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023,
+ 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
+ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088,
+ 0x24000102, 0x88010272, 0x44000103, 0x94000010,
+ 0x24000102, 0x98010275, 0x54000105, 0xa40000a6,
+ 0x24000102, 0x98010263, 0x44000104, 0x94000054,
+ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8,
+ 0x24000102, 0xc8006f65, 0x44000103, 0xa80102ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xa4000011,
+ 0x24000102, 0xc8006f73, 0x44000104, 0x94000039,
+ 0x24000102, 0xa8010201, 0x64000109, 0xb4000019,
+ 0x24000102, 0xc8006f6f, 0x44000103, 0x9400000b,
+ 0x24000102, 0x98010274, 0x54000105, 0xa400007c,
+ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
+ 0x24000102, 0xa8010232, 0xc8006f00, 0xb40000c5,
+ 0x24000102, 0xc8006f69, 0x44000103, 0xa4000111,
+ 0x24000102, 0x9801026e, 0x54000020, 0xa400003d,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000045,
+ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b,
+ 0x24000102, 0xc8006f72, 0x44000103, 0x94000028,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e3,
+ 0x24000102, 0x98010264, 0x44000104, 0x94000080,
+ 0x24000102, 0xa8010266, 0xc8006f61, 0xb40000ec,
+ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
+ 0x24000102, 0xa8010373, 0x44000104, 0x94000037,
+ 0x24000102, 0x74000108, 0x64000109, 0x3600000a,
+ 0x24000102, 0xa801036f, 0x44000103, 0x94000008,
+ 0x24000102, 0x74000070, 0x54000105, 0xa4000060,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
+ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8,
+ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8006d20, 0xa400002a,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000043,
+ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093,
+ 0x24000102, 0xa8010372, 0x44000103, 0x94000022,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000c1,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000076,
+ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
+ 0x24000102, 0xa8010203, 0x64000109, 0xb400007d,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
+ 0x24000102, 0x74000074, 0x54000105, 0xa400008f,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
+ 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000002,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000055,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000047,
+ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000033,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f3,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
+ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0xb8010468, 0x78010220, 0x940000f0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000036,
+ 0x24000102, 0x74000108, 0x64000109, 0xc400011e,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
+ 0x24000102, 0xb8010470, 0x54000105, 0xa400005c,
+ 0x24000102, 0xb801040a, 0x44000104, 0x9400004c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b3,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000040,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008d,
+ 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
+ 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4,
+ 0x24000102, 0xb8010463, 0x44000104, 0x9400006b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000dc,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0xb801046c, 0x98010420, 0xa4000013,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400005a,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
+ 0x24000102, 0xb8010474, 0x54000105, 0xa400007f,
+ 0x24000102, 0xb801042c, 0x44000104, 0x94000050,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000ca,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
+ 0x24000102, 0xb801046e, 0x54000020, 0xa400004b,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000046,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000029,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e8,
+ 0x24000102, 0xb8010464, 0x44000104, 0x940000c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000f2,
+ 0x24000102, 0xb8010565, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000117,
+ 0x24000102, 0xb8010573, 0x44000104, 0x94000038,
+ 0x24000102, 0x74000108, 0x64000109, 0x3600001a,
+ 0x24000102, 0xb801056f, 0x44000103, 0x94000009,
+ 0x24000102, 0x74000070, 0x54000105, 0xa4000071,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
+ 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc,
+ 0x24000102, 0xb8010569, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0x54000020, 0xa400002f,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000044,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097,
+ 0x24000102, 0xb8010572, 0x44000103, 0x94000027,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000cf,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000078,
+ 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000084,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
+ 0x24000102, 0x74000074, 0x54000105, 0xa40000a0,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d4,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000005,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000057,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000048,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000034,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f9,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
+ 0x24000102, 0x84000067, 0x64000061, 0xb4000200,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000035,
+ 0x24000102, 0x74000108, 0x64000109, 0xc400011a,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
+ 0x24000102, 0x98010270, 0x54000105, 0xa400005b,
+ 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b1,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024,
+ 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008a,
+ 0x24000102, 0x88010272, 0x44000103, 0x94000010,
+ 0x24000102, 0x98010275, 0x54000105, 0xa40000b0,
+ 0x24000102, 0x98010263, 0x44000104, 0x94000054,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000da,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xa4000012,
+ 0x24000102, 0x64000073, 0x44000104, 0x94000039,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400001b,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b,
+ 0x24000102, 0x98010274, 0x54000105, 0xa400007e,
+ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000c8,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000113,
+ 0x24000102, 0x9801026e, 0x54000020, 0xa400004a,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000045,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000028,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e7,
+ 0x24000102, 0x98010264, 0x44000104, 0x94000080,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000ee,
+ 0x24000102, 0xa8010465, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
+ 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000012,
+ 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400006a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
+ 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba,
+ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8007520, 0xa400002b,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000043,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095,
+ 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000c7,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000076,
+ 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000082,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
+ 0x24000102, 0x74000074, 0x54000105, 0xa4000090,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d2,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000002,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000056,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000047,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000033,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f8,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
+ 0x24000102, 0x84000067, 0x64000061, 0xb40000fb,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000f0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000036,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000002,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400005f,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b6,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0x7400006d, 0xc8006320, 0xa4000026,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000040,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb4000091,
+ 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000bd,
+ 0x24000102, 0x74000063, 0x44000104, 0x9400006b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000de,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0x7400006c, 0x98010420, 0xa4000014,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400005e,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
+ 0x24000102, 0x74000074, 0x54000105, 0xa400008b,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000050,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000cc,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000051,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000046,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000029,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f1,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000f5,
+ 0x24000102, 0xc8000065, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000118,
+ 0x24000102, 0xc8000073, 0x44000104, 0x94000038,
+ 0x24000102, 0x74000108, 0x64000109, 0xb4000016,
+ 0x24000102, 0xc800006f, 0x44000103, 0x94000009,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400007a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
+ 0x24000102, 0x84000031, 0xc8000000, 0xb40000bf,
+ 0x24000102, 0xc8000069, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0x54000020, 0xa400003c,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000044,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099,
+ 0x24000102, 0xc8000072, 0x44000103, 0x94000027,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000d0,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000078,
+ 0x24000102, 0x84000062, 0xc8000061, 0xb40000ea,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000086,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
+ 0x24000102, 0x74000074, 0x54000105, 0xa40000a4,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d6,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000005,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000058,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000048,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000034,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000fc,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
+ 0x24000102, 0x84000067, 0x64000061, 0x46000140,
+ 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
+ 0x24000102, 0x98010268, 0x78010220, 0xb80102e0,
+ 0x24000102, 0x88010273, 0x44000104, 0xb8010235,
+ 0x24000102, 0x74000108, 0x64000109, 0xa40000fd,
+ 0x24000102, 0x8801026f, 0x44000103, 0xb8010206,
+ 0x24000102, 0x98010270, 0x54000105, 0xa4000059,
+ 0x24000102, 0x9801020a, 0x44000104, 0xb8010249,
+ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ad,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8006520, 0xa4000023,
+ 0x24000102, 0x64000106, 0x44000104, 0xb801023f,
+ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000087,
+ 0x24000102, 0x88010272, 0x44000103, 0xb8010210,
+ 0x24000102, 0x98010275, 0x54000105, 0xa40000a6,
+ 0x24000102, 0x98010263, 0x44000104, 0xb8010254,
+ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d7,
+ 0x24000102, 0xc8006965, 0x44000103, 0xa80102ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xa4000011,
+ 0x24000102, 0xc8006973, 0x44000104, 0xb8010239,
+ 0x24000102, 0xa8010201, 0x64000109, 0xb4000017,
+ 0x24000102, 0xc800696f, 0x44000103, 0xb801020b,
+ 0x24000102, 0x98010274, 0x54000105, 0xa400007c,
+ 0x24000102, 0x9801022c, 0x44000104, 0xb801024f,
+ 0x24000102, 0xa8010232, 0xc8006900, 0xb40000c4,
+ 0x24000102, 0xc8006969, 0x44000103, 0xa4000111,
+ 0x24000102, 0x9801026e, 0x54000020, 0xa400003d,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010245,
+ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009a,
+ 0x24000102, 0xc8006972, 0x44000103, 0xb8010228,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e3,
+ 0x24000102, 0x98010264, 0x44000104, 0xb8010280,
+ 0x24000102, 0xa8010266, 0xc8006961, 0xb40000eb,
+ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
+ 0x24000102, 0x74000068, 0x78010220, 0xb80102fe,
+ 0x24000102, 0xa8010373, 0x44000104, 0xb8010237,
+ 0x24000102, 0x74000108, 0x64000109, 0x3600000c,
+ 0x24000102, 0xa801036f, 0x44000103, 0xb8010208,
+ 0x24000102, 0x74000070, 0x54000105, 0xa4000060,
+ 0x24000102, 0x7400000a, 0x44000104, 0xb801024d,
+ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b7,
+ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8006c20, 0xa400002a,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010243,
+ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000092,
+ 0x24000102, 0xa8010372, 0x44000103, 0xb8010222,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000c1,
+ 0x24000102, 0x74000063, 0x44000104, 0xb8010276,
+ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000df,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018,
+ 0x24000102, 0x64000073, 0x44000104, 0xb801023b,
+ 0x24000102, 0xa8010203, 0x64000109, 0xb400007b,
+ 0x24000102, 0x6400006f, 0x44000103, 0xb801020e,
+ 0x24000102, 0x74000074, 0x54000105, 0xa400008f,
+ 0x24000102, 0x7400002c, 0x44000104, 0xb8010252,
+ 0x24000102, 0xa8010241, 0x64000000, 0xb40000cd,
+ 0x24000102, 0x64000069, 0x44000103, 0xb8010202,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000055,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010247,
+ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a3,
+ 0x24000102, 0x64000072, 0x44000103, 0xb8010233,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f3,
+ 0x24000102, 0x74000064, 0x44000104, 0xb80102c2,
+ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f6,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0xb8010368, 0x78010220, 0xb80102f0,
+ 0x24000102, 0x88010273, 0x44000104, 0xb8010236,
+ 0x24000102, 0x74000108, 0x64000109, 0xc400011f,
+ 0x24000102, 0x8801026f, 0x44000103, 0xb8010207,
+ 0x24000102, 0xb8010370, 0x54000105, 0xa400005c,
+ 0x24000102, 0xb801030a, 0x44000104, 0xb801024c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b2,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0xb801036d, 0xb8007320, 0xa4000025,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010240,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008c,
+ 0x24000102, 0x88010272, 0x44000103, 0xb801021f,
+ 0x24000102, 0xb8010375, 0x54000105, 0xa40000b4,
+ 0x24000102, 0xb8010363, 0x44000104, 0xb801026b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000db,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0xb801036c, 0x98010420, 0xa4000013,
+ 0x24000102, 0x64000073, 0x44000104, 0xb801023a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400001d,
+ 0x24000102, 0x6400006f, 0x44000103, 0xb801020c,
+ 0x24000102, 0xb8010374, 0x54000105, 0xa400007f,
+ 0x24000102, 0xb801032c, 0x44000104, 0xb8010250,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000c9,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
+ 0x24000102, 0xb801036e, 0x54000020, 0xa400004b,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010246,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009e,
+ 0x24000102, 0x64000072, 0x44000103, 0xb8010229,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e8,
+ 0x24000102, 0xb8010364, 0x44000104, 0xb80102c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000ef,
+ 0x24000102, 0xb8002065, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000115,
+ 0x24000102, 0xb8002073, 0x44000104, 0xb8010238,
+ 0x24000102, 0x74000108, 0x64000109, 0x3600001c,
+ 0x24000102, 0xb800206f, 0x44000103, 0xb8010209,
+ 0x24000102, 0x74000070, 0x54000105, 0xa4000071,
+ 0x24000102, 0x7400000a, 0x44000104, 0xb801024e,
+ 0x24000102, 0x84000031, 0xb8002000, 0xb40000bb,
+ 0x24000102, 0xb8002069, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0x54000020, 0xa400002f,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010244,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000096,
+ 0x24000102, 0xb8002072, 0x44000103, 0xb8010227,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000cf,
+ 0x24000102, 0x74000063, 0x44000104, 0xb8010278,
+ 0x24000102, 0x84000062, 0xb8002061, 0xb40000e5,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8010920, 0xa400001e,
+ 0x24000102, 0x64000073, 0x44000104, 0xb801023e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000083,
+ 0x24000102, 0x6400006f, 0x44000103, 0xb801020f,
+ 0x24000102, 0x74000074, 0x54000105, 0xa40000a0,
+ 0x24000102, 0x7400002c, 0x44000104, 0xb8010253,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d3,
+ 0x24000102, 0x64000069, 0x44000103, 0xb8010205,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000057,
+ 0x24000102, 0x64000106, 0x44000104, 0xb8010248,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a9,
+ 0x24000102, 0x64000072, 0x44000103, 0xb8010234,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f9,
+ 0x24000102, 0x74000064, 0x44000104, 0xb80102c3,
+ 0x24000102, 0x84000067, 0x64000061, 0x3e000128,
+ 0x24000102, 0x88010265, 0x44000103, 0xc8010477,
+ 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000035,
+ 0x24000102, 0x74000108, 0x64000109, 0xc400011b,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
+ 0x24000102, 0x98010270, 0x54000105, 0xa400005b,
+ 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
+ 0x24000102, 0xc8010430, 0x88010200, 0xb40000af,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8006f20, 0xa4000024,
+ 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
+ 0x24000102, 0xc8010404, 0x6400010a, 0xb4000089,
+ 0x24000102, 0x88010272, 0x44000103, 0x94000010,
+ 0x24000102, 0x98010275, 0x54000105, 0xa40000b0,
+ 0x24000102, 0x98010263, 0x44000104, 0x94000054,
+ 0x24000102, 0xc8010442, 0x88010261, 0xb40000d9,
+ 0x24000102, 0xc8010665, 0x44000103, 0xc80104ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xa4000012,
+ 0x24000102, 0xc8010673, 0x44000104, 0x94000039,
+ 0x24000102, 0xc8010401, 0x64000109, 0xb400001a,
+ 0x24000102, 0xc801066f, 0x44000103, 0x9400000b,
+ 0x24000102, 0x98010274, 0x54000105, 0xa400007e,
+ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
+ 0x24000102, 0xc8010432, 0xc8010600, 0xb40000c6,
+ 0x24000102, 0xc8010669, 0x44000103, 0xa4000113,
+ 0x24000102, 0x9801026e, 0x54000020, 0xa400004a,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000045,
+ 0x24000102, 0xc801042d, 0x6400010a, 0xb400009c,
+ 0x24000102, 0xc8010672, 0x44000103, 0x94000028,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e7,
+ 0x24000102, 0x98010264, 0x44000104, 0x94000080,
+ 0x24000102, 0xc8010466, 0xc8010661, 0xb40000ed,
+ 0x24000102, 0xa8010465, 0x44000103, 0xc8010479,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
+ 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000014,
+ 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400006a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
+ 0x24000102, 0xc8010431, 0xa8010400, 0xb40000b9,
+ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8007420, 0xa400002b,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000043,
+ 0x24000102, 0xc801040d, 0x6400010a, 0xb4000094,
+ 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000c7,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000076,
+ 0x24000102, 0xc8010462, 0xa8010461, 0xb40000e2,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
+ 0x24000102, 0xc8010403, 0x64000109, 0xb4000081,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
+ 0x24000102, 0x74000074, 0x54000105, 0xa4000090,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
+ 0x24000102, 0xc8010441, 0x64000000, 0xb40000d1,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000002,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000056,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000047,
+ 0x24000102, 0xc801042e, 0x6400010a, 0xb40000a7,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000033,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f8,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
+ 0x24000102, 0xc8010467, 0x64000061, 0xb40000fa,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0xc8010568, 0x78010220, 0x940000f0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000036,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000004,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
+ 0x24000102, 0xc8010570, 0x54000105, 0xa400005f,
+ 0x24000102, 0xc801050a, 0x44000104, 0x9400004c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b5,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0xc801056d, 0xc8002c20, 0xa4000026,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000040,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008e,
+ 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
+ 0x24000102, 0xc8010575, 0x54000105, 0xa40000bd,
+ 0x24000102, 0xc8010563, 0x44000104, 0x9400006b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000dd,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0xc801056c, 0x98010420, 0xa4000014,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400005d,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
+ 0x24000102, 0xc8010574, 0x54000105, 0xa400008b,
+ 0x24000102, 0xc801052c, 0x44000104, 0x94000050,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000cb,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
+ 0x24000102, 0xc801056e, 0x54000020, 0xa4000051,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000046,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a1,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000029,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f1,
+ 0x24000102, 0xc8010564, 0x44000104, 0x940000c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000f4,
+ 0x24000102, 0xc8010a65, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000116,
+ 0x24000102, 0xc8010a73, 0x44000104, 0x94000038,
+ 0x24000102, 0x74000108, 0x64000109, 0xb4000015,
+ 0x24000102, 0xc8010a6f, 0x44000103, 0x94000009,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400007a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
+ 0x24000102, 0x84000031, 0xc8010a00, 0xb40000be,
+ 0x24000102, 0xc8010a69, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0x54000020, 0xa400003c,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000044,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000098,
+ 0x24000102, 0xc8010a72, 0x44000103, 0x94000027,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000d0,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000078,
+ 0x24000102, 0x84000062, 0xc8010a61, 0xb40000e9,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8000020, 0xa4000021,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000085,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
+ 0x24000102, 0x74000074, 0x54000105, 0xa40000a4,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d5,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000005,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000058,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000048,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ab,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000034,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000fc,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
+ 0x24000102, 0x84000067, 0x64000061, 0x46000160,
+ 0x24000102, 0x88010265, 0x44000103, 0xa8010277,
+ 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000035,
+ 0x24000102, 0x74000108, 0x64000109, 0xa40000fd,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
+ 0x24000102, 0x98010270, 0x54000105, 0xa4000059,
+ 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
+ 0x24000102, 0xa8010230, 0x88010200, 0xb40000ae,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8006920, 0xa4000023,
+ 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
+ 0x24000102, 0xa8010204, 0x6400010a, 0xb4000088,
+ 0x24000102, 0x88010272, 0x44000103, 0x94000010,
+ 0x24000102, 0x98010275, 0x54000105, 0xa40000a6,
+ 0x24000102, 0x98010263, 0x44000104, 0x94000054,
+ 0x24000102, 0xa8010242, 0x88010261, 0xb40000d8,
+ 0x24000102, 0xc8007265, 0x44000103, 0xa80102ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xa4000011,
+ 0x24000102, 0xc8007273, 0x44000104, 0x94000039,
+ 0x24000102, 0xa8010201, 0x64000109, 0xb4000019,
+ 0x24000102, 0xc800726f, 0x44000103, 0x9400000b,
+ 0x24000102, 0x98010274, 0x54000105, 0xa400007c,
+ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
+ 0x24000102, 0xa8010232, 0xc8007200, 0xb40000c5,
+ 0x24000102, 0xc8007269, 0x44000103, 0xa4000111,
+ 0x24000102, 0x9801026e, 0x54000020, 0xa400003d,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000045,
+ 0x24000102, 0xa801022d, 0x6400010a, 0xb400009b,
+ 0x24000102, 0xc8007272, 0x44000103, 0x94000028,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e3,
+ 0x24000102, 0x98010264, 0x44000104, 0x94000080,
+ 0x24000102, 0xa8010266, 0xc8007261, 0xb40000ec,
+ 0x24000102, 0xa8010365, 0x44000103, 0xa8010279,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
+ 0x24000102, 0xa8010373, 0x44000104, 0x94000037,
+ 0x24000102, 0x74000108, 0x64000109, 0x3600000e,
+ 0x24000102, 0xa801036f, 0x44000103, 0x94000008,
+ 0x24000102, 0x74000070, 0x54000105, 0xa4000060,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
+ 0x24000102, 0xa8010231, 0xa8010300, 0xb40000b8,
+ 0x24000102, 0xa8010369, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8006e20, 0xa400002a,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000043,
+ 0x24000102, 0xa801020d, 0x6400010a, 0xb4000093,
+ 0x24000102, 0xa8010372, 0x44000103, 0x94000022,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000c1,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000076,
+ 0x24000102, 0xa8010262, 0xa8010361, 0xb40000e1,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8002020, 0xa4000018,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
+ 0x24000102, 0xa8010203, 0x64000109, 0xb400007d,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
+ 0x24000102, 0x74000074, 0x54000105, 0xa400008f,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
+ 0x24000102, 0xa8010241, 0x64000000, 0xb40000ce,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000002,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000055,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000047,
+ 0x24000102, 0xa801022e, 0x6400010a, 0xb40000a5,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000033,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f3,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
+ 0x24000102, 0xa8010267, 0x64000061, 0xb40000f7,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0xb8010468, 0x78010220, 0x940000f0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000036,
+ 0x24000102, 0x74000108, 0x64000109, 0xc4000120,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
+ 0x24000102, 0xb8010470, 0x54000105, 0xa400005c,
+ 0x24000102, 0xb801040a, 0x44000104, 0x9400004c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b3,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0xb801046d, 0xb8010620, 0xa4000025,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000040,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008d,
+ 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
+ 0x24000102, 0xb8010475, 0x54000105, 0xa40000b4,
+ 0x24000102, 0xb8010463, 0x44000104, 0x9400006b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000dc,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0xb801046c, 0x98010420, 0xa4000013,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400005a,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
+ 0x24000102, 0xb8010474, 0x54000105, 0xa400007f,
+ 0x24000102, 0xb801042c, 0x44000104, 0x94000050,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000ca,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000112,
+ 0x24000102, 0xb801046e, 0x54000020, 0xa400004b,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000046,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009f,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000029,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e8,
+ 0x24000102, 0xb8010464, 0x44000104, 0x940000c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000f2,
+ 0x24000102, 0xb8010565, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000117,
+ 0x24000102, 0xb8010573, 0x44000104, 0x94000038,
+ 0x24000102, 0x74000108, 0x64000109, 0x3600001e,
+ 0x24000102, 0xb801056f, 0x44000103, 0x94000009,
+ 0x24000102, 0x74000070, 0x54000105, 0xa4000071,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
+ 0x24000102, 0x84000031, 0xb8010500, 0xb40000bc,
+ 0x24000102, 0xb8010569, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0x54000020, 0xa400002f,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000044,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000097,
+ 0x24000102, 0xb8010572, 0x44000103, 0x94000027,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000cf,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000078,
+ 0x24000102, 0x84000062, 0xb8010561, 0xb40000e6,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8010a20, 0xa400001e,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000084,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
+ 0x24000102, 0x74000074, 0x54000105, 0xa40000a0,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d4,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000005,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000057,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000048,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000aa,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000034,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f9,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
+ 0x24000102, 0x84000067, 0x64000061, 0xb4000200,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0x98010268, 0x78010220, 0x940000e0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000035,
+ 0x24000102, 0x74000108, 0x64000109, 0xc400011c,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000006,
+ 0x24000102, 0x98010270, 0x54000105, 0xa400005b,
+ 0x24000102, 0x9801020a, 0x44000104, 0x94000049,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b1,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010d,
+ 0x24000102, 0x9801026d, 0xb8007220, 0xa4000024,
+ 0x24000102, 0x64000106, 0x44000104, 0x9400003f,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb400008a,
+ 0x24000102, 0x88010272, 0x44000103, 0x94000010,
+ 0x24000102, 0x98010275, 0x54000105, 0xa40000b0,
+ 0x24000102, 0x98010263, 0x44000104, 0x94000054,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000da,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0x9801026c, 0x98010320, 0xa4000012,
+ 0x24000102, 0x64000073, 0x44000104, 0x94000039,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400001b,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000b,
+ 0x24000102, 0x98010274, 0x54000105, 0xa400007e,
+ 0x24000102, 0x9801022c, 0x44000104, 0x9400004f,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000c8,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000113,
+ 0x24000102, 0x9801026e, 0x54000020, 0xa400004a,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000045,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb400009d,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000028,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000e7,
+ 0x24000102, 0x98010264, 0x44000104, 0x94000080,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000ee,
+ 0x24000102, 0xa8010465, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000fe,
+ 0x24000102, 0xa8010473, 0x44000104, 0x94000037,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000016,
+ 0x24000102, 0xa801046f, 0x44000103, 0x94000008,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400006a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004d,
+ 0x24000102, 0x84000031, 0xa8010400, 0xb40000ba,
+ 0x24000102, 0xa8010469, 0x44000103, 0x9400010f,
+ 0x24000102, 0x7400006d, 0xc8010720, 0xa400002b,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000043,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000095,
+ 0x24000102, 0xa8010472, 0x44000103, 0x94000022,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000c7,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000076,
+ 0x24000102, 0x84000062, 0xa8010461, 0xb40000e4,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010b,
+ 0x24000102, 0x7400006c, 0xa8010520, 0xa400001c,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003b,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000082,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000e,
+ 0x24000102, 0x74000074, 0x54000105, 0xa4000090,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000052,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d2,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000002,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000056,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000047,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000a8,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000033,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f8,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c2,
+ 0x24000102, 0x84000067, 0x64000061, 0xb40000fb,
+ 0x24000102, 0x88010265, 0x44000103, 0x84000077,
+ 0x24000102, 0x74000068, 0x78010220, 0x940000f0,
+ 0x24000102, 0x88010273, 0x44000104, 0x94000036,
+ 0x24000102, 0x74000108, 0x64000109, 0x36000006,
+ 0x24000102, 0x8801026f, 0x44000103, 0x94000007,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400005f,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004c,
+ 0x24000102, 0x84000030, 0x88010200, 0xb40000b6,
+ 0x24000102, 0x88010269, 0x44000103, 0x9400010e,
+ 0x24000102, 0x7400006d, 0xc8006420, 0xa4000026,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000040,
+ 0x24000102, 0x84000004, 0x6400010a, 0xb4000091,
+ 0x24000102, 0x88010272, 0x44000103, 0x9400001f,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000bd,
+ 0x24000102, 0x74000063, 0x44000104, 0x9400006b,
+ 0x24000102, 0x84000042, 0x88010261, 0xb40000de,
+ 0x24000102, 0x64000065, 0x44000103, 0x840000ff,
+ 0x24000102, 0x7400006c, 0x98010420, 0xa4000014,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003a,
+ 0x24000102, 0x84000001, 0x64000109, 0xb400005e,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000c,
+ 0x24000102, 0x74000074, 0x54000105, 0xa400008b,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000050,
+ 0x24000102, 0x84000032, 0x64000000, 0xb40000cc,
+ 0x24000102, 0x64000069, 0x44000103, 0xa4000114,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000051,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000046,
+ 0x24000102, 0x8400002d, 0x6400010a, 0xb40000a2,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000029,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000f1,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c0,
+ 0x24000102, 0x84000066, 0x64000061, 0xb40000f5,
+ 0x24000102, 0xc8006165, 0x44000103, 0x84000079,
+ 0x24000102, 0x74000068, 0x78010220, 0xb4000118,
+ 0x24000102, 0xc8006173, 0x44000104, 0x94000038,
+ 0x24000102, 0x74000108, 0x64000109, 0xb4000016,
+ 0x24000102, 0xc800616f, 0x44000103, 0x94000009,
+ 0x24000102, 0x74000070, 0x54000105, 0xa400007a,
+ 0x24000102, 0x7400000a, 0x44000104, 0x9400004e,
+ 0x24000102, 0x84000031, 0xc8006100, 0xb40000bf,
+ 0x24000102, 0xc8006169, 0x44000103, 0x94000110,
+ 0x24000102, 0x7400006d, 0x54000020, 0xa400003c,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000044,
+ 0x24000102, 0x8400000d, 0x6400010a, 0xb4000099,
+ 0x24000102, 0xc8006172, 0x44000103, 0x94000027,
+ 0x24000102, 0x74000075, 0x54000105, 0xa40000d0,
+ 0x24000102, 0x74000063, 0x44000104, 0x94000078,
+ 0x24000102, 0x84000062, 0xc8006161, 0xb40000ea,
+ 0x24000102, 0x64000065, 0x44000103, 0x9400010c,
+ 0x24000102, 0x7400006c, 0xb8006120, 0xa4000021,
+ 0x24000102, 0x64000073, 0x44000104, 0x9400003e,
+ 0x24000102, 0x84000003, 0x64000109, 0xb4000086,
+ 0x24000102, 0x6400006f, 0x44000103, 0x9400000f,
+ 0x24000102, 0x74000074, 0x54000105, 0xa40000a4,
+ 0x24000102, 0x7400002c, 0x44000104, 0x94000053,
+ 0x24000102, 0x84000041, 0x64000000, 0xb40000d6,
+ 0x24000102, 0x64000069, 0x44000103, 0x94000005,
+ 0x24000102, 0x7400006e, 0x54000020, 0xa4000058,
+ 0x24000102, 0x64000106, 0x44000104, 0x94000048,
+ 0x24000102, 0x8400002e, 0x6400010a, 0xb40000ac,
+ 0x24000102, 0x64000072, 0x44000103, 0x94000034,
+ 0x24000102, 0x74000107, 0x54000105, 0xa40000fc,
+ 0x24000102, 0x74000064, 0x44000104, 0x940000c3,
+ 0x24000102, 0x84000067, 0x64000061, 0x52000020 },
+
+ .long_code_lookup = {
+ 0x3521, 0x3525, 0x3522, 0x3526, 0x3523, 0x3527, 0x3524, 0x3528,
+ 0x3529, 0x352d, 0x352a, 0x352e, 0x352b, 0x352f, 0x352c, 0x3530,
+ 0x3531, 0x3535, 0x3532, 0x3536, 0x3533, 0x3537, 0x3534, 0x3538,
+ 0x3539, 0x353d, 0x353a, 0x353e, 0x353b, 0x353f, 0x353c, 0x3540,
+ 0x49a1, 0x3d00, 0x49a2, 0x51c1, 0x49a3, 0x3d01, 0x49a4, 0x51e1,
+ 0x49a5, 0x3d00, 0x49a6, 0x51c2, 0x49a7, 0x3d01, 0x49a8, 0x51e2,
+ 0x49a9, 0x3d00, 0x49aa, 0x51c3, 0x49ab, 0x3d01, 0x49ac, 0x51e3,
+ 0x49ad, 0x3d00, 0x49ae, 0x51c4, 0x49af, 0x3d01, 0x49b0, 0x51e4,
+ 0x49b1, 0x3d00, 0x49b2, 0x51c5, 0x49b3, 0x3d01, 0x49b4, 0x51e5,
+ 0x49b5, 0x3d00, 0x49b6, 0x51c6, 0x49b7, 0x3d01, 0x49b8, 0x51e6,
+ 0x49b9, 0x3d00, 0x49ba, 0x51c7, 0x49bb, 0x3d01, 0x49bc, 0x51e7,
+ 0x49bd, 0x3d00, 0x49be, 0x51c8, 0x49bf, 0x3d01, 0x49c0, 0x51e8,
+ 0x49a1, 0x3d00, 0x49a2, 0x51c9, 0x49a3, 0x3d01, 0x49a4, 0x51e9,
+ 0x49a5, 0x3d00, 0x49a6, 0x51ca, 0x49a7, 0x3d01, 0x49a8, 0x51ea,
+ 0x49a9, 0x3d00, 0x49aa, 0x51cb, 0x49ab, 0x3d01, 0x49ac, 0x51eb,
+ 0x49ad, 0x3d00, 0x49ae, 0x51cc, 0x49af, 0x3d01, 0x49b0, 0x51ec,
+ 0x49b1, 0x3d00, 0x49b2, 0x51cd, 0x49b3, 0x3d01, 0x49b4, 0x51ed,
+ 0x49b5, 0x3d00, 0x49b6, 0x51ce, 0x49b7, 0x3d01, 0x49b8, 0x51ee,
+ 0x49b9, 0x3d00, 0x49ba, 0x51cf, 0x49bb, 0x3d01, 0x49bc, 0x51ef,
+ 0x49bd, 0x3d00, 0x49be, 0x51d0, 0x49bf, 0x3d01, 0x49c0, 0x51f0,
+ 0x49a1, 0x3d00, 0x49a2, 0x51d1, 0x49a3, 0x3d01, 0x49a4, 0x51f1,
+ 0x49a5, 0x3d00, 0x49a6, 0x51d2, 0x49a7, 0x3d01, 0x49a8, 0x51f2,
+ 0x49a9, 0x3d00, 0x49aa, 0x51d3, 0x49ab, 0x3d01, 0x49ac, 0x51f3,
+ 0x49ad, 0x3d00, 0x49ae, 0x51d4, 0x49af, 0x3d01, 0x49b0, 0x51f4,
+ 0x49b1, 0x3d00, 0x49b2, 0x51d5, 0x49b3, 0x3d01, 0x49b4, 0x51f5,
+ 0x49b5, 0x3d00, 0x49b6, 0x51d6, 0x49b7, 0x3d01, 0x49b8, 0x51f6,
+ 0x49b9, 0x3d00, 0x49ba, 0x51d7, 0x49bb, 0x3d01, 0x49bc, 0x51f7,
+ 0x49bd, 0x3d00, 0x49be, 0x51d8, 0x49bf, 0x3d01, 0x49c0, 0x51f8,
+ 0x49a1, 0x3d00, 0x49a2, 0x51d9, 0x49a3, 0x3d01, 0x49a4, 0x51f9,
+ 0x49a5, 0x3d00, 0x49a6, 0x51da, 0x49a7, 0x3d01, 0x49a8, 0x51fa,
+ 0x49a9, 0x3d00, 0x49aa, 0x51db, 0x49ab, 0x3d01, 0x49ac, 0x51fb,
+ 0x49ad, 0x3d00, 0x49ae, 0x51dc, 0x49af, 0x3d01, 0x49b0, 0x51fc,
+ 0x49b1, 0x3d00, 0x49b2, 0x51dd, 0x49b3, 0x3d01, 0x49b4, 0x51fd,
+ 0x49b5, 0x3d00, 0x49b6, 0x51de, 0x49b7, 0x3d01, 0x49b8, 0x51fe,
+ 0x49b9, 0x3d00, 0x49ba, 0x51df, 0x49bb, 0x3d01, 0x49bc, 0x51ff,
+ 0x49bd, 0x3d00, 0x49be, 0x51e0, 0x49bf, 0x3d01, 0x49c0, 0x5200,
+ 0x3d41, 0x3d43, 0x3d45, 0x3d47, 0x3d49, 0x3d4b, 0x3d4d, 0x3d4f,
+ 0x3d42, 0x3d44, 0x3d46, 0x3d48, 0x3d4a, 0x3d4c, 0x3d4e, 0x3d50,
+ 0x4151, 0x4152, 0x4153, 0x4154, 0x4155, 0x4156, 0x4157, 0x4158,
+ 0x4159, 0x415a, 0x415b, 0x415c, 0x415d, 0x415e, 0x415f, 0x4160,
+ 0x4561, 0x4571, 0x4562, 0x4572, 0x4563, 0x4573, 0x4564, 0x4574,
+ 0x4565, 0x4575, 0x4566, 0x4576, 0x4567, 0x4577, 0x4568, 0x4578,
+ 0x4569, 0x4579, 0x456a, 0x457a, 0x456b, 0x457b, 0x456c, 0x457c,
+ 0x456d, 0x457d, 0x456e, 0x457e, 0x456f, 0x457f, 0x4570, 0x4580,
+ 0x4581, 0x4582, 0x4583, 0x4584, 0x4585, 0x4586, 0x4587, 0x4588,
+ 0x4589, 0x458a, 0x458b, 0x458c, 0x458d, 0x458e, 0x458f, 0x4590,
+ 0x4591, 0x4592, 0x4593, 0x4594, 0x4595, 0x4596, 0x4597, 0x4598,
+ 0x4599, 0x459a, 0x459b, 0x459c, 0x459d, 0x459e, 0x459f, 0x45a0,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }
+};
+
+struct inflate_huff_code_small pregen_dist_huff_code = {
+ .short_code_lookup = {
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4803,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4801,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4000,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4024,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3825,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4002,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x3069,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3047,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x3846,
+ 0x1956, 0x219a, 0x20f0, 0x2935, 0x20ce, 0x28ac, 0x2134, 0x29bc,
+ 0x1956, 0x288a, 0x2112, 0x2979, 0x20cf, 0x28f1, 0x2178, 0x31bd,
+ 0x1956, 0x219a, 0x20f0, 0x2957, 0x20ce, 0x28ad, 0x2134, 0x3068,
+ 0x1956, 0x288b, 0x2112, 0x299b, 0x20cf, 0x2913, 0x2178, 0x4803 },
+
+ .long_code_lookup = {
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }
+};
+
diff --git a/src/isa-l/igzip/stdmac.asm b/src/isa-l/igzip/stdmac.asm
new file mode 100644
index 000000000..e54828785
--- /dev/null
+++ b/src/isa-l/igzip/stdmac.asm
@@ -0,0 +1,469 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifndef STDMAC_ASM
+%define STDMAC_ASM
+;; internal macro used by PUSH_ALL
+;; push args L to R
+%macro push_all_ 1-*
+%xdefine _PUSH_ALL_REGS_COUNT_ %0
+%rep %0
+ push %1
+ %rotate 1
+%endrep
+%endmacro
+
+;; internal macro used by POP_ALL
+;; pop args R to L
+%macro pop_all_ 1-*
+%rep %0
+ %rotate -1
+ pop %1
+%endrep
+%endmacro
+
+%xdefine _PUSH_ALL_REGS_COUNT_ 0
+%xdefine _ALLOC_STACK_VAL_ 0
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; STACK_OFFSET
+;; Number of bytes subtracted from stack due to PUSH_ALL and ALLOC_STACK
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; PUSH_ALL reg1, reg2, ...
+;; push args L to R, remember regs for POP_ALL
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro PUSH_ALL 1+
+%xdefine _PUSH_ALL_REGS_ %1
+ push_all_ %1
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; POP_ALL
+;; pop args from prev "PUSH_ALL" R to L
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro POP_ALL 0
+ pop_all_ _PUSH_ALL_REGS_
+%xdefine _PUSH_ALL_REGS_COUNT_ 0
+%endmacro
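+
+;; Usage sketch (hypothetical registers):
+;;     PUSH_ALL rbx, r12, r13   ; pushes rbx, r12, r13 left to right
+;;     ...
+;;     POP_ALL                  ; pops r13, r12, rbx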
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALLOC_STACK n
+;; subtract n from the stack pointer and remember the value for RESTORE_STACK
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro ALLOC_STACK 1
+%xdefine _ALLOC_STACK_VAL_ %1
+ sub rsp, %1
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; RESTORE_STACK
+;; add n to the stack pointer, where n is the arg to the previous ALLOC_STACK
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro RESTORE_STACK 0
+ add rsp, _ALLOC_STACK_VAL_
+%xdefine _ALLOC_STACK_VAL_ 0
+%endmacro
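+
+;; Example: after "PUSH_ALL rbx, rbp" and "ALLOC_STACK 32",
+;; STACK_OFFSET evaluates to 2*8 + 32 = 48; RESTORE_STACK adds the
+;; 32 back and POP_ALL restores the pushed registers.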
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; NOPN n
+;; Create n bytes of NOP, using nops of up to 8 bytes each
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro NOPN 1
+
+ %assign %%i %1
+ %rep 200
+ %if (%%i < 9)
+ nopn %%i
+ %exitrep
+ %else
+ nopn 8
+ %assign %%i (%%i - 8)
+ %endif
+ %endrep
+%endmacro
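+
+;; Example: "NOPN 13" expands to "nopn 8" followed by "nopn 5",
+;; emitting 13 bytes of padding in two instructions.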
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; nopn n
+;; Create n bytes of NOP, where n is between 1 and 9
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro nopn 1
+%if (%1 == 1)
+ nop
+%elif (%1 == 2)
+ db 0x66
+ nop
+%elif (%1 == 3)
+ db 0x0F
+ db 0x1F
+ db 0x00
+%elif (%1 == 4)
+ db 0x0F
+ db 0x1F
+ db 0x40
+ db 0x00
+%elif (%1 == 5)
+ db 0x0F
+ db 0x1F
+ db 0x44
+ db 0x00
+ db 0x00
+%elif (%1 == 6)
+ db 0x66
+ db 0x0F
+ db 0x1F
+ db 0x44
+ db 0x00
+ db 0x00
+%elif (%1 == 7)
+ db 0x0F
+ db 0x1F
+ db 0x80
+ db 0x00
+ db 0x00
+ db 0x00
+ db 0x00
+%elif (%1 == 8)
+ db 0x0F
+ db 0x1F
+ db 0x84
+ db 0x00
+ db 0x00
+ db 0x00
+ db 0x00
+ db 0x00
+%elif (%1 == 9)
+ db 0x66
+ db 0x0F
+ db 0x1F
+ db 0x84
+ db 0x00
+ db 0x00
+ db 0x00
+ db 0x00
+ db 0x00
+%else
+%error Invalid value to nopn
+%endif
+%endmacro
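+
+;; The db sequences above are the standard recommended multi-byte
+;; NOP encodings (0F 1F /0 forms, with 66 operand-size prefixes
+;; where needed), so the padding decodes as real instructions.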
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; rolx64 dst, src, amount
+;; Emulate a rolx instruction using rorx, assuming data 64 bits wide
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro rolx64 3
+ rorx %1, %2, (64-%3)
+%endm
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; rolx32 dst, src, amount
+;; Emulate a rolx instruction using rorx, assuming data 32 bits wide
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro rolx32 3
+ rorx %1, %2, (32-%3)
+%endm
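+
+;; Example: "rolx64 rax, rbx, 3" assembles as "rorx rax, rbx, 61",
+;; using the identity rol(x, n) == ror(x, width - n).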
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Define a function void ssc(uint64_t x)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro DEF_SSC 0
+global ssc
+ssc:
+ mov rax, rbx
+ mov rbx, rcx
+ db 0x64
+ db 0x67
+ nop
+ mov rbx, rax
+ ret
+%endm
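+
+;; Note: the 64/67-prefixed nop with a marker value in rbx is a
+;; simulator magic sequence (e.g. Intel SDE's SSC marks); on real
+;; hardware it executes as a plain nop.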
+
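+;; The SIMD wrappers below use the VEX-encoded (v-prefixed) forms
+;; when ARCH is 02, 03 or 04 (by this library's naming convention,
+;; presumably the AVX, AVX2 and AVX-512 builds) and the legacy SSE
+;; encodings otherwise.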
+%macro MOVDQU 2
+%define %%dest %1
+%define %%src %2
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vmovdqu %%dest, %%src
+%else
+ movdqu %%dest, %%src
+%endif
+%endm
+
+%macro MOVDQA 2
+%define %%dest %1
+%define %%src %2
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vmovdqa %%dest, %%src
+%else
+ movdqa %%dest, %%src
+%endif
+%endm
+
+%macro MOVD 2
+%define %%dest %1
+%define %%src %2
+%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
+ vmovd %%dest, %%src
+%else
+ movd %%dest, %%src
+%endif
+%endm
+
+%macro MOVQ 2
+%define %%dest %1
+%define %%src %2
+%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
+ vmovq %%dest, %%src
+%else
+ movq %%dest, %%src
+%endif
+%endm
+
+;; Move register if the src and dest are not equal
+%macro MOVNIDN 2
+%define dest %1
+%define src %2
+%ifnidn dest, src
+ mov dest, src
+%endif
+%endm
+
+%macro MOVDQANIDN 2
+%define dest %1
+%define src %2
+%ifnidn dest, src
+ MOVDQA dest, src
+%endif
+%endm
+
+%macro PSHUFD 3
+%define %%dest %1
+%define %%src1 %2
+%define %%imm8 %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpshufd %%dest, %%src1, %%imm8
+%else
+ pshufd %%dest, %%src1, %%imm8
+%endif
+%endm
+
+%macro PSHUFB 3
+%define %%dest %1
+%define %%src1 %2
+%define %%shuf %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpshufb %%dest, %%src1, %%shuf
+%else
+ MOVDQANIDN %%dest, %%src1
+ pshufb %%dest, %%shuf
+%endif
+%endm
+
+%macro PBROADCASTD 2
+%define %%dest %1
+%define %%src %2
+%if (ARCH == 04)
+ vpbroadcastd %%dest, %%src
+%else
+ MOVD %%dest, %%src
+ PSHUFD %%dest, %%dest, 0
+%endif
+%endm
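+
+;; In the fallback path above (ARCH != 04), MOVD loads the dword
+;; into lane 0 and PSHUFD with imm8 0 replicates lane 0 across all
+;; four lanes.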
+
+;; Implement BZHI instruction on older architectures
+;; Clobbers rcx, unless rcx is %%index
+%macro BZHI 4
+%define %%dest %1
+%define %%src %2
+%define %%index %3
+%define %%tmp1 %4
+
+%ifdef USE_HSWNI
+ bzhi %%dest, %%src, %%index
+%else
+ MOVNIDN rcx, %%index
+ mov %%tmp1, 1
+ shl %%tmp1, cl
+ sub %%tmp1, 1
+
+ MOVNIDN %%dest, %%src
+
+ and %%dest, %%tmp1
+%endif
+%endm
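+
+;; Sketch of the fallback with index == 12: it computes
+;;     dest = src & ((1 << 12) - 1)
+;; keeping the low 12 bits, which matches hardware bzhi for
+;; indices below the operand width.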
+
+;; Implement shrx instruction on older architectures
+;; Clobbers rcx, unless rcx is %%index
+%macro SHRX 3
+%define %%dest %1
+%define %%src %2
+%define %%index %3
+
+%ifdef USE_HSWNI
+ shrx %%dest, %%src, %%index
+%else
+ MOVNIDN rcx, %%index
+ MOVNIDN %%dest, %%src
+ shr %%dest, cl
+%endif
+%endm
+
+;; Implement shlx instruction on older architectures
+;; Clobbers rcx, unless rcx is %%index
+%macro SHLX 3
+%define %%dest %1
+%define %%src %2
+%define %%index %3
+
+%ifdef USE_HSWNI
+ shlx %%dest, %%src, %%index
+%else
+ MOVNIDN %%dest, %%src
+ MOVNIDN rcx, %%index
+ shl %%dest, cl
+%endif
+%endm
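+
+;; Both shift fallbacks route the count through cl, hence the rcx
+;; clobber noted above; MOVNIDN elides the mov when rcx already
+;; holds %%index.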
+
+%macro PINSRD 3
+%define %%dest %1
+%define %%src %2
+%define %%offset %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpinsrd %%dest, %%src, %%offset
+%else
+ pinsrd %%dest, %%src, %%offset
+%endif
+%endm
+
+%macro PEXTRD 3
+%define %%dest %1
+%define %%src %2
+%define %%offset %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpextrd %%dest, %%src, %%offset
+%else
+ pextrd %%dest, %%src, %%offset
+%endif
+%endm
+
+%macro PSRLDQ 2
+%define %%dest %1
+%define %%offset %2
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpsrldq %%dest, %%offset
+%else
+ psrldq %%dest, %%offset
+%endif
+%endm
+
+%macro PSLLD 3
+%define %%dest %1
+%define %%src %2
+%define %%offset %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpslld %%dest, %%src, %%offset
+%else
+ MOVDQANIDN %%dest, %%src
+ pslld %%dest, %%offset
+%endif
+%endm
+
+%macro PAND 3
+%define %%dest %1
+%define %%src1 %2
+%define %%src2 %3
+%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
+ vpand %%dest, %%src1, %%src2
+%else
+ MOVDQANIDN %%dest, %%src1
+ pand %%dest, %%src2
+%endif
+%endm
+
+%macro POR 3
+%define %%dest %1
+%define %%src1 %2
+%define %%src2 %3
+%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
+ vpor %%dest, %%src1, %%src2
+%else
+ MOVDQANIDN %%dest, %%src1
+ por %%dest, %%src2
+%endif
+%endm
+
+%macro PXOR 3
+%define %%dest %1
+%define %%src1 %2
+%define %%src2 %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpxor %%dest, %%src1, %%src2
+%else
+ MOVDQANIDN %%dest, %%src1
+ pxor %%dest, %%src2
+%endif
+%endm
+
+%macro PADDD 3
+%define %%dest %1
+%define %%src1 %2
+%define %%src2 %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpaddd %%dest, %%src1, %%src2
+%else
+ MOVDQANIDN %%dest, %%src1
+ paddd %%dest, %%src2
+%endif
+%endm
+
+%macro PCMPEQB 3
+%define %%dest %1
+%define %%src1 %2
+%define %%src2 %3
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpcmpeqb %%dest, %%src1, %%src2
+%else
+ MOVDQANIDN %%dest, %%src1
+ pcmpeqb %%dest, %%src2
+%endif
+%endm
+
+%macro PMOVMSKB 2
+%define %%dest %1
+%define %%src %2
+%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
+ vpmovmskb %%dest, %%src
+%else
+ pmovmskb %%dest, %%src
+%endif
+%endm
+
+%endif ;; ifndef STDMAC_ASM
diff --git a/src/isa-l/include/aarch64_multibinary.h b/src/isa-l/include/aarch64_multibinary.h
new file mode 100644
index 000000000..e31451be6
--- /dev/null
+++ b/src/isa-l/include/aarch64_multibinary.h
@@ -0,0 +1,311 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#ifndef __AARCH64_MULTIBINARY_H__
+#define __AARCH64_MULTIBINARY_H__
+#ifndef __aarch64__
+#error "This file is for aarch64 only"
+#endif
+#include <asm/hwcap.h>
+#ifdef __ASSEMBLY__
+/**
+ * # mbin_interface : the wrapper layer for the isa-l API
+ *
+ * ## references:
+ * * https://sourceware.org/git/gitweb.cgi?p=glibc.git;a=blob;f=sysdeps/aarch64/dl-trampoline.S
+ * * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+ * * https://static.docs.arm.com/ihi0057/b/IHI0057B_aadwarf64.pdf?_ga=2.80574487.1870739014.1564969896-1634778941.1548729310
+ *
+ * ## Usage:
+ * 1. Define a dispatcher function.
+ * 2. Its name must be \name\()_dispatcher.
+ * 3. Its prototype should be *"void * \name\()_dispatcher"*.
+ * 4. The dispatcher should return the right function pointer, revision, and an information string.
+ **/
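+/*
+ * Illustrative sketch (not part of this file): an assembly source would
+ * instantiate the macro as
+ *     mbin_interface crc32_iscsi
+ * and a C file would supply crc32_iscsi_dispatcher(), returning the chosen
+ * implementation's entry point.
+ */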
+.macro mbin_interface name:req
+ .extern \name\()_dispatcher
+ .section .data
+ .balign 8
+ .global \name\()_dispatcher_info
+ .type \name\()_dispatcher_info,%object
+
+ \name\()_dispatcher_info:
+ .quad \name\()_mbinit //func_entry
+
+ .size \name\()_dispatcher_info,. - \name\()_dispatcher_info
+
+ .balign 8
+ .text
+ \name\()_mbinit:
+	//save fp (x29) and lr (x30), sub sp
+ .cfi_startproc
+ stp x29, x30, [sp, -224]!
+
+	//add CFI directives to avoid errors from GDB bt commands
+	//set CFI (Call Frame Information)
+ .cfi_def_cfa_offset 224
+ .cfi_offset 29, -224
+ .cfi_offset 30, -216
+
+ //save parameter/result/indirect result registers
+ stp x8, x9, [sp, 16]
+ .cfi_offset 8, -208
+ .cfi_offset 9, -200
+ stp x0, x1, [sp, 32]
+ .cfi_offset 0, -192
+ .cfi_offset 1, -184
+ stp x2, x3, [sp, 48]
+ .cfi_offset 2, -176
+ .cfi_offset 3, -168
+ stp x4, x5, [sp, 64]
+ .cfi_offset 4, -160
+ .cfi_offset 5, -152
+ stp x6, x7, [sp, 80]
+ .cfi_offset 6, -144
+ .cfi_offset 7, -136
+ stp q0, q1, [sp, 96]
+ .cfi_offset 64, -128
+ .cfi_offset 65, -112
+ stp q2, q3, [sp, 128]
+ .cfi_offset 66, -96
+ .cfi_offset 67, -80
+ stp q4, q5, [sp, 160]
+ .cfi_offset 68, -64
+ .cfi_offset 69, -48
+ stp q6, q7, [sp, 192]
+ .cfi_offset 70, -32
+ .cfi_offset 71, -16
+
+	/**
+	 * The dispatcher functions have the following prototype:
+	 * 	void * function_dispatcher(void)
+	 * Per the AAPCS64, the returned function pointer is in x0.
+	 */
+
+
+ bl \name\()_dispatcher
+ //restore temp/indirect result registers
+ ldp x8, x9, [sp, 16]
+ .cfi_restore 8
+ .cfi_restore 9
+
+ // save function entry
+ str x0, [x9]
+
+ //restore parameter/result registers
+ ldp x0, x1, [sp, 32]
+ .cfi_restore 0
+ .cfi_restore 1
+ ldp x2, x3, [sp, 48]
+ .cfi_restore 2
+ .cfi_restore 3
+ ldp x4, x5, [sp, 64]
+ .cfi_restore 4
+ .cfi_restore 5
+ ldp x6, x7, [sp, 80]
+ .cfi_restore 6
+ .cfi_restore 7
+ ldp q0, q1, [sp, 96]
+ .cfi_restore 64
+ .cfi_restore 65
+ ldp q2, q3, [sp, 128]
+ .cfi_restore 66
+ .cfi_restore 67
+ ldp q4, q5, [sp, 160]
+ .cfi_restore 68
+ .cfi_restore 69
+ ldp q6, q7, [sp, 192]
+ .cfi_restore 70
+ .cfi_restore 71
+	//restore fp (x29) and lr (x30), restore sp
+ ldp x29, x30, [sp], 224
+ //restore cfi setting
+ .cfi_restore 30
+ .cfi_restore 29
+ .cfi_def_cfa_offset 0
+ .cfi_endproc
+
+ .global \name
+ .type \name,%function
+ .align 2
+ \name\():
+ adrp x9, :got:\name\()_dispatcher_info
+ ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
+ ldr x10,[x9]
+ br x10
+ .size \name,. - \name
+
+.endm
+
+/**
+ * mbin_interface_base is used for interfaces which have only a
+ * generic (noarch) implementation
+ */
+.macro mbin_interface_base name:req, base:req
+ .extern \base
+ .section .data
+ .balign 8
+ .global \name\()_dispatcher_info
+ .type \name\()_dispatcher_info,%object
+
+ \name\()_dispatcher_info:
+ .quad \base //func_entry
+ .size \name\()_dispatcher_info,. - \name\()_dispatcher_info
+
+ .balign 8
+ .text
+ .global \name
+ .type \name,%function
+ .align 2
+ \name\():
+ adrp x9, :got:\name\()_dispatcher_info
+ ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
+ ldr x10,[x9]
+ br x10
+ .size \name,. - \name
+
+.endm
+
+#else /* __ASSEMBLY__ */
+#include <sys/auxv.h>
+
+
+
+#define DEFINE_INTERFACE_DISPATCHER(name) \
+ void * name##_dispatcher(void)
+
+#define PROVIDER_BASIC(name) \
+ PROVIDER_INFO(name##_base)
+
+#define DO_DIGNOSTIC(x) DO_PRAGMA(GCC diagnostic ignored "-W"#x)
+#define DO_PRAGMA(x) _Pragma (#x)
+#define DIGNOSTIC_IGNORE(x) DO_PRAGMA(GCC diagnostic ignored #x)
+#define DIGNOSTIC_PUSH() DO_PRAGMA(GCC diagnostic push)
+#define DIGNOSTIC_POP() DO_PRAGMA(GCC diagnostic pop)
+
+
+#define PROVIDER_INFO(_func_entry) \
+ ({ DIGNOSTIC_PUSH() \
+ DIGNOSTIC_IGNORE(-Wnested-externs) \
+ extern void _func_entry(void); \
+ DIGNOSTIC_POP() \
+ _func_entry; \
+ })
+
+/**
+ * Micro-architecture definitions
+ * Reference: https://developer.arm.com/docs/ddi0595/f/aarch64-system-registers/midr_el1
+ */
+
+#define CPU_IMPLEMENTER_RESERVE 0x00
+#define CPU_IMPLEMENTER_ARM 0x41
+
+
+#define CPU_PART_CORTEX_A57 0xD07
+#define CPU_PART_CORTEX_A72 0xD08
+#define CPU_PART_NEOVERSE_N1 0xD0C
+
+#define MICRO_ARCH_ID(imp,part) \
+ (((CPU_IMPLEMENTER_##imp&0xff)<<24)|((CPU_PART_##part&0xfff)<<4))
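+/*
+ * For example, MICRO_ARCH_ID(ARM, NEOVERSE_N1) evaluates to
+ * (0x41 << 24) | (0xD0C << 4) == 0x4100d0c0, i.e. MIDR_EL1 masked with
+ * 0xff00fff0 as done in get_micro_arch_id() below.
+ */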
+
+#ifndef HWCAP_CPUID
+#define HWCAP_CPUID (1<<11)
+#endif
+
+/**
+ * @brief get_micro_arch_id
+ *
+ * Read the micro-architecture register (MIDR_EL1) if possible. This function
+ * provides micro-architecture information and makes micro-architecture
+ * specific optimization possible.
+ *
+ * Reading system registers (MRS) is forbidden in userspace; if executed, it
+ * raises an illegal-instruction error. The kernel provides a solution for
+ * this issue, which depends on the HWCAP_CPUID flag. The reference below
+ * describes how to use it: the kernel installs an "illegal instruction"
+ * handler which executes the MRS and returns the correct value to userspace.
+ *
+ * To avoid too many kernel traps, this function MUST only be called inside
+ * a dispatcher, and the HWCAP requirements must be met; that makes sure no
+ * illegal-instruction errors occur. HWCAP_CPUID should be available to get
+ * the best performance.
+ *
+ * NOTICE:
+ * - HWCAP_CPUID should be available; otherwise the reserved value is returned.
+ * - It MUST be called inside a dispatcher.
+ * - It MUST meet the HWCAP requirements.
+ *
+ * Example:
+ * DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
+ * {
+ * unsigned long auxval = getauxval(AT_HWCAP);
+ *		// This HWCAP check is mandatory.
+ * if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
+ * switch (get_micro_arch_id()) {
+ * case MICRO_ARCH_ID(ARM, CORTEX_A57):
+ * return PROVIDER_INFO(crc32_pmull_crc_for_a57);
+ * case MICRO_ARCH_ID(ARM, CORTEX_A72):
+ * return PROVIDER_INFO(crc32_pmull_crc_for_a72);
+ * case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
+ * return PROVIDER_INFO(crc32_pmull_crc_for_n1);
+ *			default:
+ * return PROVIDER_INFO(crc32_pmull_crc_for_others);
+ * }
+ * }
+ * return PROVIDER_BASIC(crc32_iscsi);
+ * }
+ * KNOWN ISSUE:
+ * On a heterogeneous system (big.LITTLE), this will work, but the
+ * performance might not be the best expected.
+ *
+ * If this function is called on a big core, it will return the function
+ * optimized for the big core.
+ *
+ * If execution is then scheduled onto a little core, it will still work
+ * (see the reference below), but the function won't be optimized for the
+ * little core, so the performance won't be as expected.
+ *
+ * References:
+ * - [CPU Feature detection](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/arm64/cpu-feature-registers.rst?h=v5.5)
+ *
+ */
+static inline uint32_t get_micro_arch_id(void)
+{
+	uint32_t id = CPU_IMPLEMENTER_RESERVE;
+	if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
+		/* This read traps into kernel space */
+		asm("mrs %0, MIDR_EL1 " : "=r" (id));
+	}
+	return id & 0xff00fff0;
+}
+
+
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/src/isa-l/include/crc.h b/src/isa-l/include/crc.h
new file mode 100644
index 000000000..071496083
--- /dev/null
+++ b/src/isa-l/include/crc.h
@@ -0,0 +1,212 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ * @file crc.h
+ * @brief CRC functions.
+ */
+
+
+#ifndef _CRC_H_
+#define _CRC_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Multi-binary functions */
+
+/**
+ * @brief Generate CRC from the T10 standard, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @returns 16 bit CRC
+ */
+uint16_t crc16_t10dif(
+ uint16_t init_crc, //!< initial CRC value, 16 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+
+/**
+ * @brief Generate CRC from the T10 standard and copy the data, runs appropriate version.
+ *
+ * Stitched CRC-and-copy function.
+ *
+ * @returns 16 bit CRC
+ */
+uint16_t crc16_t10dif_copy(
+ uint16_t init_crc, //!< initial CRC value, 16 bits
+ uint8_t *dst, //!< buffer destination for copy
+ uint8_t *src, //!< buffer source to crc + copy
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+
+/**
+ * @brief Generate CRC from the IEEE standard, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ * Note: The CRC32 IEEE standard is widely used in HDLC, Ethernet, Gzip and
+ * many others. Its polynomial is 0x04C11DB7 in normal form and 0xEDB88320
+ * in reflected (reversed) form. In ISA-L, crc32_ieee implements the normal
+ * CRC32 IEEE version, while crc32_gzip_refl implements the reflected CRC32
+ * IEEE version. These two versions are not compatible with each other, so
+ * users replacing an unoptimized CRC32 IEEE implementation with an ISA-L
+ * function should take care. Since many applications use the reflected
+ * version, please check whether crc32_gzip_refl, rather than crc32_ieee,
+ * is the right one for you.
+ *
+ * @returns 32 bit CRC
+ */
+
+uint32_t crc32_ieee(
+ uint32_t init_crc, //!< initial CRC value, 32 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate the customized CRC
+ * based on RFC 1952 CRC (http://www.ietf.org/rfc/rfc1952.txt) standard,
+ * runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * Note: The CRC32 IEEE standard is widely used in HDLC, Ethernet, Gzip and
+ * many others. Its polynomial is 0x04C11DB7 in normal form and 0xEDB88320
+ * in reflected (reversed) form. In ISA-L, crc32_ieee implements the normal
+ * CRC32 IEEE version, while crc32_gzip_refl implements the reflected CRC32
+ * IEEE version. These two versions are not compatible with each other, so
+ * users replacing an unoptimized CRC32 IEEE implementation with an ISA-L
+ * function should take care. Since many applications use the reflected
+ * version, please check whether crc32_gzip_refl, rather than crc32_ieee,
+ * is the right one for you.
+ *
+ * @returns 32 bit CRC
+ */
+uint32_t crc32_gzip_refl(
+ uint32_t init_crc, //!< initial CRC value, 32 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
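+/*
+ * Minimal usage sketch (assuming zlib-style seeding, where the previously
+ * returned CRC is passed back in as init_crc):
+ *
+ *     uint32_t crc = crc32_gzip_refl(0, buf, len);
+ */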
+
+
+/**
+ * @brief iSCSI CRC function, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @returns 32 bit CRC
+ */
+unsigned int crc32_iscsi(
+ unsigned char *buffer, //!< buffer to calculate CRC on
+ int len, //!< buffer length in bytes
+ unsigned int init_crc //!< initial CRC value
+ );
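+/*
+ * Note the argument order differs from the other CRC functions: buffer
+ * first, then length, then the initial CRC. A minimal sketch:
+ *
+ *     unsigned int crc = crc32_iscsi(buf, (int)len, init);
+ */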
+
+
+/* Base functions */
+
+/**
+ * @brief iSCSI CRC function, baseline version
+ * @returns 32 bit CRC
+ */
+unsigned int crc32_iscsi_base(
+ unsigned char *buffer, //!< buffer to calculate CRC on
+ int len, //!< buffer length in bytes
+ unsigned int crc_init //!< initial CRC value
+ );
+
+
+/**
+ * @brief Generate CRC from the T10 standard, runs baseline version
+ * @returns 16 bit CRC
+ */
+uint16_t crc16_t10dif_base(
+ uint16_t seed, //!< initial CRC value, 16 bits
+ uint8_t *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+
+/**
+ * @brief Generate CRC from the T10 standard and copy the data, runs baseline version.
+ * @returns 16 bit CRC
+ */
+uint16_t crc16_t10dif_copy_base(
+ uint16_t init_crc, //!< initial CRC value, 16 bits
+ uint8_t *dst, //!< buffer destination for copy
+ uint8_t *src, //!< buffer source to crc + copy
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+
+/**
+ * @brief Generate CRC from the IEEE standard, runs baseline version
+ * @returns 32 bit CRC
+ */
+uint32_t crc32_ieee_base(
+ uint32_t seed, //!< initial CRC value, 32 bits
+ uint8_t *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate the customized CRC
+ * based on RFC 1952 CRC (http://www.ietf.org/rfc/rfc1952.txt) standard,
+ * runs baseline version
+ * @returns 32 bit CRC
+ */
+uint32_t crc32_gzip_refl_base(
+ uint32_t seed, //!< initial CRC value, 32 bits
+ uint8_t *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _CRC_H_
diff --git a/src/isa-l/include/crc64.h b/src/isa-l/include/crc64.h
new file mode 100644
index 000000000..d0e02748c
--- /dev/null
+++ b/src/isa-l/include/crc64.h
@@ -0,0 +1,277 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ * @file crc64.h
+ * @brief CRC64 functions.
+ */
+
+
+#ifndef _CRC64_H_
+#define _CRC64_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Multi-binary functions */
+
+/**
+ * @brief Generate CRC from ECMA-182 standard in reflected format, runs
+ * appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_ecma_refl(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ECMA-182 standard in normal format, runs
+ * appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_ecma_norm(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ISO standard in reflected format, runs
+ * appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_iso_refl(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ISO standard in normal format, runs
+ * appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_iso_norm(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from "Jones" coefficients in reflected format, runs
+ * appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_jones_refl(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from "Jones" coefficients in normal format, runs
+ * appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_jones_norm(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
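+/*
+ * Each of the above can be computed incrementally by feeding the previous
+ * result back in as init_crc, e.g. (sketch over a buffer split in two):
+ *
+ *     uint64_t crc = 0;
+ *     crc = crc64_ecma_refl(crc, part1, len1);
+ *     crc = crc64_ecma_refl(crc, part2, len2);
+ */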
+
+/* Arch specific versions */
+
+/**
+ * @brief Generate CRC from ECMA-182 standard in reflected format.
+ * @requires SSE3, CLMUL
+ *
+ * @returns 64 bit CRC
+ */
+
+uint64_t crc64_ecma_refl_by8(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ECMA-182 standard in normal format.
+ * @requires SSE3, CLMUL
+ *
+ * @returns 64 bit CRC
+ */
+
+uint64_t crc64_ecma_norm_by8(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ECMA-182 standard in reflected format, runs baseline version
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_ecma_refl_base(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ECMA-182 standard in normal format, runs baseline version
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_ecma_norm_base(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ISO standard in reflected format.
+ * @requires SSE3, CLMUL
+ *
+ * @returns 64 bit CRC
+ */
+
+uint64_t crc64_iso_refl_by8(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ISO standard in normal format.
+ * @requires SSE3, CLMUL
+ *
+ * @returns 64 bit CRC
+ */
+
+uint64_t crc64_iso_norm_by8(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ISO standard in reflected format, runs baseline version
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_iso_refl_base(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from ISO standard in normal format, runs baseline version
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_iso_norm_base(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from "Jones" coefficients in reflected format.
+ * @requires SSE3, CLMUL
+ *
+ * @returns 64 bit CRC
+ */
+
+uint64_t crc64_jones_refl_by8(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from "Jones" coefficients in normal format.
+ * @requires SSE3, CLMUL
+ *
+ * @returns 64 bit CRC
+ */
+
+uint64_t crc64_jones_norm_by8(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from "Jones" coefficients in reflected format, runs baseline version
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_jones_refl_base(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+/**
+ * @brief Generate CRC from "Jones" coefficients in normal format, runs baseline version
+ * @returns 64 bit CRC
+ */
+uint64_t crc64_jones_norm_base(
+ uint64_t init_crc, //!< initial CRC value, 64 bits
+ const unsigned char *buf, //!< buffer to calculate CRC on
+ uint64_t len //!< buffer length in bytes (64-bit data)
+ );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _CRC64_H_
diff --git a/src/isa-l/include/erasure_code.h b/src/isa-l/include/erasure_code.h
new file mode 100644
index 000000000..2f9a257e5
--- /dev/null
+++ b/src/isa-l/include/erasure_code.h
@@ -0,0 +1,947 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef _ERASURE_CODE_H_
+#define _ERASURE_CODE_H_
+
+/**
+ * @file erasure_code.h
+ * @brief Interface to functions supporting erasure code encode and decode.
+ *
+ * This file defines the interface to optimized functions used in erasure
+ * codes. Encode and decode of erasures in GF(2^8) are made by calculating the
+ * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
+ * set of coefficients. Values for the coefficients are determined by the type
+ * of erasure code. Using a general dot product means that any sequence of
+ * coefficients may be used including erasure codes based on random
+ * coefficients.
+ * Multiple versions of dot product are supplied to calculate 1-6 output
+ * vectors in one pass.
+ * Base GF multiply and divide functions can be sped up by defining
+ * GF_LARGE_TABLES at the expense of memory size.
+ *
+ */
+
+#include "gf_vect_mul.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Initialize tables for fast Erasure Code encode and decode.
+ *
+ * Generates the expanded tables needed for fast encode or decode for erasure
+ * codes on blocks of data. 32 bytes are generated for each input coefficient.
+ *
+ * @param k The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param rows The number of output vectors to concurrently encode/decode.
+ * @param a Pointer to sets of arrays of input coefficients used to encode
+ * or decode data.
+ * @param gftbls Pointer to start of space for concatenated output tables
+ * generated from input coefficients. Must be of size 32*k*rows.
+ * @returns none
+ */
+
+void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
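+/*
+ * Sizing sketch (following the 32*k*rows rule above): with k = 10 sources
+ * and rows = 4 outputs, the caller provides
+ *
+ *     unsigned char gftbls[32 * 10 * 4];
+ *     ec_init_tables(10, 4, a, gftbls);
+ *
+ * where 'a' holds the rows*k coding coefficients.
+ */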
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
+ *
+ * Given a list of source data blocks, generate one or multiple blocks of
+ * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
+ * suitable set of coefficients, this function will perform the fast generation
+ * or decoding of Reed-Solomon type erasure codes.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param len Length of each block of data (vector) of source or dest data.
+ * @param k The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param rows The number of output vectors to concurrently encode/decode.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * @param data Array of pointers to source input buffers.
+ * @param coding Array of pointers to coded output buffers.
+ * @returns none
+ */
+
+void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
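+/*
+ * Typical encode flow (sketch; assumes a generator-matrix helper such as
+ * gf_gen_rs_matrix(), declared further down in this header):
+ *
+ *     unsigned char a[(k + p) * k], g_tbls[32 * k * p];
+ *     gf_gen_rs_matrix(a, k + p, k);           // identity on top, parity rows below
+ *     ec_init_tables(k, p, &a[k * k], g_tbls); // expand the p parity rows
+ *     ec_encode_data(len, k, p, g_tbls, data, coding);
+ */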
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
+ *
+ * Baseline version of ec_encode_data() with same parameters.
+ */
+void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
+ unsigned char **dest);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version.
+ *
+ * Given one source data block, update one or multiple blocks of encoded data as
+ * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
+ * coefficients, this function will perform the fast generation or decoding of
+ * Reed-Solomon type erasure codes from one input source at a time.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param len Length of each block of data (vector) of source or dest data.
+ * @param k The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param rows The number of output vectors to concurrently encode/decode.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param g_tbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * @param data Pointer to single input source used to update output parity.
+ * @param coding Array of pointers to coded output buffers.
+ * @returns none
+ */
+void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
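+/*
+ * Incremental encode sketch (assumes the coding buffers are zeroed before
+ * the first update):
+ *
+ *     for (i = 0; i < k; i++)
+ *             ec_encode_data_update(len, k, rows, i, g_tbls, data[i], coding);
+ */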
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
+ *
+ * Baseline version of ec_encode_data_update().
+ */
+
+void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
+ unsigned char *data, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product, runs baseline version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ * on the array of input coefficients. Only elements 32*CONST*j + 1
+ * of this array are used, where j = (0, 1, 2...) and CONST is the
+ * number of elements in the array of input coefficients. The
+ * elements used correspond to the original input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+
+void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product, runs appropriate version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ * on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constant and add to destination array. Can be used for erasure coding encode
+ * and decode update when only one source is available at a time. Function
+ * requires pre-calculation of a 32*vec byte constant array based on the input
+ * coefficients.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param len Length of each vector in bytes. Must be >= 64.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Array of pointers to source inputs.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, baseline version.
+ *
+ * Baseline version of gf_vect_mad() with same parameters.
+ */
+
+void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
+ unsigned char *dest);
+
+// x86 only
+#if defined(__i386__) || defined(__x86_64__)
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires SSE4.1
+ */
+void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires AVX
+ */
+void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires AVX2
+ */
+void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
+ *
+ * Arch specific version of ec_encode_data_update() with same parameters.
+ * @requires SSE4.1
+ */
+
+void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
+ *
+ * Arch specific version of ec_encode_data_update() with same parameters.
+ * @requires AVX
+ */
+
+void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
+ *
+ * Arch specific version of ec_encode_data_update() with same parameters.
+ * @requires AVX2
+ */
+
+void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
+
+/**
+ * @brief GF(2^8) vector dot product.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ * on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ * @requires AVX
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ * on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ * @requires AVX2
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ * on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs.
+ *
+ * Vector dot product optimized to calculate two outputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs.
+ *
+ * Vector dot product optimized to calculate two outputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ * @requires AVX
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs.
+ *
+ * Vector dot product optimized to calculate two outputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ * @requires AVX2
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs.
+ *
+ * Vector dot product optimized to calculate three outputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs.
+ *
+ * Vector dot product optimized to calculate three outputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ * @requires AVX
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs.
+ *
+ * Vector dot product optimized to calculate three outputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ * @requires AVX2
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs.
+ *
+ * Vector dot product optimized to calculate four outputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs.
+ *
+ * Vector dot product optimized to calculate four outputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ * @requires AVX
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs.
+ *
+ * Vector dot product optimized to calculate four outputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ * @requires AVX2
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs.
+ *
+ * Vector dot product optimized to calculate five outputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs.
+ *
+ * Vector dot product optimized to calculate five outputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ * @requires AVX
+ *
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs.
+ *
+ * Vector dot product optimized to calculate five outputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ * @requires AVX2
+ *
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs.
+ *
+ * Vector dot product optimized to calculate six outputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs.
+ *
+ * Vector dot product optimized to calculate six outputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ * @requires AVX
+ *
+ * @param len Length of each vector in bytes. Must be >= 16.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs.
+ *
+ * Vector dot product optimized to calculate six outputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ * @requires AVX2
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vlen Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ * based on the array of input coefficients.
+ * @param src Array of pointers to source inputs.
+ * @param dest Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+ unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, arch specific version.
+ *
+ * Arch specific version of gf_vect_mad() with same parameters.
+ * @requires SSE4.1
+ */
+
+void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+/**
+ * @brief GF(2^8) vector multiply accumulate, arch specific version.
+ *
+ * Arch specific version of gf_vect_mad() with same parameters.
+ * @requires AVX
+ */
+
+void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, arch specific version.
+ *
+ * Arch specific version of gf_vect_mad() with same parameters.
+ * @requires AVX2
+ */
+
+void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+
+
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. SSE version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse().
+ * @requires AVX
+ */
+void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse().
+ * @requires AVX2
+ */
+void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
+ * @requires AVX
+ */
+void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
+ * @requires AVX2
+ */
+void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
+ * @requires AVX
+ */
+void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
+ * @requires AVX2
+ */
+void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
+ * @requires SSE4.1
+ */
+void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
+ * @requires AVX
+ */
+void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
+ * @requires AVX2
+ */
+void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
+ * @requires SSE4.1
+ */
+void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
+ * @requires AVX
+ */
+void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
+ * @requires AVX2
+ */
+void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+#endif
+
+/**********************************************************************
+ * The remaining functions are library support routines used in GF(2^8) operations.
+ */
+
+/**
+ * @brief Single element GF(2^8) multiply.
+ *
+ * @param a Multiplicand a
+ * @param b Multiplicand b
+ * @returns Product of a and b in GF(2^8)
+ */
+
+unsigned char gf_mul(unsigned char a, unsigned char b);
+
+/**
+ * @brief Single element GF(2^8) inverse.
+ *
+ * @param a Input element
+ * @returns Field element b such that a x b = {1}
+ */
+
+unsigned char gf_inv(unsigned char a);
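+
+/* Example (editor's sketch): sanity-checking the field operations. For any
+ * non-zero element a, gf_mul(a, gf_inv(a)) yields the multiplicative
+ * identity {1}. Requires <assert.h>.
+ *
+ * @code
+ * unsigned char a = 0x1d;
+ * assert(gf_mul(a, gf_inv(a)) == 1);
+ * @endcode
+ */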
+
+/**
+ * @brief Generate a matrix of coefficients to be used for encoding.
+ *
+ * Vandermonde matrix example of encoding coefficients where the high portion of
+ * the matrix is the identity matrix I and the lower portion is constructed as
+ * 2^{i*(j-k+1)} i:{0,k-1} j:{k,m-1}. This is a commonly used method for
+ * choosing coefficients in erasure encoding but does not guarantee an
+ * invertible matrix for every sub-matrix. For large pairs of m and k it is
+ * possible to find cases where the decode matrix chosen from sources and
+ * parity is not invertible. Users may want to adjust for certain pairs of m
+ * and k. If m and k satisfy one of the following inequalities, no adjustment
+ * is required:
+ *
+ * - k <= 3
+ * - k = 4, m <= 25
+ * - k = 5, m <= 10
+ * - k <= 21, m-k = 4
+ * - m - k <= 3.
+ *
+ * @param a [m x k] array to hold coefficients
+ * @param m number of rows in matrix corresponding to srcs + parity.
+ * @param k number of columns in matrix corresponding to srcs.
+ * @returns none
+ */
+
+void gf_gen_rs_matrix(unsigned char *a, int m, int k);
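+
+/* Example (editor's sketch): generating an [m x k] coefficient matrix and
+ * expanding the parity rows for encoding. Sizes are hypothetical; per the
+ * note above, m = 14, k = 10 needs no adjustment (k <= 21, m - k = 4).
+ *
+ * @code
+ * int m = 14, k = 10;
+ * unsigned char a[14 * 10];
+ * unsigned char gftbls[32 * 10 * 4];   // tables for the m - k parity rows
+ * gf_gen_rs_matrix(a, m, k);
+ * ec_init_tables(k, m - k, &a[k * k], gftbls);
+ * @endcode
+ */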
+
+/**
+ * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
+ *
+ * Cauchy matrix example of encoding coefficients where the high portion of the
+ * matrix is the identity matrix I and the lower portion is constructed as
+ * 1/(i + j) | i != j, i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix
+ * should be invertible.
+ *
+ * @param a [m x k] array to hold coefficients
+ * @param m number of rows in matrix corresponding to srcs + parity.
+ * @param k number of columns in matrix corresponding to srcs.
+ * @returns none
+ */
+
+void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
+
+/**
+ * @brief Invert a matrix in GF(2^8)
+ *
+ * Attempts to construct an n x n inverse of the input matrix. Returns non-zero
+ * if the input matrix is singular. The input matrix is always destroyed in the
+ * process.
+ *
+ * @param in input matrix, destroyed by invert process
+ * @param out output matrix such that [in] x [out] = [I] - identity matrix
+ * @param n size of matrix [nxn]
+ * @returns 0 successful, other fail on singular input matrix
+ */
+
+int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
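+
+/* Example (editor's sketch): recovering a decode matrix. A square sub-matrix
+ * is built from the rows of the Cauchy encode matrix that correspond to the
+ * surviving fragments, then inverted. The survived[] index array is a
+ * hypothetical placeholder; requires <string.h> for memcpy.
+ *
+ * @code
+ * unsigned char a[14 * 10], b[10 * 10], d[10 * 10];
+ * gf_gen_cauchy1_matrix(a, 14, 10);
+ * for (int i = 0; i < 10; i++)         // rows of surviving fragments
+ *         memcpy(&b[10 * i], &a[10 * survived[i]], 10);
+ * if (gf_invert_matrix(b, d, 10) != 0)
+ *         return -1;                   // singular sub-matrix
+ * @endcode
+ */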
+
+
+/*************************************************************/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_ERASURE_CODE_H_
diff --git a/src/isa-l/include/gf_vect_mul.h b/src/isa-l/include/gf_vect_mul.h
new file mode 100644
index 000000000..70a0ab2ed
--- /dev/null
+++ b/src/isa-l/include/gf_vect_mul.h
@@ -0,0 +1,152 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef _GF_VECT_MUL_H
+#define _GF_VECT_MUL_H
+
+/**
+ * @file gf_vect_mul.h
+ * @brief Interface to functions for vector (block) multiplication in GF(2^8).
+ *
+ * This file defines the interface to routines used in fast RAID rebuild and
+ * erasure codes.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// x86 only
+#if defined(__i386__) || defined(__x86_64__)
+
+ /**
+ * @brief GF(2^8) vector multiply by constant.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
+ * and src must be aligned to 32B.
+ * @requires SSE4.1
+ *
+ * @param len Length of vector in bytes. Must be aligned to 32B.
+ * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
+ * @param src Pointer to src data array. Must be aligned to 32B.
+ * @param dest Pointer to destination data array. Must be aligned to 32B.
+ * @returns 0 pass, other fail
+ */
+
+int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
+
+
+ /**
+ * @brief GF(2^8) vector multiply by constant.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
+ * and src must be aligned to 32B.
+ * @requires AVX
+ *
+ * @param len Length of vector in bytes. Must be aligned to 32B.
+ * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
+ * @param src Pointer to src data array. Must be aligned to 32B.
+ * @param dest Pointer to destination data array. Must be aligned to 32B.
+ * @returns 0 pass, other fail
+ */
+
+int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
+
+#endif
+
+/**
+ * @brief GF(2^8) vector multiply by constant, runs appropriate version.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
+ * Len and src must be aligned to 32B.
+ *
+ * This function determines what instruction sets are enabled
+ * and selects the appropriate version at runtime.
+ *
+ * @param len Length of vector in bytes. Must be aligned to 32B.
+ * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
+ * @param src Pointer to src data array. Must be aligned to 32B.
+ * @param dest Pointer to destination data array. Must be aligned to 32B.
+ * @returns 0 pass, other fail
+ */
+
+int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
+
+
+/**
+ * @brief Initialize 32-byte constant array for GF(2^8) vector multiply
+ *
+ * Calculates array {C{00}, C{01}, C{02}, ... , C{0f} }, {C{00}, C{10},
+ * C{20}, ... , C{f0} } as required by other fast vector multiply
+ * functions.
+ * @param c Constant input.
+ * @param gftbl Table output.
+ */
+
+void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
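+
+/* Example (editor's sketch): multiplying a 32B-aligned block by the constant
+ * C = 2 in GF(2^8). Buffer names are hypothetical; len must be a multiple of
+ * 32 as noted above.
+ *
+ * @code
+ * unsigned char gftbl[32];
+ * gf_vect_mul_init(2, gftbl);
+ * if (gf_vect_mul(len, gftbl, src, dest) != 0)
+ *         return -1;                   // bad length or alignment
+ * @endcode
+ */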
+
+
+/**
+ * @brief GF(2^8) vector multiply by constant, runs baseline version.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
+ * and src must be aligned to 32B.
+ *
+ * @param len Length of vector in bytes. Must be aligned to 32B.
+ * @param a Pointer to 32-byte array of pre-calculated constants based on C.
+ * Only the second element is used.
+ * @param src Pointer to src data array. Must be aligned to 32B.
+ * @param dest Pointer to destination data array. Must be aligned to 32B.
+ */
+
+void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
+ unsigned char *dest);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_GF_VECT_MUL_H
diff --git a/src/isa-l/include/igzip_lib.h b/src/isa-l/include/igzip_lib.h
new file mode 100644
index 000000000..57333748b
--- /dev/null
+++ b/src/isa-l/include/igzip_lib.h
@@ -0,0 +1,990 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _IGZIP_H
+#define _IGZIP_H
+
+/**
+ * @file igzip_lib.h
+ *
+ * @brief This file defines the igzip interface, a high performance deflate
+ * compression and decompression interface for storage applications.
+ *
+ * Deflate is a widely used compression standard that can be used standalone; it
+ * also forms the basis of the gzip and zlib compression formats. Igzip supports the
+ * following flush features:
+ *
+ * - No Flush: The default method where no special flush is performed.
+ *
+ * - Sync flush: whereby isal_deflate() finishes the current deflate block at
+ * the end of each input buffer. The deflate block is byte aligned by
+ * appending an empty stored block.
+ *
+ * - Full flush: whereby isal_deflate() finishes and aligns the deflate block as
+ * in sync flush but also ensures that subsequent block's history does not
+ * look back beyond this point and new blocks are fully independent.
+ *
+ * Igzip also supports compression levels from ISAL_DEF_MIN_LEVEL to
+ * ISAL_DEF_MAX_LEVEL.
+ *
+ * Igzip contains some behavior configurable at compile time. These
+ * configurable options are:
+ *
+ * - IGZIP_HIST_SIZE - Defines the window size. The default value is 32K (note K
+ * represents 1024), but 8K is also supported. Powers of 2 which are at most
+ * 32K may also work.
+ *
+ * - LONGER_HUFFTABLE - Defines whether to use a larger hufftables structure
+ * which may increase performance with smaller IGZIP_HIST_SIZE values. By
+ * default this option is not defined. This define caps IGZIP_HIST_SIZE at
+ * 8K if IGZIP_HIST_SIZE > 8K.
+ *
+ * As an example, to compile gzip with an 8K window size, in a terminal run
+ * @verbatim gmake D="-D IGZIP_HIST_SIZE=8*1024" @endverbatim on Linux and
+ * FreeBSD, or with @verbatim nmake -f Makefile.nmake D="-D
+ * IGZIP_HIST_SIZE=8*1024" @endverbatim on Windows.
+ *
+ */
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+/* Deflate Compression Standard Defines */
+/******************************************************************************/
+#define IGZIP_K 1024
+#define ISAL_DEF_MAX_HDR_SIZE 328
+#define ISAL_DEF_MAX_CODE_LEN 15
+#define ISAL_DEF_HIST_SIZE (32*IGZIP_K)
+#define ISAL_DEF_MAX_HIST_BITS 15
+#define ISAL_DEF_MAX_MATCH 258
+#define ISAL_DEF_MIN_MATCH 3
+
+#define ISAL_DEF_LIT_SYMBOLS 257
+#define ISAL_DEF_LEN_SYMBOLS 29
+#define ISAL_DEF_DIST_SYMBOLS 30
+#define ISAL_DEF_LIT_LEN_SYMBOLS (ISAL_DEF_LIT_SYMBOLS + ISAL_DEF_LEN_SYMBOLS)
+
+/* Max repeat length, rounded up to 32 byte boundary */
+#define ISAL_LOOK_AHEAD ((ISAL_DEF_MAX_MATCH + 31) & ~31)
+
+/******************************************************************************/
+/* Deflate Implementation Specific Defines */
+/******************************************************************************/
+/* Note IGZIP_HIST_SIZE must be a power of two */
+#ifndef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE ISAL_DEF_HIST_SIZE
+#endif
+
+#if (IGZIP_HIST_SIZE > ISAL_DEF_HIST_SIZE)
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE ISAL_DEF_HIST_SIZE
+#endif
+
+#ifdef LONGER_HUFFTABLE
+#if (IGZIP_HIST_SIZE > 8 * IGZIP_K)
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (8 * IGZIP_K)
+#endif
+#endif
+
+#define ISAL_LIMIT_HASH_UPDATE
+
+#define IGZIP_HASH8K_HASH_SIZE (8 * IGZIP_K)
+#define IGZIP_HASH_HIST_SIZE IGZIP_HIST_SIZE
+#define IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE
+
+#define IGZIP_LVL0_HASH_SIZE (8 * IGZIP_K)
+#define IGZIP_LVL1_HASH_SIZE IGZIP_HASH8K_HASH_SIZE
+#define IGZIP_LVL2_HASH_SIZE IGZIP_HASH_HIST_SIZE
+#define IGZIP_LVL3_HASH_SIZE IGZIP_HASH_MAP_HASH_SIZE
+
+#ifdef LONGER_HUFFTABLE
+enum {IGZIP_DIST_TABLE_SIZE = 8*1024};
+
+/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
+enum { IGZIP_DECODE_OFFSET = 26 };
+#else
+enum {IGZIP_DIST_TABLE_SIZE = 2};
+/* DECODE_OFFSET is dist code index corresponding to DIST_TABLE_SIZE + 1 */
+enum { IGZIP_DECODE_OFFSET = 0 };
+#endif
+enum {IGZIP_LEN_TABLE_SIZE = 256};
+enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS};
+
+#define IGZIP_HUFFTABLE_CUSTOM 0
+#define IGZIP_HUFFTABLE_DEFAULT 1
+#define IGZIP_HUFFTABLE_STATIC 2
+
+/* Flush Flags */
+#define NO_FLUSH 0 /* Default */
+#define SYNC_FLUSH 1
+#define FULL_FLUSH 2
+#define FINISH_FLUSH 0 /* Deprecated */
+
+/* Gzip Flags */
+#define IGZIP_DEFLATE 0 /* Default */
+#define IGZIP_GZIP 1
+#define IGZIP_GZIP_NO_HDR 2
+#define IGZIP_ZLIB 3
+#define IGZIP_ZLIB_NO_HDR 4
+
+/* Compression Return values */
+#define COMP_OK 0
+#define INVALID_FLUSH -7
+#define INVALID_PARAM -8
+#define STATELESS_OVERFLOW -1
+#define ISAL_INVALID_OPERATION -9
+#define ISAL_INVALID_STATE -3
+#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */
+#define ISAL_INVALID_LEVEL_BUF -5 /* Invalid buffer specified for the compression level */
+
+/**
+ * @enum isal_zstate_state
+ * @brief Compression state. Note that ZSTATE_TRL only applies to GZIP compression.
+ */
+
+
+/* When the state is set to ZSTATE_NEW_HDR or ZSTATE_TMP_NEW_HDR, the
+ * hufftable being used for compression may be swapped
+ */
+enum isal_zstate_state {
+ ZSTATE_NEW_HDR, //!< Header to be written
+ ZSTATE_HDR, //!< Header state
+ ZSTATE_CREATE_HDR, //!< Header to be created
+ ZSTATE_BODY, //!< Body state
+ ZSTATE_FLUSH_READ_BUFFER, //!< Flush buffer
+ ZSTATE_FLUSH_ICF_BUFFER,
+ ZSTATE_TYPE0_HDR, //!< Type0 block header to be written
+ ZSTATE_TYPE0_BODY, //!< Type0 block body to be written
+ ZSTATE_SYNC_FLUSH, //!< Write sync flush block
+ ZSTATE_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
+ ZSTATE_TRL, //!< Trailer state
+ ZSTATE_END, //!< End state
+ ZSTATE_TMP_NEW_HDR, //!< Temporary Header to be written
+ ZSTATE_TMP_HDR, //!< Temporary Header state
+ ZSTATE_TMP_CREATE_HDR, //!< Temporary Header to be created state
+ ZSTATE_TMP_BODY, //!< Temporary Body state
+ ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer
+ ZSTATE_TMP_FLUSH_ICF_BUFFER,
+ ZSTATE_TMP_TYPE0_HDR, //!< Temporary Type0 block header to be written
+ ZSTATE_TMP_TYPE0_BODY, //!< Temporary Type0 block body to be written
+ ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block
+ ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf
+ ZSTATE_TMP_TRL, //!< Temporary Trailer state
+ ZSTATE_TMP_END //!< Temporary End state
+};
+
+/* Offset used to switch between TMP states and non-tmp states */
+#define ZSTATE_TMP_OFFSET (ZSTATE_TMP_HDR - ZSTATE_HDR)
+
+/******************************************************************************/
+/* Inflate Implementation Specific Defines */
+/******************************************************************************/
+#define ISAL_DECODE_LONG_BITS 12
+#define ISAL_DECODE_SHORT_BITS 10
+
+/* Current state of decompression */
+enum isal_block_state {
+ ISAL_BLOCK_NEW_HDR, /* Just starting a new block */
+ ISAL_BLOCK_HDR, /* In the middle of reading in a block header */
+ ISAL_BLOCK_TYPE0, /* Decoding a type 0 block */
+ ISAL_BLOCK_CODED, /* Decoding a huffman coded block */
+ ISAL_BLOCK_INPUT_DONE, /* Decompression of input is completed */
+ ISAL_BLOCK_FINISH, /* Decompression of input is completed and all data has been flushed to output */
+ ISAL_GZIP_EXTRA_LEN,
+ ISAL_GZIP_EXTRA,
+ ISAL_GZIP_NAME,
+ ISAL_GZIP_COMMENT,
+ ISAL_GZIP_HCRC,
+ ISAL_ZLIB_DICT,
+ ISAL_CHECKSUM_CHECK,
+};
+
+
+/* Inflate Flags */
+#define ISAL_DEFLATE 0 /* Default */
+#define ISAL_GZIP 1
+#define ISAL_GZIP_NO_HDR 2
+#define ISAL_ZLIB 3
+#define ISAL_ZLIB_NO_HDR 4
+#define ISAL_ZLIB_NO_HDR_VER 5
+#define ISAL_GZIP_NO_HDR_VER 6
+
+/* Inflate Return values */
+#define ISAL_DECOMP_OK 0 /* No errors encountered while decompressing */
+#define ISAL_END_INPUT 1 /* End of input reached */
+#define ISAL_OUT_OVERFLOW 2 /* End of output reached */
+#define ISAL_NAME_OVERFLOW 3 /* End of gzip name buffer reached */
+#define ISAL_COMMENT_OVERFLOW 4 /* End of gzip comment buffer reached */
+#define ISAL_EXTRA_OVERFLOW 5 /* End of extra buffer reached */
+#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */
+#define ISAL_INVALID_BLOCK -1 /* Invalid deflate block found */
+#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */
+#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */
+#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */
+#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */
+#define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */
+
+/******************************************************************************/
+/* Compression structures */
+/******************************************************************************/
+/** @brief Holds histogram of deflate symbols*/
+struct isal_huff_histogram {
+ uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols seen
+ uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen
+ uint16_t hash_table[IGZIP_LVL0_HASH_SIZE]; //!< Tmp space used as a hash table
+};
+
+struct isal_mod_hist {
+ uint32_t d_hist[30];
+ uint32_t ll_hist[513];
+};
+
+#define ISAL_DEF_MIN_LEVEL 0
+#define ISAL_DEF_MAX_LEVEL 3
+
+/* Defines used to set level data sizes */
+/* These must be at least sizeof(struct level_buf) + sizeof(struct lvlX_buf) */
+#define ISAL_DEF_LVL0_REQ 0
+#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K + 2 * IGZIP_LVL1_HASH_SIZE)
+#define ISAL_DEF_LVL1_TOKEN_SIZE 4
+#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE)
+#define ISAL_DEF_LVL2_TOKEN_SIZE 4
+#define ISAL_DEF_LVL3_REQ (4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL3_HASH_SIZE)
+#define ISAL_DEF_LVL3_TOKEN_SIZE 4
+
+/* Data sizes for level specific data options */
+#define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_SMALL ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_MEDIUM ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_LARGE ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_EXTRA_LARGE ISAL_DEF_LVL0_REQ
+#define ISAL_DEF_LVL0_DEFAULT ISAL_DEF_LVL0_REQ
+
+#define ISAL_DEF_LVL1_MIN (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 1 * IGZIP_K)
+#define ISAL_DEF_LVL1_SMALL (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 16 * IGZIP_K)
+#define ISAL_DEF_LVL1_MEDIUM (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 32 * IGZIP_K)
+#define ISAL_DEF_LVL1_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 64 * IGZIP_K)
+#define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K)
+#define ISAL_DEF_LVL1_DEFAULT ISAL_DEF_LVL1_LARGE
+
+#define ISAL_DEF_LVL2_MIN (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K)
+#define ISAL_DEF_LVL2_SMALL (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K)
+#define ISAL_DEF_LVL2_MEDIUM (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K)
+#define ISAL_DEF_LVL2_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K)
+#define ISAL_DEF_LVL2_EXTRA_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 128 * IGZIP_K)
+#define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE
+
+#define ISAL_DEF_LVL3_MIN (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 1 * IGZIP_K)
+#define ISAL_DEF_LVL3_SMALL (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 16 * IGZIP_K)
+#define ISAL_DEF_LVL3_MEDIUM (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 32 * IGZIP_K)
+#define ISAL_DEF_LVL3_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 64 * IGZIP_K)
+#define ISAL_DEF_LVL3_EXTRA_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 128 * IGZIP_K)
+#define ISAL_DEF_LVL3_DEFAULT ISAL_DEF_LVL3_LARGE
+
+#define IGZIP_NO_HIST 0
+#define IGZIP_HIST 1
+#define IGZIP_DICT_HIST 2
+#define IGZIP_DICT_HASH_SET 3
+
+/** @brief Holds Bit Buffer information*/
+struct BitBuf2 {
+ uint64_t m_bits; //!< bits in the bit buffer
+ uint32_t m_bit_count; //!< number of valid bits in the bit buffer
+ uint8_t *m_out_buf; //!< current index of buffer to write to
+ uint8_t *m_out_end; //!< end of buffer to write to
+ uint8_t *m_out_start; //!< start of buffer to write to
+};
+
+struct isal_zlib_header {
+ uint32_t info; //!< base-2 logarithm of the LZ77 window size minus 8
+ uint32_t level; //!< Compression level (fastest, fast, default, maximum)
+ uint32_t dict_id; //!< Dictionary id
+ uint32_t dict_flag; //!< Whether to use a dictionary
+};
+
+struct isal_gzip_header {
+ uint32_t text; //!< Optional Text hint
+ uint32_t time; //!< Unix modification time in gzip header
+ uint32_t xflags; //!< xflags in gzip header
+ uint32_t os; //!< OS in gzip header
+ uint8_t *extra; //!< Extra field in gzip header
+ uint32_t extra_buf_len; //!< Length of extra buffer
+ uint32_t extra_len; //!< Actual length of gzip header extra field
+ char *name; //!< Name in gzip header
+ uint32_t name_buf_len; //!< Length of name buffer
+ char *comment; //!< Comments in gzip header
+ uint32_t comment_buf_len; //!< Length of comment buffer
+ uint32_t hcrc; //!< Header crc or header crc flag
+ uint32_t flags; //!< Internal data
+};
+
+/* Variable prefixes:
+ * b_ : Measured wrt the start of the buffer
+ * f_ : Measured wrt the start of the file (aka file_start)
+ */
+
+/** @brief Holds the internal state information for input and output compression streams*/
+struct isal_zstate {
+ uint32_t total_in_start; //!< Not used, may be replaced with something else
+ uint32_t block_next; //!< Start of current deflate block in the input
+ uint32_t block_end; //!< End of current deflate block in the input
+ uint32_t dist_mask; //!< Distance mask used.
+ uint32_t hash_mask;
+ enum isal_zstate_state state; //!< Current state in processing the data stream
+ struct BitBuf2 bitbuf; //!< Bit Buffer
+ uint32_t crc; //!< Current checksum without finalize step if any (adler)
+ uint8_t has_wrap_hdr; //!< keeps track of wrapper header
+ uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set)
+ uint8_t has_eob; //!< keeps track of eob on the last deflate block
+ uint8_t has_hist; //!< flag to track if there is match history
+ uint16_t has_level_buf_init; //!< flag to track if user supplied memory has been initialized.
+ uint32_t count; //!< used for partial header/trailer writes
+ uint8_t tmp_out_buff[16]; //!< temporary array
+ uint32_t tmp_out_start; //!< temporary variable
+ uint32_t tmp_out_end; //!< temporary variable
+ uint32_t b_bytes_valid; //!< number of valid bytes in buffer
+ uint32_t b_bytes_processed; //!< number of bytes processed in buffer
+ uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Internal buffer
+
+ /* Stream should be setup such that the head is cache aligned*/
+ uint16_t head[IGZIP_LVL0_HASH_SIZE]; //!< Hash array
+};
+
+/** @brief Holds the huffman tree used to huffman encode the input stream **/
+struct isal_hufftables {
+
+ uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE]; //!< deflate huffman tree header
+ uint32_t deflate_hdr_count; //!< Number of whole bytes in deflate_huff_hdr
+ uint32_t deflate_hdr_extra_bits; //!< Number of bits in the partial byte in header
+ uint32_t dist_table[IGZIP_DIST_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
+ uint32_t len_table[IGZIP_LEN_TABLE_SIZE]; //!< bits 4:0 are the code length, bits 31:5 are the code
+ uint16_t lit_table[IGZIP_LIT_TABLE_SIZE]; //!< literal code
+ uint8_t lit_table_sizes[IGZIP_LIT_TABLE_SIZE]; //!< literal code length
+ uint16_t dcodes[30 - IGZIP_DECODE_OFFSET]; //!< distance code
+ uint8_t dcodes_sizes[30 - IGZIP_DECODE_OFFSET]; //!< distance code length
+
+};
+
+/** @brief Holds stream information*/
+struct isal_zstream {
+ uint8_t *next_in; //!< Next input byte
+ uint32_t avail_in; //!< number of bytes available at next_in
+ uint32_t total_in; //!< total number of bytes read so far
+
+ uint8_t *next_out; //!< Next output byte
+ uint32_t avail_out; //!< number of bytes available at next_out
+ uint32_t total_out; //!< total number of bytes written so far
+
+ struct isal_hufftables *hufftables; //!< Huffman encoding used when compressing
+ uint32_t level; //!< Compression level to use
+ uint32_t level_buf_size; //!< Size of level_buf
+ uint8_t * level_buf; //!< User allocated buffer required for different compression levels
+ uint16_t end_of_stream; //!< non-zero if this is the last input buffer
+ uint16_t flush; //!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH
+ uint16_t gzip_flag; //!< Indicate if gzip compression is to be performed
+ uint16_t hist_bits; //!< Log base 2 of maximum lookback distance; 0 means use the default
+ struct isal_zstate internal_state; //!< Internal state for this stream
+};
+
+/******************************************************************************/
+/* Inflate structures */
+/******************************************************************************/
+/*
+ * Inflate_huff_code data structures are used to store a Huffman code for fast
+ * lookup. It works by performing a lookup in small_code_lookup that hopefully
+ * yields the correct symbol. Otherwise a lookup into long_code_lookup is
+ * performed to find the correct symbol. The details of how this works follows:
+ *
+ * Let i be some index into small_code_lookup and let e be the associated
+ * element. Bit 15 in e is a flag. If bit 15 is not set, then index i contains
+ * a Huffman code for a symbol which has length at most DECODE_LOOKUP_SIZE. Bits
+ * 0 through 8 are the symbol associated with that code and bits 9 through 12 of
+ * e represent the number of bits in the code. If bit 15 is set, then i
+ * corresponds to the first DECODE_LOOKUP_SIZE bits of a Huffman code which has
+ * length longer than DECODE_LOOKUP_SIZE. In this case, bits 0 through 8
+ * represent an offset into long_code_lookup table and bits 9 through 12
+ * represent the maximum length of a Huffman code starting with the bits in the
+ * index i. The offset into long_code_lookup is for an array associated with all
+ * codes which start with the bits in i.
+ *
+ * The elements of long_code_lookup are in the same format as small_code_lookup,
+ * except bit 15 is never set. Let i be a number made up of DECODE_LOOKUP_SIZE
+ * bits. Then all Huffman codes which start with DECODE_LOOKUP_SIZE bits are
+ * stored in an array starting at index h in long_code_lookup. This index h is
+ * stored in bits 0 through 8 at index i in small_code_lookup. The index j is an
+ * index of this array if the number of bits contained in j and i is the number
+ * of bits in the longest huff_code starting with the bits of i. The symbol
+ * stored at index j is the symbol whose huffcode can be found in (j <<
+ * DECODE_LOOKUP_SIZE) | i. Note these arrays will be stored sorted in order of
+ * maximum Huffman code length.
+ *
+ * The following are explanations for sizes of the tables:
+ *
+ * Since small_code_lookup is a lookup on DECODE_LOOKUP_SIZE bits, it must have
+ * size 2^DECODE_LOOKUP_SIZE.
+ *
+ * To determine the amount of memory required for long_code_lookup, note that
+ * any element of long_code_lookup corresponds to a code, a duplicate of an
+ * existing code, or an invalid code. Since deflate Huffman codes are stored such that
+ * the code size and the code value form an increasing function, the number of
+ * duplicates is maximized when all the duplicates are contained in a single
+ * array, thus there are at most 2^(15 - DECODE_LOOKUP_SIZE) -
+ * (DECODE_LOOKUP_SIZE + 1) duplicate elements. Similarly the number of invalid
+ * elements is maximized at 2^(15 - DECODE_LOOKUP_SIZE) - 2^(floor((15 -
+ * DECODE_LOOKUP_SIZE)/2) - 2^(ceil((15 - DECODE_LOOKUP_SIZE)/2) + 1. Thus the
+ * amount of memory required is: NUM_CODES + 2^(16 - DECODE_LOOKUP_SIZE) -
+ * (DECODE_LOOKUP_SIZE + 1) - 2^(floor((15 - DECODE_LOOKUP_SIZE)/2) -
+ * 2^(ceil((15 - DECODE_LOOKUP_SIZE)/2) + 1. The values used below are those
+ * values rounded up to the nearest 16 byte boundary
+ *
+ * Note that DECODE_LOOKUP_SIZE can be any length even though the offset in
+ * small_lookup_code is 9 bits long because the increasing relationship between
+ * code length and code value forces the maximum offset to be less than 288.
+ */
+
+/* In the following defines, L stands for LARGE and S for SMALL */
+#define ISAL_L_REM (21 - ISAL_DECODE_LONG_BITS)
+#define ISAL_S_REM (15 - ISAL_DECODE_SHORT_BITS)
+
+#define ISAL_L_DUP ((1 << ISAL_L_REM) - (ISAL_L_REM + 1))
+#define ISAL_S_DUP ((1 << ISAL_S_REM) - (ISAL_S_REM + 1))
+
+#define ISAL_L_UNUSED ((1 << ISAL_L_REM) - (1 << ((ISAL_L_REM)/2)) - (1 << ((ISAL_L_REM + 1)/2)) + 1)
+#define ISAL_S_UNUSED ((1 << ISAL_S_REM) - (1 << ((ISAL_S_REM)/2)) - (1 << ((ISAL_S_REM + 1)/2)) + 1)
+
+#define ISAL_L_SIZE (ISAL_DEF_LIT_LEN_SYMBOLS + ISAL_L_DUP + ISAL_L_UNUSED)
+#define ISAL_S_SIZE (ISAL_DEF_DIST_SYMBOLS + ISAL_S_DUP + ISAL_S_UNUSED)
+
+#define ISAL_HUFF_CODE_LARGE_LONG_ALIGNED (ISAL_L_SIZE + (-ISAL_L_SIZE & 0xf))
+#define ISAL_HUFF_CODE_SMALL_LONG_ALIGNED (ISAL_S_SIZE + (-ISAL_S_SIZE & 0xf))
+
+/* Large lookup table for decoding huffman codes */
+struct inflate_huff_code_large {
+ uint32_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)];
+ uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED];
+};
+
+/* Small lookup table for decoding huffman codes */
+struct inflate_huff_code_small {
+ uint16_t short_code_lookup[1 << (ISAL_DECODE_SHORT_BITS)];
+ uint16_t long_code_lookup[ISAL_HUFF_CODE_SMALL_LONG_ALIGNED];
+};
+
+/** @brief Holds decompression state information*/
+struct inflate_state {
+ uint8_t *next_out; //!< Next output Byte
+ uint32_t avail_out; //!< Number of bytes available at next_out
+ uint32_t total_out; //!< Total bytes written out so far
+ uint8_t *next_in; //!< Next input byte
+ uint64_t read_in; //!< Bits buffered to handle unaligned streams
+ uint32_t avail_in; //!< Number of bytes available at next_in
+ int32_t read_in_length; //!< Bits in read_in
+ struct inflate_huff_code_large lit_huff_code; //!< Structure for decoding lit/len symbols
+ struct inflate_huff_code_small dist_huff_code; //!< Structure for decoding dist symbols
+ enum isal_block_state block_state; //!< Current decompression state
+ uint32_t dict_length; //!< Length of dictionary used
+ uint32_t bfinal; //!< Flag identifying final block
+ uint32_t crc_flag; //!< Flag identifying whether to track the crc
+ uint32_t crc; //!< Contains crc or adler32 of output if crc_flag is set
+ uint32_t hist_bits; //!< Log base 2 of maximum lookback distance
+ union {
+ int32_t type0_block_len; //!< Length left to read of type 0 block when outbuffer overflow occurred
+ int32_t count; //!< Count of bytes remaining to be parsed
+ uint32_t dict_id;
+ };
+ int32_t write_overflow_lits;
+ int32_t write_overflow_len;
+ int32_t copy_overflow_length; //!< Length left to copy when outbuffer overflow occurred
+ int32_t copy_overflow_distance; //!< Lookback distance when outbuffer overflow occurred
+ int16_t wrapper_flag;
+ int16_t tmp_in_size; //!< Number of bytes in tmp_in_buffer
+ int32_t tmp_out_valid; //!< Number of bytes in tmp_out_buffer
+ int32_t tmp_out_processed; //!< Number of bytes processed in tmp_out_buffer
+ uint8_t tmp_in_buffer[ISAL_DEF_MAX_HDR_SIZE]; //!< Temporary buffer containing data from the input stream
+ uint8_t tmp_out_buffer[2 * ISAL_DEF_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Temporary buffer containing data from the output stream
+};
+
+/******************************************************************************/
+/* Compression functions */
+/******************************************************************************/
+/**
+ * @brief Updates histograms to include the symbols found in the input
+ * stream. Since this function only updates the histograms, it can be called on
+ * multiple streams to get a histogram better representing the desired data
+ * set. When first using the histogram, it must be initialized by zeroing the
+ * structure.
+ *
+ * @param in_stream: Input stream of data.
+ * @param length: The length of in_stream.
+ * @param histogram: The returned histogram of lit/len/dist symbols.
+ */
+void isal_update_histogram(uint8_t * in_stream, int length, struct isal_huff_histogram * histogram);
+
+
+/**
+ * @brief Creates a custom huffman code for the given histograms in which
+ * every literal and repeat length is assigned a code and all possible lookback
+ * distances are assigned a code.
+ *
+ * @param hufftables: the output structure containing the huffman code
+ * @param histogram: histogram containing frequency of literal symbols,
+ * repeat lengths and lookback distances
+ * @returns Returns a non zero value if an invalid huffman code was created.
+ */
+int isal_create_hufftables(struct isal_hufftables * hufftables,
+ struct isal_huff_histogram * histogram);
+
+/**
+ * @brief Creates a custom huffman code for the given histograms like
+ * isal_create_hufftables() except literals with 0 frequency in the histogram
+ * are not assigned a code
+ *
+ * @param hufftables: the output structure containing the huffman code
+ * @param histogram: histogram containing frequency of literal symbols,
+ * repeat lengths and lookback distances
+ * @returns Returns a non zero value if an invalid huffman code was created.
+ */
+int isal_create_hufftables_subset(struct isal_hufftables * hufftables,
+ struct isal_huff_histogram * histogram);
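+
+/* Example (editor's sketch): the usual two-pass flow for a custom Huffman
+ * code - gather statistics on representative data, build the hufftables,
+ * then point the stream at them. The sample buffer and stream variable are
+ * hypothetical; requires <string.h> for memset.
+ *
+ * @code
+ * struct isal_huff_histogram hist;
+ * struct isal_hufftables hufftables;
+ * memset(&hist, 0, sizeof(hist));      // histogram must start zeroed
+ * isal_update_histogram(sample, sample_len, &hist);
+ * if (isal_create_hufftables(&hufftables, &hist) != 0)
+ *         return -1;
+ * stream.hufftables = &hufftables;     // picked up at the next new header
+ * @endcode
+ */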
+
+/**
+ * @brief Initialize compression stream data structure
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @returns none
+ */
+void isal_deflate_init(struct isal_zstream *stream);
+
+/**
+ * @brief Reinitialize compression stream data structure. Performs the same
+ * action as isal_deflate_init, but does not change user supplied input such as
+ * the level, flush type, compression wrapper (like gzip), hufftables, and
+ * end_of_stream_flag.
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @returns none
+ */
+void isal_deflate_reset(struct isal_zstream *stream);
+
+
+/**
+ * @brief Set gzip header default values
+ *
+ * @param gz_hdr: Gzip header to initialize.
+ */
+void isal_gzip_header_init(struct isal_gzip_header *gz_hdr);
+
+/**
+ * @brief Write gzip header to output stream
+ *
+ * Writes the gzip header to the output stream. On entry this function assumes
+ * that the output buffer has been initialized, so stream->next_out,
+ * stream->avail_out and stream->total_out have been set. If the output buffer
+ * contains insufficient space, stream is not modified.
+ *
+ * @param stream: Structure holding state information on the compression stream.
+ * @param gz_hdr: Structure holding the gzip header information to encode.
+ *
+ * @returns Returns 0 if the header is successfully written, otherwise returns
+ * the minimum size required to successfully write the gzip header to the output
+ * buffer.
+ */
+uint32_t isal_write_gzip_header(struct isal_zstream * stream, struct isal_gzip_header *gz_hdr);
+
+/**
+ * @brief Write zlib header to output stream
+ *
+ * Writes the zlib header to the output stream. On entry this function assumes
+ * that the output buffer has been initialized, so stream->next_out,
+ * stream->avail_out and stream->total_out have been set. If the output buffer
+ * contains insufficient space, stream is not modified.
+ *
+ * @param stream: Structure holding state information on the compression stream.
+ * @param z_hdr: Structure holding the zlib header information to encode.
+ *
+ * @returns Returns 0 if the header is successfully written, otherwise returns
+ * the minimum size required to successfully write the zlib header to the output
+ * buffer.
+ */
+uint32_t isal_write_zlib_header(struct isal_zstream * stream, struct isal_zlib_header *z_hdr);
+
+/**
+ * @brief Set stream to use a new Huffman code
+ *
+ * Sets the Huffman code to be used in compression before compression start or
+ * after the successful completion of a SYNC_FLUSH or FULL_FLUSH. If type has
+ * value IGZIP_HUFFTABLE_DEFAULT, the stream is set to use the default Huffman
+ * code. If type has value IGZIP_HUFFTABLE_STATIC, the stream is set to use the
+ * deflate standard static Huffman code, or if type has value
+ * IGZIP_HUFFTABLE_CUSTOM, the stream is set to use the isal_hufftables
+ * structure input to isal_deflate_set_hufftables.
+ *
+ * @param stream: Structure holding state information on the compression stream.
+ * @param hufftables: new huffman code to use if type is set to
+ * IGZIP_HUFFTABLE_CUSTOM.
+ * @param type: Flag specifying what hufftable to use.
+ *
+ * @returns Returns ISAL_INVALID_OPERATION if the stream was unmodified. This may be
+ * due to the stream being in a state where changing the huffman code is not
+ * allowed or an invalid input is provided.
+ */
+int isal_deflate_set_hufftables(struct isal_zstream *stream,
+ struct isal_hufftables *hufftables, int type);
+
+/**
+ * @brief Initialize compression stream data structure
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @returns none
+ */
+void isal_deflate_stateless_init(struct isal_zstream *stream);
+
+
+/**
+ * @brief Set compression dictionary to use
+ *
+ * This function is to be called after isal_deflate_init, or after completing a
+ * SYNC_FLUSH or FULL_FLUSH and before the next call to isal_deflate. If the
+ * dictionary is longer than IGZIP_HIST_SIZE, only the last IGZIP_HIST_SIZE
+ * bytes will be used.
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @param dict: Array containing dictionary to use.
+ * @param dict_len: Length of dict.
+ * @returns COMP_OK,
+ * ISAL_INVALID_STATE (dictionary could not be set)
+ */
+int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t dict_len);
+
+/** @brief Structure for holding processed dictionary information */
+
+struct isal_dict {
+ uint32_t params;
+ uint32_t level;
+ uint32_t hist_size;
+ uint32_t hash_size;
+ uint8_t history[ISAL_DEF_HIST_SIZE];
+ uint16_t hashtable[IGZIP_LVL3_HASH_SIZE];
+};
+
+/**
+ * @brief Process dictionary to reuse later
+ *
+ * Processes a dictionary so that the generated output can be reused to reset a
+ * new deflate stream more quickly than isal_deflate_set_dict() alone. This
+ * function is paired with isal_deflate_reset_dict() when using the same
+ * dictionary on multiple deflate objects. The stream.level must be set prior to
+ * calling this function to process the dictionary correctly. If the dictionary
+ * is longer than IGZIP_HIST_SIZE, only the last IGZIP_HIST_SIZE bytes will be
+ * used.
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @param dict_str: Structure to hold processed dictionary info to reuse later.
+ * @param dict: Array containing dictionary to use.
+ * @param dict_len: Length of dict.
+ * @returns COMP_OK,
+ * ISAL_INVALID_STATE (dictionary could not be processed)
+ */
+int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict_str,
+ uint8_t *dict, uint32_t dict_len);
+
+/**
+ * @brief Reset compression dictionary to use
+ *
+ * Similar to isal_deflate_set_dict() but on pre-processed dictionary
+ * data. Pairing with isal_deflate_process_dict() can reduce the processing time
+ * on subsequent compression with dictionary especially on small files.
+ *
+ * Like isal_deflate_set_dict(), this function is to be called after
+ * isal_deflate_init, or after completing a SYNC_FLUSH or FULL_FLUSH and before
+ * the next call do isal_deflate. Changing compression level between dictionary
+ * process and reset will cause return of ISAL_INVALID_STATE.
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @param dict_str: Structure with pre-processed dictionary info.
+ * @returns COMP_OK,
+ * ISAL_INVALID_STATE or other (dictionary could not be reset)
+ */
+int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict_str);
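+
+/* Example (editor's sketch): processing a dictionary once and reusing it
+ * across streams. The dict buffer is a hypothetical placeholder; the level
+ * must match between process and reset, as noted above.
+ *
+ * @code
+ * struct isal_dict dict_str;
+ * isal_deflate_init(&stream);
+ * stream.level = 1;
+ * isal_deflate_process_dict(&stream, &dict_str, dict, dict_len);
+ * // later, for each new stream using the same dictionary:
+ * isal_deflate_reset(&stream);
+ * isal_deflate_reset_dict(&stream, &dict_str);  // cheaper than set_dict
+ * @endcode
+ */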
+
+
+/**
+ * @brief Fast data (deflate) compression for storage applications.
+ *
+ * The call to isal_deflate() will take data from the input buffer (updating
+ * next_in, avail_in and write a compressed stream to the output buffer
+ * (updating next_out and avail_out). The function returns when either the input
+ * buffer is empty or the output buffer is full.
+ *
+ * On entry to isal_deflate(), next_in points to an input buffer and avail_in
+ * indicates the length of that buffer. Similarly next_out points to an empty
+ * output buffer and avail_out indicates the size of that buffer.
+ *
+ * The fields total_in and total_out start at 0 and are updated by
+ * isal_deflate(). These reflect the total number of bytes read or written so far.
+ *
+ * When the last input buffer is passed in, signaled by setting the
+ * end_of_stream, the routine will complete compression at the end of the input
+ * buffer, as long as the output buffer is big enough.
+ *
+ * The compression level can be set by setting level to any value between
+ * ISAL_DEF_MIN_LEVEL and ISAL_DEF_MAX_LEVEL. When the compression level is
+ * ISAL_DEF_MIN_LEVEL, hufftables can be set to a table trained for the
+ * specific data type being compressed to achieve better compression. When a
+ * higher compression level is desired, a larger generic memory buffer needs to
+ * be supplied by setting level_buf and level_buf_size to represent the chunk of
+ * memory. For level x, the suggested size for this buffer is
+ * ISAL_DEF_LVLx_DEFAULT. The defines ISAL_DEF_LVLx_MIN, ISAL_DEF_LVLx_SMALL,
+ * ISAL_DEF_LVLx_MEDIUM, ISAL_DEF_LVLx_LARGE, and ISAL_DEF_LVLx_EXTRA_LARGE
+ * are also provided as other suggested sizes.
+ *
+ * The equivalent of the zlib FLUSH_SYNC operation is currently supported.
+ * Flush types can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH. Default flush type is
+ * NO_FLUSH. A SYNC_FLUSH or FULL_FLUSH will byte align the deflate block by
+ * appending an empty stored block once all input has been compressed, including
+ * the buffered input. Checking that the out_buffer is not empty or that
+ * internal_state.state == ZSTATE_NEW_HDR is sufficient to guarantee all input
+ * has been flushed. Additionally FULL_FLUSH will ensure look back history does
+ * not include previous blocks so new blocks are fully independent. Switching
+ * between flush types is supported.
+ *
+ * If a compression dictionary is required, the dictionary can be set by calling
+ * isal_deflate_set_dict before calling isal_deflate.
+ *
+ * If the gzip_flag is set to IGZIP_GZIP, a generic gzip header and the gzip
+ * trailer are written around the deflate compressed data. If gzip_flag is set
+ * to IGZIP_GZIP_NO_HDR, then only the gzip trailer is written. A full-featured
+ * header is supported by the isal_write_{gzip,zlib}_header() functions.
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @return COMP_OK (if everything is ok),
+ * INVALID_FLUSH (if an invalid FLUSH is selected),
+ * ISAL_INVALID_LEVEL (if an invalid compression level is selected),
+ * ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough).
+ */
+int isal_deflate(struct isal_zstream *stream);
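+
+/* Example (editor's sketch): a minimal streaming compression loop. The I/O
+ * helpers fill_input() and drain_output() are hypothetical placeholders.
+ *
+ * @code
+ * struct isal_zstream stream;
+ * isal_deflate_init(&stream);
+ * do {
+ *         stream.next_in = inbuf;
+ *         stream.avail_in = fill_input(inbuf, sizeof(inbuf));
+ *         stream.end_of_stream = (stream.avail_in < sizeof(inbuf));
+ *         do {
+ *                 stream.next_out = outbuf;
+ *                 stream.avail_out = sizeof(outbuf);
+ *                 if (isal_deflate(&stream) != COMP_OK)
+ *                         return -1;
+ *                 drain_output(outbuf, sizeof(outbuf) - stream.avail_out);
+ *         } while (stream.avail_out == 0);
+ * } while (stream.internal_state.state != ZSTATE_END);
+ * @endcode
+ */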
+
+
+/**
+ * @brief Fast data (deflate) stateless compression for storage applications.
+ *
+ * Stateless (one shot) compression routine with a similar interface to
+ * isal_deflate() but operates on entire input buffer at one time. Parameter
+ * avail_out must be large enough to fit the entire compressed output. Max
+ * expansion is limited to the input size plus the header size of a stored/raw
+ * block.
+ *
+ * When the compression level is set to 1, unlike in isal_deflate(), level_buf
+ * may be optionally set depending on what performance is desired.
+ *
+ * For stateless the flush types NO_FLUSH and FULL_FLUSH are supported.
+ * FULL_FLUSH will byte align the output deflate block so additional blocks can
+ * be easily appended.
+ *
+ * If the gzip_flag is set to IGZIP_GZIP, a generic gzip header and the gzip
+ * trailer are written around the deflate compressed data. If gzip_flag is set
+ * to IGZIP_GZIP_NO_HDR, then only the gzip trailer is written.
+ *
+ * @param stream Structure holding state information on the compression streams.
+ * @return COMP_OK (if everything is ok),
+ * INVALID_FLUSH (if an invalid FLUSH is selected),
+ * ISAL_INVALID_LEVEL (if an invalid compression level is selected),
+ * ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough),
+ * STATELESS_OVERFLOW (if output buffer will not fit output).
+ */
+int isal_deflate_stateless(struct isal_zstream *stream);
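+
+/* Example (editor's sketch): one-shot gzip compression of a whole buffer.
+ * outbuf must be sized for the worst case (input size plus stored-block
+ * header overhead, plus the gzip wrapper); buffer names are hypothetical.
+ *
+ * @code
+ * struct isal_zstream stream;
+ * isal_deflate_stateless_init(&stream);
+ * stream.next_in = inbuf;
+ * stream.avail_in = in_len;
+ * stream.next_out = outbuf;
+ * stream.avail_out = out_len;
+ * stream.end_of_stream = 1;
+ * stream.gzip_flag = IGZIP_GZIP;
+ * if (isal_deflate_stateless(&stream) != COMP_OK)
+ *         return -1;                   // e.g. STATELESS_OVERFLOW
+ * @endcode
+ */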
+
+
+/******************************************************************************/
+/* Inflate functions */
+/******************************************************************************/
+/**
+ * @brief Initialize decompression state data structure
+ *
+ * @param state Structure holding state information on the compression streams.
+ * @returns none
+ */
+void isal_inflate_init(struct inflate_state *state);
+
+/**
+ * @brief Reinitialize decompression state data structure
+ *
+ * @param state Structure holding state information on the compression streams.
+ * @returns none
+ */
+void isal_inflate_reset(struct inflate_state *state);
+
+/**
+ * @brief Set decompression dictionary to use
+ *
+ * This function is to be called after isal_inflate_init. If the dictionary is
+ * longer than IGZIP_HIST_SIZE, only the last IGZIP_HIST_SIZE bytes will be
+ * used.
+ *
+ * @param state: Structure holding state information on the decompression stream.
+ * @param dict: Array containing dictionary to use.
+ * @param dict_len: Length of dict.
+ * @returns COMP_OK,
+ * ISAL_INVALID_STATE (dictionary could not be set)
+ */
+int isal_inflate_set_dict(struct inflate_state *state, uint8_t *dict, uint32_t dict_len);
+
+/**
+ * @brief Read and return gzip header information
+ *
+ * On entry state must be initialized and next_in pointing to a gzip compressed
+ * buffer. The buffers gz_hdr->extra, gz_hdr->name, gz_hdr->comment and the
+ * buffer lengths must be set to record the corresponding field, or set to NULL
+ * to disregard that gzip header information. If one of these buffers overflows,
+ * the user can reallocate a larger buffer and call this function again to
+ * continue reading the header information.
+ *
+ * @param state: Structure holding state information on the decompression stream.
+ * @param gz_hdr: Structure to return data encoded in the gzip header
+ * @returns ISAL_DECOMP_OK (header was successfully parsed)
+ * ISAL_END_INPUT (all input was parsed),
+ * ISAL_NAME_OVERFLOW (gz_hdr->name overflowed while parsing),
+ * ISAL_COMMENT_OVERFLOW (gz_hdr->comment overflowed while parsing),
+ * ISAL_EXTRA_OVERFLOW (gz_hdr->extra overflowed while parsing),
+ * ISAL_INVALID_WRAPPER (invalid gzip header found),
+ * ISAL_UNSUPPORTED_METHOD (deflate is not the compression method),
+ * ISAL_INCORRECT_CHECKSUM (gzip header checksum was incorrect)
+ */
+int isal_read_gzip_header (struct inflate_state *state, struct isal_gzip_header *gz_hdr);
+
+/**
+ * @brief Read and return zlib header information
+ *
+ * On entry state must be initialized and next_in pointing to a zlib compressed
+ * buffer.
+ *
+ * @param state: Structure holding state information on the decompression stream.
+ * @param zlib_hdr: Structure to return data encoded in the zlib header
+ * @returns ISAL_DECOMP_OK (header was successfully parsed),
+ * ISAL_END_INPUT (all input was parsed),
+ * ISAL_UNSUPPORTED_METHOD (deflate is not the compression method),
+ * ISAL_INCORRECT_CHECKSUM (zlib header checksum was incorrect)
+ */
+int isal_read_zlib_header (struct inflate_state *state, struct isal_zlib_header *zlib_hdr);
+
+/**
+ * @brief Fast data (deflate) decompression for storage applications.
+ *
+ * On entry to isal_inflate(), next_in points to an input buffer and avail_in
+ * indicates the length of that buffer. Similarly next_out points to an empty
+ * output buffer and avail_out indicates the size of that buffer.
+ *
+ * The field total_out starts at 0 and is updated by isal_inflate(). This
+ * reflects the total number of bytes written so far.
+ *
+ * The call to isal_inflate() will take data from the input buffer (updating
+ * next_in, avail_in and write a decompressed stream to the output buffer
+ * (updating next_out and avail_out). The function returns when the input buffer
+ * is empty, the output buffer is full, invalid data is found, or in the case of
+ * zlib formatted data if a dictionary is specified. The current state of the
+ * decompression on exit can be read from state->block_state.
+ *
+ * If the crc_flag is set to ISAL_GZIP_NO_HDR the gzip crc of the output is
+ * stored in state->crc. Alternatively, if the crc_flag is set to
+ * ISAL_ZLIB_NO_HDR the adler32 of the output is stored in state->crc (checksum
+ * may not be updated until decompression is complete). When the crc_flag is set
+ * to ISAL_GZIP_NO_HDR_VER or ISAL_ZLIB_NO_HDR_VER, the behavior is the same,
+ * except the checksum is verified against the checksum immediately following
+ * the deflate data. If the crc_flag is set to ISAL_GZIP or ISAL_ZLIB, the
+ * gzip/zlib header is parsed, state->crc is set to the appropriate checksum,
+ * and the checksum is verified. If the crc_flag is set to ISAL_DEFLATE
+ * (default), then the data is treated as a raw deflate block.
+ *
+ * The element state->hist_bits has values from 0 to 15, where values of 1 to 15
+ * are the log base 2 size of the matching window and 0 is the default with
+ * maximum history size.
+ *
+ * If a dictionary is required, a call to isal_inflate_set_dict will set the
+ * dictionary.
+ *
+ * @param state Structure holding state information on the decompression stream.
+ * @return ISAL_DECOMP_OK (if everything is ok),
+ * ISAL_INVALID_BLOCK,
+ * ISAL_NEED_DICT,
+ * ISAL_INVALID_SYMBOL,
+ * ISAL_INVALID_LOOKBACK,
+ * ISAL_INVALID_WRAPPER,
+ * ISAL_UNSUPPORTED_METHOD,
+ * ISAL_INCORRECT_CHECKSUM.
+ */
+int isal_inflate(struct inflate_state *state);
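+
+/* Illustrative sketch only: a stateful decompression loop feeding fixed-size
+ * chunks through isal_inflate(). read_chunk(), write_chunk() and BUF_SIZE are
+ * hypothetical.
+ *
+ *	struct inflate_state state;
+ *	uint8_t in[BUF_SIZE], out[BUF_SIZE];
+ *	isal_inflate_init(&state);
+ *	state.crc_flag = ISAL_GZIP;	// parse gzip header, verify checksum
+ *	do {
+ *		if (state.avail_in == 0) {
+ *			state.next_in = in;
+ *			state.avail_in = read_chunk(in, BUF_SIZE);
+ *		}
+ *		state.next_out = out;
+ *		state.avail_out = BUF_SIZE;
+ *		if (isal_inflate(&state) != ISAL_DECOMP_OK)
+ *			break;	// invalid stream
+ *		write_chunk(out, BUF_SIZE - state.avail_out);
+ *	} while (state.block_state != ISAL_BLOCK_FINISH);
+ */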
+
+/**
+ * @brief Fast data (deflate) stateless decompression for storage applications.
+ *
+ * Stateless (one shot) decompression routine with a similar interface to
+ * isal_inflate() but operates on the entire input buffer at once. The parameter
+ * avail_out must be large enough to fit the entire decompressed
+ * output. Dictionaries are not supported.
+ *
+ * @param state Structure holding state information on the decompression stream.
+ * @return ISAL_DECOMP_OK (if everything is ok),
+ * ISAL_END_INPUT (if all input was decompressed),
+ * ISAL_NEED_DICT,
+ * ISAL_OUT_OVERFLOW (if output buffer ran out of space),
+ * ISAL_INVALID_BLOCK,
+ * ISAL_INVALID_SYMBOL,
+ * ISAL_INVALID_LOOKBACK,
+ * ISAL_INVALID_WRAPPER,
+ * ISAL_UNSUPPORTED_METHOD,
+ * ISAL_INCORRECT_CHECKSUM.
+ */
+int isal_inflate_stateless(struct inflate_state *state);
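+
+/* Illustrative sketch only: one-shot decompression of a complete deflate
+ * stream held in memory. The buffer names are hypothetical.
+ *
+ *	struct inflate_state state;
+ *	isal_inflate_init(&state);
+ *	state.next_in = comp;		// the entire compressed stream
+ *	state.avail_in = comp_len;
+ *	state.next_out = uncomp;
+ *	state.avail_out = uncomp_len;	// must fit the whole output
+ *	ret = isal_inflate_stateless(&state);
+ */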
+
+/******************************************************************************/
+/* Other functions */
+/******************************************************************************/
+/**
+ * @brief Calculate Adler-32 checksum, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param init: initial Adler-32 value
+ * @param buf: buffer to calculate checksum on
+ * @param len: buffer length in bytes
+ *
+ * @returns 32-bit Adler-32 checksum
+ */
+uint32_t isal_adler32(uint32_t init, const unsigned char *buf, uint64_t len);
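+
+/* Illustrative sketch only: Adler-32 over a split buffer. 1 is the
+ * conventional seed value; passing a previous result as init chains the
+ * checksum across calls. buf1/buf2 and their lengths are hypothetical.
+ *
+ *	uint32_t a = isal_adler32(1, buf1, len1);
+ *	a = isal_adler32(a, buf2, len2);	// same result as one call over both
+ */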
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* ifndef _IGZIP_H */
diff --git a/src/isa-l/include/mem_routines.h b/src/isa-l/include/mem_routines.h
new file mode 100644
index 000000000..3d23522e9
--- /dev/null
+++ b/src/isa-l/include/mem_routines.h
@@ -0,0 +1,64 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stddef.h>
+
+/**
+ * @file mem_routines.h
+ * @brief Interface to storage mem operations
+ *
+ * Defines the interface for vector versions of common memory functions.
+ */
+
+
+#ifndef _MEM_ROUTINES_H_
+#define _MEM_ROUTINES_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Detect if a memory region is all zero
+ *
+ * Zero detect function with optimizations for large blocks > 128 bytes
+ *
+ * @param mem Pointer to memory region to test
+ * @param len Length of region in bytes
+ * @returns 0 - region is all zeros
+ *          other - region has non-zero bytes
+ */
+int isal_zero_detect(void *mem, size_t len);
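+
+/* Illustrative sketch only: skipping writes of all-zero blocks, a common
+ * storage use case. block and block_len are hypothetical.
+ *
+ *	if (isal_zero_detect(block, block_len) == 0)
+ *		; // block is entirely zero; e.g. punch a hole instead of writing
+ */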
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _MEM_ROUTINES_H_
+
diff --git a/src/isa-l/include/multibinary.asm b/src/isa-l/include/multibinary.asm
new file mode 100644
index 000000000..588352a2f
--- /dev/null
+++ b/src/isa-l/include/multibinary.asm
@@ -0,0 +1,440 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifndef _MULTIBINARY_ASM_
+%define _MULTIBINARY_ASM_
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ %define mbin_def_ptr dd
+ %define mbin_ptr_sz dword
+ %define mbin_rdi edi
+ %define mbin_rsi esi
+ %define mbin_rax eax
+ %define mbin_rbx ebx
+ %define mbin_rcx ecx
+ %define mbin_rdx edx
+%else
+ %define mbin_def_ptr dq
+ %define mbin_ptr_sz qword
+ %define mbin_rdi rdi
+ %define mbin_rsi rsi
+ %define mbin_rax rax
+ %define mbin_rbx rbx
+ %define mbin_rcx rcx
+ %define mbin_rdx rdx
+%endif
+
+%ifndef AS_FEATURE_LEVEL
+%define AS_FEATURE_LEVEL 4
+%endif
+
+;;;;
+; multibinary macro:
+; creates the visible entry point that uses HW optimized call pointer
+; creates the init of the HW optimized call pointer
+;;;;
+%macro mbin_interface 1
+ ;;;;
+ ; *_dispatched is defaulted to *_mbinit and replaced on first call.
+ ; Therefore, *_dispatch_init is only executed on first call.
+ ;;;;
+ section .data
+ %1_dispatched:
+ mbin_def_ptr %1_mbinit
+
+ section .text
+ mk_global %1, function
+ %1_mbinit:
+ endbranch
+	;;; only called the first time to set up the hardware match
+	call	%1_dispatch_init
+	;;; falls through to execute the hw optimized code
+ %1:
+ endbranch
+ jmp mbin_ptr_sz [%1_dispatched]
+%endmacro
+
+;;;;;
+; mbin_dispatch_init parameters
+; Use this function when SSE/00/01 is a minimum requirement
+; 1-> function name
+; 2-> SSE/00/01 optimized function used as base
+; 3-> AVX or AVX/02 opt func
+; 4-> AVX2 or AVX/04 opt func
+;;;;;
+%macro mbin_dispatch_init 4
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ lea mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01
+
+ mov eax, 1
+ cpuid
+ and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func
+ jne _%1_init_done ; AVX is not available so end
+ mov mbin_rsi, mbin_rbx
+
+ ;; Try for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen4) opt func
+ cmovne mbin_rsi, mbin_rbx
+
+ ;; Does it have xmm and ymm support
+ xor ecx, ecx
+ xgetbv
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ je _%1_init_done
+ lea mbin_rsi, [%2 WRT_OPT]
+
+ _%1_init_done:
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
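+
+;;;;
+; Illustrative sketch only: how a multibinary entry is typically declared in
+; the library's *_multibinary.asm files (the function names are hypothetical):
+;
+;	mbin_interface		foo
+;	mbin_dispatch_init	foo, foo_sse, foo_avx, foo_avx2
+;;;;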
+
+;;;;;
+; mbin_dispatch_init2 parameters
+; Cases where only base functions are available
+; 1-> function name
+; 2-> base function
+;;;;;
+%macro mbin_dispatch_init2 2
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ lea mbin_rsi, [%2 WRT_OPT] ; Default
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+;;;;;
+; mbin_dispatch_init_clmul 5 parameters
+; Use this case for CRC which needs both SSE4_1 and CLMUL
+; 1-> function name
+; 2-> base function
+; 3-> SSE4_1 and CLMUL optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX512/10 opt func
+;;;;;
+%macro mbin_dispatch_init_clmul 5
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ push mbin_rdi
+ lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE4_1
+ jz _%1_init_done
+ test ecx, FLAG_CPUID1_ECX_CLMUL
+ jz _%1_init_done
+ lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je _%1_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+	mov	edi, eax		; save xgetbv.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne _%1_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je _%1_init_done
+ lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
+
+%if AS_FEATURE_LEVEL >= 10
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je _%1_init_done ; No AVX2 possible
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne _%1_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ jne _%1_init_done
+
+ and ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ lea mbin_rbx, [%5 WRT_OPT] ; AVX512/10 opt
+ cmove mbin_rsi, mbin_rbx
+%endif
+ _%1_init_done:
+ pop mbin_rdi
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+;;;;;
+; mbin_dispatch_init5 parameters
+; 1-> function name
+; 2-> base function
+; 3-> SSE4_2 or 00/01 optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX2/04 opt func
+;;;;;
+%macro mbin_dispatch_init5 5
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+ mov eax, 1
+ cpuid
+ ; Test for SSE4.2
+ test ecx, FLAG_CPUID1_ECX_SSE4_2
+ lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func
+ cmovne mbin_rsi, mbin_rbx
+
+ and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen2) opt func
+ jne _%1_init_done ; AVX is not available so end
+ mov mbin_rsi, mbin_rbx
+
+ ;; Try for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ lea mbin_rbx, [%5 WRT_OPT] ; AVX (gen4) opt func
+ cmovne mbin_rsi, mbin_rbx
+
+ ;; Does it have xmm and ymm support
+ xor ecx, ecx
+ xgetbv
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ je _%1_init_done
+ lea mbin_rsi, [%3 WRT_OPT]
+
+ _%1_init_done:
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+%if AS_FEATURE_LEVEL >= 6
+;;;;;
+; mbin_dispatch_init6 parameters
+; 1-> function name
+; 2-> base function
+; 3-> SSE4_2 or 00/01 optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX2/04 opt func
+; 6-> AVX512/06 opt func
+;;;;;
+%macro mbin_dispatch_init6 6
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ push mbin_rdi
+ lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE4_2
+ je _%1_init_done ; Use base function if no SSE4_2
+ lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je _%1_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+	mov	edi, eax		; save xgetbv.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne _%1_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je _%1_init_done
+ lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
+
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je _%1_init_done ; No AVX2 possible
+ lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne _%1_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
+ cmove mbin_rsi, mbin_rbx
+
+ _%1_init_done:
+ pop mbin_rdi
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+%else
+%macro mbin_dispatch_init6 6
+ mbin_dispatch_init5 %1, %2, %3, %4, %5
+%endmacro
+%endif
+
+%if AS_FEATURE_LEVEL >= 10
+;;;;;
+; mbin_dispatch_init7 parameters
+; 1-> function name
+; 2-> base function
+; 3-> SSE4_2 or 00/01 optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX2/04 opt func
+; 6-> AVX512/06 opt func
+; 7-> AVX512 Update/10 opt func
+;;;;;
+%macro mbin_dispatch_init7 7
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ push mbin_rdi
+ lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE4_2
+ je _%1_init_done ; Use base function if no SSE4_2
+ lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je _%1_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+	mov	edi, eax		; save xgetbv.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne _%1_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je _%1_init_done
+ lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
+
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je _%1_init_done ; No AVX2 possible
+ lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne _%1_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1
+ lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
+ cmove mbin_rsi, mbin_rbx
+
+ and ecx, FLAGS_CPUID7_ECX_AVX512_G2
+ cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2
+	lea	mbin_rbx, [%7 WRT_OPT]	; AVX512 update/10 opt
+ cmove mbin_rsi, mbin_rbx
+
+ _%1_init_done:
+ pop mbin_rdi
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+%else
+%macro mbin_dispatch_init7 7
+ mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
+%endmacro
+%endif
+
+%endif ; ifndef _MULTIBINARY_ASM_
diff --git a/src/isa-l/include/raid.h b/src/isa-l/include/raid.h
new file mode 100644
index 000000000..6100a4824
--- /dev/null
+++ b/src/isa-l/include/raid.h
@@ -0,0 +1,305 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef _RAID_H_
+#define _RAID_H_
+
+/**
+ * @file raid.h
+ * @brief Interface to RAID functions - XOR and P+Q calculation.
+ *
+ * This file defines the interface to optimized XOR calculation (RAID5) or P+Q
+ * dual parity (RAID6). Operations are carried out on an array of pointers to
+ * sources and output arrays.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Multi-binary functions */
+
+/**
+ * @brief Generate XOR parity vector from N sources, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes.
+ * @param array Array of pointers to source and dest. For XOR the dest is
+ * the last pointer. ie array[vects-1]. Src and dest
+ * pointers must be aligned to 32B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int xor_gen(int vects, int len, void **array);
+
+
+/**
+ * @brief Checks that array has XOR parity sum of 0 across all vectors, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param vects Number of vectors in array.
+ * @param len Length of each vector in bytes.
+ * @param array Array of pointers to vectors. Src and dest pointers
+ * must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int xor_check(int vects, int len, void **array);
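+
+/* Illustrative sketch only: generating XOR parity over four sources and then
+ * verifying it. 32B alignment satisfies the strictest (AVX) variant; the
+ * sizes are arbitrary.
+ *
+ *	int i, nvects = 5, len = 16 * 1024;
+ *	void *array[5];	// 4 sources + 1 parity dest
+ *	for (i = 0; i < nvects; i++)
+ *		posix_memalign(&array[i], 32, len);
+ *	// ... fill array[0..3] with data ...
+ *	if (xor_gen(nvects, len, array) != 0)	// parity written to array[4]
+ *		return -1;
+ *	if (xor_check(nvects, len, array) != 0)	// 0 means parity is consistent
+ *		return -1;
+ */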
+
+
+/**
+ * @brief Generate P+Q parity vectors from N sources, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes. Must be 32B aligned.
+ * @param array Array of pointers to source and dest. For P+Q the dest
+ * is the last two pointers. ie array[vects-2],
+ * array[vects-1]. P and Q parity vectors are
+ * written to these last two pointers. Src and dest
+ * pointers must be aligned to 32B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_gen(int vects, int len, void **array);
+
+
+/**
+ * @brief Checks that array of N sources, P and Q are consistent across all vectors, runs appropriate version.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param vects Number of vectors in array including P&Q.
+ * @param len Length of each vector in bytes. Must be 16B aligned.
+ * @param array Array of pointers to source and P, Q. P and Q parity
+ * are assumed to be the last two pointers in the array.
+ * All pointers must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_check(int vects, int len, void **array);
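+
+/* Illustrative sketch only: RAID6-style P+Q generation and verification over
+ * six data vectors. len is kept a multiple of 32 to satisfy the pq_gen
+ * alignment requirement; the sizes are arbitrary.
+ *
+ *	int i, nvects = 8, len = 32 * 1024;	// 6 data + P + Q
+ *	void *array[8];
+ *	for (i = 0; i < nvects; i++)
+ *		posix_memalign(&array[i], 32, len);
+ *	// ... fill array[0..5] with data ...
+ *	if (pq_gen(nvects, len, array) != 0)	// P -> array[6], Q -> array[7]
+ *		return -1;
+ *	if (pq_check(nvects, len, array) != 0)
+ *		return -1;
+ */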
+
+
+/* Arch specific versions */
+// x86 only
+#if defined(__i386__) || defined(__x86_64__)
+
+/**
+ * @brief Generate XOR parity vector from N sources.
+ * @requires SSE4.1
+ *
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes.
+ * @param array Array of pointers to source and dest. For XOR the dest is
+ * the last pointer. ie array[vects-1]. Src and dest pointers
+ * must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int xor_gen_sse(int vects, int len, void **array);
+
+
+/**
+ * @brief Generate XOR parity vector from N sources.
+ * @requires AVX
+ *
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes.
+ * @param array Array of pointers to source and dest. For XOR the dest is
+ * the last pointer. ie array[vects-1]. Src and dest pointers
+ * must be aligned to 32B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int xor_gen_avx(int vects, int len, void **array);
+
+
+/**
+ * @brief Checks that array has XOR parity sum of 0 across all vectors.
+ * @requires SSE4.1
+ *
+ * @param vects Number of vectors in array.
+ * @param len Length of each vector in bytes.
+ * @param array Array of pointers to vectors. Src and dest pointers
+ * must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int xor_check_sse(int vects, int len, void **array);
+
+
+/**
+ * @brief Generate P+Q parity vectors from N sources.
+ * @requires SSE4.1
+ *
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes. Must be 16B aligned.
+ * @param array Array of pointers to source and dest. For P+Q the dest
+ * is the last two pointers. ie array[vects-2],
+ * array[vects-1]. P and Q parity vectors are
+ * written to these last two pointers. Src and dest
+ * pointers must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_gen_sse(int vects, int len, void **array);
+
+
+/**
+ * @brief Generate P+Q parity vectors from N sources.
+ * @requires AVX
+ *
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes. Must be 16B aligned.
+ * @param array Array of pointers to source and dest. For P+Q the dest
+ * is the last two pointers. ie array[vects-2],
+ * array[vects-1]. P and Q parity vectors are
+ * written to these last two pointers. Src and dest
+ * pointers must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_gen_avx(int vects, int len, void **array);
+
+
+/**
+ * @brief Generate P+Q parity vectors from N sources.
+ * @requires AVX2
+ *
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes. Must be 32B aligned.
+ * @param array Array of pointers to source and dest. For P+Q the dest
+ * is the last two pointers. ie array[vects-2],
+ * array[vects-1]. P and Q parity vectors are
+ * written to these last two pointers. Src and dest
+ * pointers must be aligned to 32B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_gen_avx2(int vects, int len, void **array);
+
+
+/**
+ * @brief Checks that array of N sources, P and Q are consistent across all vectors.
+ * @requires SSE4.1
+ *
+ * @param vects Number of vectors in array including P&Q.
+ * @param len Length of each vector in bytes. Must be 16B aligned.
+ * @param array Array of pointers to source and P, Q. P and Q parity
+ *		are assumed to be the last two pointers in the array.
+ *		All pointers must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_check_sse(int vects, int len, void **array);
+
+#endif
+
+/**
+ * @brief Generate P+Q parity vectors from N sources, runs baseline version.
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes. Must be 16B aligned.
+ * @param array Array of pointers to source and dest. For P+Q the dest
+ * is the last two pointers. ie array[vects-2],
+ * array[vects-1]. P and Q parity vectors are
+ * written to these last two pointers. Src and dest pointers
+ * must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_gen_base(int vects, int len, void **array);
+
+
+/**
+ * @brief Generate XOR parity vector from N sources, runs baseline version.
+ * @param vects Number of source+dest vectors in array.
+ * @param len Length of each vector in bytes.
+ * @param array Array of pointers to source and dest. For XOR the dest is
+ * the last pointer. ie array[vects-1]. Src and dest pointers
+ * must be aligned to 32B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int xor_gen_base(int vects, int len, void **array);
+
+
+/**
+ * @brief Checks that array has XOR parity sum of 0 across all vectors, runs baseline version.
+ *
+ * @param vects Number of vectors in array.
+ * @param len Length of each vector in bytes.
+ * @param array Array of pointers to vectors. Src and dest pointers
+ * must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int xor_check_base(int vects, int len, void **array);
+
+
+/**
+ * @brief Checks that array of N sources, P and Q are consistent across all vectors, runs baseline version.
+ *
+ * @param vects Number of vectors in array including P&Q.
+ * @param len Length of each vector in bytes. Must be 16B aligned.
+ * @param array Array of pointers to source and P, Q. P and Q parity
+ * are assumed to be the last two pointers in the array.
+ * All pointers must be aligned to 16B.
+ *
+ * @returns 0 pass, other fail
+ */
+
+int pq_check_base(int vects, int len, void **array);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_RAID_H_
diff --git a/src/isa-l/include/reg_sizes.asm b/src/isa-l/include/reg_sizes.asm
new file mode 100644
index 000000000..b7ad842d8
--- /dev/null
+++ b/src/isa-l/include/reg_sizes.asm
@@ -0,0 +1,291 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifndef _REG_SIZES_ASM_
+%define _REG_SIZES_ASM_
+
+%ifndef AS_FEATURE_LEVEL
+%define AS_FEATURE_LEVEL 4
+%endif
+
+%define EFLAGS_HAS_CPUID (1<<21)
+%define FLAG_CPUID1_ECX_CLMUL (1<<1)
+%define FLAG_CPUID1_EDX_SSE2 (1<<26)
+%define FLAG_CPUID1_ECX_SSE3 (1)
+%define FLAG_CPUID1_ECX_SSE4_1 (1<<19)
+%define FLAG_CPUID1_ECX_SSE4_2 (1<<20)
+%define FLAG_CPUID1_ECX_POPCNT (1<<23)
+%define FLAG_CPUID1_ECX_AESNI (1<<25)
+%define FLAG_CPUID1_ECX_OSXSAVE (1<<27)
+%define FLAG_CPUID1_ECX_AVX (1<<28)
+%define FLAG_CPUID1_EBX_AVX2 (1<<5)
+
+%define FLAG_CPUID7_EBX_AVX2 (1<<5)
+%define FLAG_CPUID7_EBX_AVX512F (1<<16)
+%define FLAG_CPUID7_EBX_AVX512DQ (1<<17)
+%define FLAG_CPUID7_EBX_AVX512IFMA (1<<21)
+%define FLAG_CPUID7_EBX_AVX512PF (1<<26)
+%define FLAG_CPUID7_EBX_AVX512ER (1<<27)
+%define FLAG_CPUID7_EBX_AVX512CD (1<<28)
+%define FLAG_CPUID7_EBX_AVX512BW (1<<30)
+%define FLAG_CPUID7_EBX_AVX512VL (1<<31)
+
+%define FLAG_CPUID7_ECX_AVX512VBMI (1<<1)
+%define FLAG_CPUID7_ECX_AVX512VBMI2 (1 << 6)
+%define FLAG_CPUID7_ECX_GFNI (1 << 8)
+%define FLAG_CPUID7_ECX_VAES (1 << 9)
+%define FLAG_CPUID7_ECX_VPCLMULQDQ (1 << 10)
+%define FLAG_CPUID7_ECX_VNNI (1 << 11)
+%define FLAG_CPUID7_ECX_BITALG (1 << 12)
+%define FLAG_CPUID7_ECX_VPOPCNTDQ (1 << 14)
+
+%define FLAGS_CPUID7_EBX_AVX512_G1 (FLAG_CPUID7_EBX_AVX512F | FLAG_CPUID7_EBX_AVX512VL | FLAG_CPUID7_EBX_AVX512BW | FLAG_CPUID7_EBX_AVX512CD | FLAG_CPUID7_EBX_AVX512DQ)
+%define FLAGS_CPUID7_ECX_AVX512_G2 (FLAG_CPUID7_ECX_AVX512VBMI2 | FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ | FLAG_CPUID7_ECX_VNNI | FLAG_CPUID7_ECX_BITALG | FLAG_CPUID7_ECX_VPOPCNTDQ)
+
+%define FLAG_XGETBV_EAX_XMM (1<<1)
+%define FLAG_XGETBV_EAX_YMM (1<<2)
+%define FLAG_XGETBV_EAX_XMM_YMM 0x6
+%define FLAG_XGETBV_EAX_ZMM_OPM 0xe0
+
+%define FLAG_CPUID1_EAX_AVOTON 0x000406d0
+%define FLAG_CPUID1_EAX_STEP_MASK 0xfffffff0
+
+; define d and w variants for registers
+
+%define raxd eax
+%define raxw ax
+%define raxb al
+
+%define rbxd ebx
+%define rbxw bx
+%define rbxb bl
+
+%define rcxd ecx
+%define rcxw cx
+%define rcxb cl
+
+%define rdxd edx
+%define rdxw dx
+%define rdxb dl
+
+%define rsid esi
+%define rsiw si
+%define rsib sil
+
+%define rdid edi
+%define rdiw di
+%define rdib dil
+
+%define rbpd ebp
+%define rbpw bp
+%define rbpb bpl
+
+%define ymm0x xmm0
+%define ymm1x xmm1
+%define ymm2x xmm2
+%define ymm3x xmm3
+%define ymm4x xmm4
+%define ymm5x xmm5
+%define ymm6x xmm6
+%define ymm7x xmm7
+%define ymm8x xmm8
+%define ymm9x xmm9
+%define ymm10x xmm10
+%define ymm11x xmm11
+%define ymm12x xmm12
+%define ymm13x xmm13
+%define ymm14x xmm14
+%define ymm15x xmm15
+
+%define zmm0x xmm0
+%define zmm1x xmm1
+%define zmm2x xmm2
+%define zmm3x xmm3
+%define zmm4x xmm4
+%define zmm5x xmm5
+%define zmm6x xmm6
+%define zmm7x xmm7
+%define zmm8x xmm8
+%define zmm9x xmm9
+%define zmm10x xmm10
+%define zmm11x xmm11
+%define zmm12x xmm12
+%define zmm13x xmm13
+%define zmm14x xmm14
+%define zmm15x xmm15
+%define zmm16x xmm16
+%define zmm17x xmm17
+%define zmm18x xmm18
+%define zmm19x xmm19
+%define zmm20x xmm20
+%define zmm21x xmm21
+%define zmm22x xmm22
+%define zmm23x xmm23
+%define zmm24x xmm24
+%define zmm25x xmm25
+%define zmm26x xmm26
+%define zmm27x xmm27
+%define zmm28x xmm28
+%define zmm29x xmm29
+%define zmm30x xmm30
+%define zmm31x xmm31
+
+%define zmm0y ymm0
+%define zmm1y ymm1
+%define zmm2y ymm2
+%define zmm3y ymm3
+%define zmm4y ymm4
+%define zmm5y ymm5
+%define zmm6y ymm6
+%define zmm7y ymm7
+%define zmm8y ymm8
+%define zmm9y ymm9
+%define zmm10y ymm10
+%define zmm11y ymm11
+%define zmm12y ymm12
+%define zmm13y ymm13
+%define zmm14y ymm14
+%define zmm15y ymm15
+%define zmm16y ymm16
+%define zmm17y ymm17
+%define zmm18y ymm18
+%define zmm19y ymm19
+%define zmm20y ymm20
+%define zmm21y ymm21
+%define zmm22y ymm22
+%define zmm23y ymm23
+%define zmm24y ymm24
+%define zmm25y ymm25
+%define zmm26y ymm26
+%define zmm27y ymm27
+%define zmm28y ymm28
+%define zmm29y ymm29
+%define zmm30y ymm30
+%define zmm31y ymm31
+
+%define DWORD(reg) reg %+ d
+%define WORD(reg) reg %+ w
+%define BYTE(reg) reg %+ b
+
+%define XWORD(reg) reg %+ x
+
+%ifidn __OUTPUT_FORMAT__,elf32
+section .note.GNU-stack noalloc noexec nowrite progbits
+section .text
+%endif
+%ifidn __OUTPUT_FORMAT__,elf64
+ %define __x86_64__
+section .note.GNU-stack noalloc noexec nowrite progbits
+section .text
+%endif
+%ifidn __OUTPUT_FORMAT__,win64
+ %define __x86_64__
+%endif
+%ifidn __OUTPUT_FORMAT__,macho64
+ %define __x86_64__
+%endif
+
+%ifdef __x86_64__
+ %define endbranch db 0xf3, 0x0f, 0x1e, 0xfa
+%else
+ %define endbranch db 0xf3, 0x0f, 0x1e, 0xfb
+%endif
+
+%ifdef REL_TEXT
+ %define WRT_OPT
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define WRT_OPT wrt ..plt
+%else
+ %define WRT_OPT
+%endif
+
+%macro mk_global 1-3
+ %ifdef __NASM_VER__
+ %ifidn __OUTPUT_FORMAT__, macho64
+ global %1
+ %elifidn __OUTPUT_FORMAT__, win64
+ global %1
+ %else
+ global %1:%2 %3
+ %endif
+ %else
+ global %1:%2 %3
+ %endif
+%endmacro
+
+
+; Fixes for nasm's lack of MS proc helpers
+%ifdef __NASM_VER__
+ %ifidn __OUTPUT_FORMAT__, win64
+ %macro alloc_stack 1
+ sub rsp, %1
+ %endmacro
+
+ %macro proc_frame 1
+ %1:
+ %endmacro
+
+ %macro save_xmm128 2
+ movdqa [rsp + %2], %1
+ %endmacro
+
+ %macro save_reg 2
+ mov [rsp + %2], %1
+ %endmacro
+
+ %macro rex_push_reg 1
+ push %1
+ %endmacro
+
+ %macro push_reg 1
+ push %1
+ %endmacro
+
+ %define end_prolog
+ %endif
+
+ %define endproc_frame
+%endif
+
+%ifidn __OUTPUT_FORMAT__, macho64
+ %define elf64 macho64
+ mac_equ equ 1
+%endif
+
+%macro slversion 4
+ section .text
+ global %1_slver_%2%3%4
+ global %1_slver
+ %1_slver:
+ %1_slver_%2%3%4:
+ dw 0x%4
+ db 0x%3, 0x%2
+%endmacro
+
+%endif ; ifndef _REG_SIZES_ASM_
diff --git a/src/isa-l/include/test.h b/src/isa-l/include/test.h
new file mode 100644
index 000000000..31ccc67b9
--- /dev/null
+++ b/src/isa-l/include/test.h
@@ -0,0 +1,285 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _TEST_H
+#define _TEST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdint.h>
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+/* Decide whether to use benchmark time as an approximation or a minimum. Fewer
+ * calls to the timer are required for the approximation case. */
+#define BENCHMARK_MIN_TIME 0
+#define BENCHMARK_APPROX_TIME 1
+#ifndef BENCHMARK_TYPE
+#define BENCHMARK_TYPE BENCHMARK_MIN_TIME
+#endif
+
+#ifdef USE_RDTSC
+/* The use of rdtsc is nuanced. On many processors it corresponds to a
+ * standardized clock source. To obtain a meaningful result it may be
+ * necessary to fix the CPU clock to match the rdtsc tick rate.
+ */
+# include <inttypes.h>
+# include <x86intrin.h>
+# define USE_CYCLES
+#else
+# include <time.h>
+#define USE_SECONDS
+#endif
+
+#ifdef USE_RDTSC
+#ifndef BENCHMARK_TIME
+# define BENCHMARK_TIME 6
+#endif
+# define GHZ 1000000000
+# define UNIT_SCALE (GHZ)
+# define CALLIBRATE_TIME (UNIT_SCALE / 2)
+static inline long long get_time(void) {
+ unsigned int dummy;
+ return __rdtscp(&dummy);
+}
+
+static inline long long get_res(void) {
+ return 1;
+}
+#else
+#ifndef BENCHMARK_TIME
+# define BENCHMARK_TIME 3
+#endif
+#ifdef _MSC_VER
+#define UNIT_SCALE get_res()
+#define CALLIBRATE_TIME (UNIT_SCALE / 4)
+static inline long long get_time(void) {
+ long long ret = 0;
+ QueryPerformanceCounter(&ret);
+ return ret;
+}
+
+static inline long long get_res(void) {
+ long long ret = 0;
+ QueryPerformanceFrequency(&ret);
+ return ret;
+}
+#else
+# define NANO_SCALE 1000000000
+# define UNIT_SCALE NANO_SCALE
+# define CALLIBRATE_TIME (UNIT_SCALE / 4)
+#ifdef __FreeBSD__
+# define CLOCK_ID CLOCK_MONOTONIC_PRECISE
+#else
+# define CLOCK_ID CLOCK_MONOTONIC
+#endif
+
+static inline long long get_time(void) {
+ struct timespec time;
+ long long nano_total;
+ clock_gettime(CLOCK_ID, &time);
+ nano_total = time.tv_sec;
+ nano_total *= NANO_SCALE;
+ nano_total += time.tv_nsec;
+ return nano_total;
+}
+
+static inline long long get_res(void) {
+ struct timespec time;
+ long long nano_total;
+ clock_getres(CLOCK_ID, &time);
+ nano_total = time.tv_sec;
+ nano_total *= NANO_SCALE;
+ nano_total += time.tv_nsec;
+ return nano_total;
+}
+#endif
+#endif
+struct perf {
+ long long start;
+ long long stop;
+ long long run_total;
+ long long iterations;
+};
+
+static inline void perf_init(struct perf *p) {
+ p->start = 0;
+ p->stop = 0;
+ p->run_total = 0;
+}
+
+static inline void perf_continue(struct perf *p) {
+ p->start = get_time();
+}
+
+static inline void perf_pause(struct perf *p) {
+ p->stop = get_time();
+ p->run_total = p->run_total + p->stop - p->start;
+ p->start = p->stop;
+}
+
+static inline void perf_start(struct perf *p) {
+ perf_init(p);
+ perf_continue(p);
+}
+
+static inline void perf_stop(struct perf *p) {
+ perf_pause(p);
+}
+
+static inline double get_time_elapsed(struct perf *p) {
+ return 1.0 * p->run_total / UNIT_SCALE;
+}
+
+static inline long long get_base_elapsed(struct perf *p) {
+ return p->run_total;
+}
+
+static inline unsigned long long estimate_perf_iterations(struct perf *p,
+ unsigned long long runs,
+ unsigned long long total) {
+ total = total * runs;
+ if (get_base_elapsed(p) > 0)
+ return (total + get_base_elapsed(p) - 1) / get_base_elapsed(p);
+ else
+ return (total + get_res() - 1) / get_res();
+}
+
+#define CALLIBRATE(PERF, FUNC_CALL) { \
+ unsigned long long _i, _iter = 1; \
+ perf_start(PERF); \
+ FUNC_CALL; \
+ perf_pause(PERF); \
+ \
+ while (get_base_elapsed(PERF) < CALLIBRATE_TIME) { \
+ _iter = estimate_perf_iterations(PERF, _iter, \
+ 2 * CALLIBRATE_TIME); \
+ perf_start(PERF); \
+ for (_i = 0; _i < _iter; _i++) { \
+ FUNC_CALL; \
+ } \
+ perf_stop(PERF); \
+ } \
+ (PERF)->iterations=_iter; \
+}
+
+#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) { \
+ unsigned long long _i, _iter = (PERF)->iterations; \
+ unsigned long long _run_total = RUN_TIME; \
+ _run_total *= UNIT_SCALE; \
+ _iter = estimate_perf_iterations(PERF, _iter, _run_total);\
+ (PERF)->iterations = 0; \
+ perf_start(PERF); \
+ for (_i = 0; _i < _iter; _i++) { \
+ FUNC_CALL; \
+ } \
+ perf_pause(PERF); \
+ (PERF)->iterations += _iter; \
+ \
+ if(get_base_elapsed(PERF) < _run_total && \
+ BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
+ _iter = estimate_perf_iterations(PERF, _iter, \
+ _run_total - get_base_elapsed(PERF) + \
+ (UNIT_SCALE / 16)); \
+ perf_continue(PERF); \
+ for (_i = 0; _i < _iter; _i++) { \
+ FUNC_CALL; \
+ } \
+ perf_pause(PERF); \
+ (PERF)->iterations += _iter; \
+ } \
+}
+
+#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) { \
+ if((RUN_TIME) > 0) { \
+ CALLIBRATE(PERF, FUNC_CALL); \
+ PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
+ \
+ } else { \
+ (PERF)->iterations = 1; \
+ perf_start(PERF); \
+ FUNC_CALL; \
+ perf_stop(PERF); \
+ } \
+}
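+
+/* Illustrative sketch only: timing a function with the BENCHMARK macro, in
+ * the style of the isa-l perf tests. crc32_gzip_refl(), buf and len are
+ * stand-ins for whatever is being measured.
+ *
+ *	struct perf p;
+ *	BENCHMARK(&p, BENCHMARK_TIME, crc32_gzip_refl(0, buf, len));
+ *	perf_print(p, len);	// MB/s, or ticks/byte when USE_RDTSC is set
+ */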
+
+#ifdef USE_CYCLES
+static inline void perf_print(struct perf p, long long unit_count) {
+ long long total_units = p.iterations * unit_count;
+
+ printf("runtime = %10lld ticks", get_base_elapsed(&p));
+ if (total_units != 0) {
+ printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte",
+ total_units / (1000000), get_time_elapsed(&p),
+ get_base_elapsed(&p) / (double)total_units);
+ }
+ printf("\n");
+}
+#else
+static inline void perf_print(struct perf p, double unit_count) {
+ long long total_units = p.iterations * unit_count;
+ long long usecs = (long long)(get_time_elapsed(&p) * 1000000);
+
+ printf("runtime = %10lld usecs", usecs);
+ if (total_units != 0) {
+ printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s",
+ total_units / (1000000), get_time_elapsed(&p),
+ ((double)total_units) / (1000000 * get_time_elapsed(&p)));
+ }
+ printf("\n");
+}
+#endif
+
+static inline uint64_t get_filesize(FILE * fp) {
+ uint64_t file_size;
+ fpos_t pos, pos_curr;
+
+ fgetpos(fp, &pos_curr); /* Save current position */
+#if defined(_WIN32) || defined(_WIN64)
+ _fseeki64(fp, 0, SEEK_END);
+#else
+ fseeko(fp, 0, SEEK_END);
+#endif
+ fgetpos(fp, &pos);
+ file_size = *(uint64_t *) & pos;
+ fsetpos(fp, &pos_curr); /* Restore position */
+
+ return file_size;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _TEST_H
diff --git a/src/isa-l/include/types.h b/src/isa-l/include/types.h
new file mode 100644
index 000000000..531c79724
--- /dev/null
+++ b/src/isa-l/include/types.h
@@ -0,0 +1,77 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ * @file types.h
+ * @brief Defines standard width types.
+ *
+ */
+
+#ifndef __TYPES_H
+#define __TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _WIN32
+#ifdef __MINGW32__
+# include <_mingw.h>
+#endif
+#endif
+
+
+#if defined __unix__ || defined __APPLE__
+# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+# define __forceinline static inline
+# define aligned_free(x) free(x)
+#else
+# ifdef __MINGW32__
+# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
+# define aligned_free(x) _aligned_free(x)
+# else
+# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
+# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
+# define aligned_free(x) _aligned_free(x)
+# endif
+#endif
+
+#ifdef DEBUG
+# define DEBUG_PRINT(x) printf x
+#else
+# define DEBUG_PRINT(x) do {} while (0)
+#endif
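+
+/* Illustrative sketch only: the portability macros in use, with a
+ * hypothetical buffer.
+ *
+ *	DECLARE_ALIGNED(static uint8_t tmp[64], 32);	// 32B-aligned on all toolchains
+ *	DEBUG_PRINT(("tmp at %p\n", (void *)tmp));	// compiled out unless DEBUG
+ */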
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //__TYPES_H
diff --git a/src/isa-l/include/unaligned.h b/src/isa-l/include/unaligned.h
new file mode 100644
index 000000000..f7b1ed88e
--- /dev/null
+++ b/src/isa-l/include/unaligned.h
@@ -0,0 +1,76 @@
+/**********************************************************************
+ Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef UNALIGNED_H
+#define UNALIGNED_H
+
+#include "stdint.h"
+#include "string.h"
+
+static inline uint16_t load_u16(uint8_t * buf) {
+ uint16_t ret;
+ memcpy(&ret, buf, sizeof(ret));
+ return ret;
+}
+
+static inline uint32_t load_u32(uint8_t * buf) {
+ uint32_t ret;
+ memcpy(&ret, buf, sizeof(ret));
+ return ret;
+}
+
+static inline uint64_t load_u64(uint8_t * buf) {
+ uint64_t ret;
+ memcpy(&ret, buf, sizeof(ret));
+ return ret;
+}
+
+static inline uintmax_t load_umax(uint8_t * buf) {
+ uintmax_t ret;
+ memcpy(&ret, buf, sizeof(ret));
+ return ret;
+}
+
+static inline void store_u16(uint8_t * buf, uint16_t val) {
+ memcpy(buf, &val, sizeof(val));
+}
+
+static inline void store_u32(uint8_t * buf, uint32_t val) {
+ memcpy(buf, &val, sizeof(val));
+}
+
+static inline void store_u64(uint8_t * buf, uint64_t val) {
+ memcpy(buf, &val, sizeof(val));
+}
+
+static inline void store_umax(uint8_t * buf, uintmax_t val) {
+ memcpy(buf, &val, sizeof(val));
+}
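+
+/* Illustrative sketch only: these helpers go through memcpy, so the access is
+ * well defined at any alignment; compilers lower it to a plain load/store.
+ *
+ *	uint8_t hdr[6] = { 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00 };
+ *	uint16_t magic = load_u16(hdr);	// 0x8b1f on little-endian
+ *	store_u32(hdr + 2, 0);		// no alignment fault possible
+ */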
+
+#endif
diff --git a/src/isa-l/isa-l.def b/src/isa-l/isa-l.def
new file mode 100644
index 000000000..ebf499758
--- /dev/null
+++ b/src/isa-l/isa-l.def
@@ -0,0 +1,117 @@
+LIBRARY isa-l
+VERSION 2.30
+EXPORTS
+
+ec_encode_data_sse @1
+ec_init_tables @2
+gf_gen_cauchy1_matrix @3
+gf_gen_rs_matrix @4
+gf_invert_matrix @5
+gf_mul @6
+gf_vect_dot_prod_base @7
+gf_vect_mul_base @8
+ec_encode_data_base @9
+gf_vect_mul_init @10
+gf_vect_mul_sse @11
+gf_vect_mul_avx @12
+gf_vect_dot_prod_sse @13
+gf_vect_dot_prod_avx @14
+gf_vect_dot_prod_avx2 @15
+gf_2vect_dot_prod_sse @16
+gf_3vect_dot_prod_sse @17
+gf_4vect_dot_prod_sse @18
+gf_5vect_dot_prod_sse @19
+gf_6vect_dot_prod_sse @20
+gf_2vect_dot_prod_avx @21
+gf_3vect_dot_prod_avx @22
+gf_4vect_dot_prod_avx @23
+gf_5vect_dot_prod_avx @24
+gf_6vect_dot_prod_avx @25
+gf_2vect_dot_prod_avx2 @26
+gf_3vect_dot_prod_avx2 @27
+gf_4vect_dot_prod_avx2 @28
+gf_5vect_dot_prod_avx2 @29
+gf_6vect_dot_prod_avx2 @30
+gf_vect_mad_sse @31
+gf_2vect_mad_sse @32
+gf_3vect_mad_sse @33
+gf_4vect_mad_sse @34
+gf_5vect_mad_sse @35
+gf_6vect_mad_sse @36
+gf_vect_mad_avx @37
+gf_2vect_mad_avx @38
+gf_3vect_mad_avx @39
+gf_4vect_mad_avx @40
+gf_5vect_mad_avx @41
+gf_6vect_mad_avx @42
+gf_vect_mad_avx2 @43
+gf_2vect_mad_avx2 @44
+gf_3vect_mad_avx2 @45
+gf_4vect_mad_avx2 @46
+gf_5vect_mad_avx2 @47
+gf_6vect_mad_avx2 @48
+ec_encode_data @49
+gf_vect_mul @50
+ec_encode_data_update @51
+gf_vect_dot_prod @52
+gf_vect_mad @53
+xor_gen @54
+xor_check @55
+pq_gen @56
+pq_check @57
+xor_gen_sse @58
+xor_gen_avx @59
+xor_check_sse @60
+pq_gen_sse @61
+pq_gen_avx @62
+pq_gen_avx2 @63
+pq_check_sse @64
+pq_gen_base @65
+xor_gen_base @66
+xor_check_base @67
+pq_check_base @68
+crc16_t10dif @69
+crc32_ieee @70
+crc32_iscsi @71
+crc16_t10dif_base @72
+crc32_ieee_base @73
+crc32_iscsi_base @74
+isal_deflate_stateless @75
+isal_deflate @76
+isal_deflate_init @77
+isal_update_histogram @78
+isal_create_hufftables @79
+isal_create_hufftables_subset @80
+isal_deflate_stateless_init @81
+isal_deflate_set_hufftables @82
+isal_inflate @83
+isal_inflate_stateless @84
+isal_inflate_init @85
+crc64_jones_norm_base @86
+crc64_jones_refl_base @87
+crc64_iso_norm_base @88
+crc64_iso_refl_base @89
+crc64_ecma_norm_base @90
+crc64_ecma_refl_base @91
+crc64_ecma_refl @92
+crc64_ecma_norm @93
+crc64_iso_refl @94
+crc64_iso_norm @95
+crc64_jones_refl @96
+crc64_jones_norm @97
+crc32_gzip_refl_base @98
+crc32_gzip_refl @99
+isal_deflate_set_dict @100
+isal_deflate_reset @101
+isal_inflate_set_dict @102
+isal_inflate_reset @103
+crc16_t10dif_copy @104
+isal_read_gzip_header @105
+isal_read_zlib_header @106
+isal_write_gzip_header @107
+isal_write_zlib_header @108
+isal_zero_detect @109
+isal_gzip_header_init @110
+isal_adler32 @111
+isal_deflate_process_dict @112
+isal_deflate_reset_dict @113
diff --git a/src/isa-l/libisal.pc.in b/src/isa-l/libisal.pc.in
new file mode 100644
index 000000000..886151009
--- /dev/null
+++ b/src/isa-l/libisal.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libisal
+Description: Library for storage systems
+Version: @VERSION@
+Libs: -L${libdir} -lisal
+Libs.private:
+Cflags: -I${includedir}
diff --git a/src/isa-l/make.inc b/src/isa-l/make.inc
new file mode 100644
index 000000000..7c5f042c8
--- /dev/null
+++ b/src/isa-l/make.inc
@@ -0,0 +1,380 @@
+########################################################################
+# Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+
+# Makefile include for optimized libraries
+# make targets:
+# lib - build library of optimized functions
+# slib - build shared library
+# test - run unit tests of functions
+# perf - run performance tests
+# install - install headers and libs to system location
+# sim - run on simulator
+# trace - get simulator trace
+# clean - remove object files
+
+version ?= 2.30.0
+host_cpu ?= $(shell uname -m | sed -e 's/amd/x86_/')
+arch ?= $(shell uname | grep -v -e Linux -e BSD )
+
+# aarch64 cpu arch = aarch64
+ifeq ($(host_cpu)_$(arch),aarch64_)
+ arch = aarch64
+endif
+
+CC = gcc
+AS = nasm
+AWK = awk
+
+DEBUG = -g
+DEBUG_yasm = -g dwarf2
+DEBUG_nasm = -g
+
+# Default arch= build options
+CFLAGS_ = -Wall
+ASFLAGS_ = -f elf64
+ARFLAGS_ = cr $@
+STRIP_gcc = strip -d -R .comment $@
+
+# arch=32 build options
+ASFLAGS_32 = -f elf32
+CFLAGS_32 = -m32
+ARFLAGS_32 = cr $@
+
+# arch=win64 build options
+ASFLAGS_win64 = -f win64
+CFLAGS_icl = -Qstd=c99
+ARFLAGS_win64 = -out:$@
+
+# arch=mingw build options
+ASFLAGS_mingw = -f win64
+ARFLAGS_mingw = cr $@
+
+LDFLAGS_so = -Wl,-soname,$(soname)
+
+ifeq ($(arch),mingw)
+ CC=x86_64-w64-mingw32-gcc
+ AR=x86_64-w64-mingw32-ar
+ AS=yasm
+ LDFLAGS += -Wl,--force-exe-suffix
+ SIM=wine
+ EXT=.exe
+ CLEANFILES+=*.exe
+endif
+
+# arch=noarch build options
+ARFLAGS_noarch = cr $@
+ifeq ($(arch),noarch)
+ host_cpu=base_aliases
+endif
+
+# arch=aarch64 build options
+ifeq ($(lib_debug),1)
+ ASFLAGS_aarch64 = -g -c
+else
+ ASFLAGS_aarch64 = -c
+endif
+
+ARFLAGS_aarch64 = cr $@
+ifeq ($(arch),aarch64)
+ AS=$(CC) -D__ASSEMBLY__
+ SIM=
+endif
+
+ASFLAGS_Darwin = -f macho64 --prefix=_
+ARFLAGS_Darwin = -r $@
+ifeq ($(shell uname),Darwin)
+ LDFLAGS_so =
+ STRIP_gcc =
+endif
+
+INCLUDE = $(patsubst %,-I%/,$(subst :, ,$(VPATH)))
+CFLAGS = $(CFLAGS_$(arch)) $(CFLAGS_$(CC)) $(DEBUG) -O2 $(DEFINES) $(INCLUDE)
+ASFLAGS = $(ASFLAGS_$(arch)) $(ASFLAGS_$(CC)) $(DEBUG_$(AS)) $(DEFINES) $(INCLUDE)
+ARFLAGS = $(ARFLAGS_$(arch))
+DEFINES += $(addprefix -D , $D)
+CLEANFILES += $(O) *.o *.a $(all_tests) $(bin_PROGRAMS) $(lib_name) $(so_lib_name) $(all_llvm_fuzz_tests)
+
+# set host_cpu=base_aliases for unsupported CPUs
+ifeq ($(filter aarch64 x86_%,$(host_cpu)),)
+ host_cpu=base_aliases
+endif
+
+other_tests += $(other_tests_$(host_cpu))
+
+lsrc += $(lsrc_$(host_cpu))
+O = bin
+lobj += $(patsubst %.c,%.o,$(patsubst %.S,%.o,$(patsubst %.asm,%.o,$(lsrc) $(lsrc_intrinsic))))
+objs = $(addprefix $(O)/,$(notdir $(lobj)))
+
+
+lib_name ?= isa-l.a
+default: lib slib progs
+
+# Defaults for windows build
+ifeq ($(arch),win64)
+ AR=lib
+ CC=cl
+ OUTPUT_OPTION = -Fo$@
+ DEBUG=
+ lib_name := $(basename $(lib_name)).lib
+endif
+lsrcwin64 = $(lsrc)
+unit_testswin64 = $(unit_tests)
+exampleswin64 = $(examples)
+perf_testswin64 = $(perf_tests)
+
+
+# Build and run unit tests, performance tests, etc.
+all_tests = $(notdir $(sort $(perf_tests) $(check_tests) $(unit_tests) $(examples) $(other_tests)))
+all_unit_tests = $(notdir $(sort $(check_tests) $(unit_tests)))
+all_perf_tests = $(notdir $(sort $(perf_tests)))
+all_check_tests = $(notdir $(sort $(check_tests)))
+all_llvm_fuzz_tests = $(notdir $(sort $(llvm_fuzz_tests)))
+
+$(all_unit_tests): % : %.c $(lib_name)
+$(all_perf_tests): % : %.c $(lib_name)
+$(sort $(notdir $(examples))): % : %.c $(lib_name)
+$(sort $(notdir $(other_tests))): % : %.c $(lib_name)
+
+fuzz_args = -fsanitize=fuzzer,address
+$(all_llvm_fuzz_tests): FUZZLINK = $(fuzz_args)
+$(all_llvm_fuzz_tests): CFLAGS += $(fuzz_args)
+$(all_llvm_fuzz_tests): CXXFLAGS += $(fuzz_args)
+$(all_llvm_fuzz_tests): % : %.o $(lib_name)
+ $(CXX) $(CXXFLAGS) $^ $(LDLIBS) $(FUZZLINK) -o $@
+
+
+# Check for modern as
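+# Assemble a one-line probe and keep the highest level that assembles:
+# level 4 = SSE4.1 (pblendvb), 6 = AVX-512 (vinserti32x8),
+# 10 = AVX-512 VBMI2 (vpcompressb). test-as echoes the level on success,
+# otherwise the previous value passed as its fourth argument.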
+test-as = $(shell hash printf && printf $(3) > $(2) && $(AS) $(ASFLAGS) ${tmpf} -o /dev/null 2> /dev/null && echo $(1) || echo $(4))
+as_4 := "pblendvb xmm2, xmm1;"
+as_6 := "vinserti32x8 zmm0, ymm1, 1;"
+as_10 := "vpcompressb zmm0 {k1}, zmm1;"
+
+tmpf := $(shell mktemp)
+as_feature_level := $(call test-as, 4, $(tmpf), $(as_4), $(as_feature_level))
+as_feature_level := $(call test-as, 6, $(tmpf), $(as_6), $(as_feature_level))
+as_feature_level := $(call test-as, 10, $(tmpf), $(as_10), $(as_feature_level))
+tmpf := $(shell rm ${tmpf})
+
+ifneq ($(findstring $(as_feature_level),6 10),)
+ D_HAVE_AS_KNOWS_AVX512_y := -DHAVE_AS_KNOWS_AVX512
+endif
+
+CFLAGS += -DAS_FEATURE_LEVEL=$(as_feature_level) $(D_HAVE_AS_KNOWS_AVX512_y)
+ASFLAGS += -DAS_FEATURE_LEVEL=$(as_feature_level) $(D_HAVE_AS_KNOWS_AVX512_y)
+
+
+# Check for pthreads
+have_threads ?= $(shell printf "\#include <pthread.h>\nint main(void){return 0;}\n" | $(CC) -x c - -o /dev/null -lpthread && echo y )
+THREAD_LD_$(have_threads) := -lpthread
+THREAD_CFLAGS_$(have_threads) := -DHAVE_THREADS
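+# have_threads expands to 'y' only when the pthread link test succeeds, so
+# THREAD_LD_y/THREAD_CFLAGS_y are defined (and referenced below) only then.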
+
+progs: $(bin_PROGRAMS)
+$(bin_PROGRAMS): CFLAGS += -DVERSION=\"$(version)\"
+$(bin_PROGRAMS): LDLIBS += $(THREAD_LD_y)
+$(bin_PROGRAMS): CFLAGS += $(THREAD_CFLAGS_y)
+sim test trace: $(addsuffix .run,$(all_unit_tests))
+perf: $(addsuffix .run,$(all_perf_tests))
+check: $(addsuffix .run,$(all_check_tests))
+ex: $(notdir $(examples))
+all: lib $(all_tests)
+other: $(notdir $(other_tests))
+llvm_fuzz_tests: $(all_llvm_fuzz_tests)
+tests: $(all_unit_tests)
+perfs: $(all_perf_tests)
+checks: $(all_check_tests)
+trace: SIM=sde -debugtrace --
+sim: SIM=sde --
+check test sim:
+ @echo Finished running $@
+
+$(objs): | $(O)
+$(O): ; mkdir -p $(O)
+
+# Build rule to run tests
+$(addsuffix .run,$(all_tests)): %.run : %
+ $(SIM) ./$<$(EXT)
+ @echo Completed run: $<
+
+# Other build rules
+msg = $(if $(DEBUG),DEBUG) $(patsubst 32,32-bit,$(host_cpu)) $D
+
+# yasm/nasm assembly files
+$(O)/%.o: %.asm
+ @echo " ---> Building $< $(msg)"
+ @$(AS) $(ASFLAGS) -o $@ $<
+
+# gcc assembly files
+$(O)/%.o: $(host_cpu)/%.S
+ @echo " ---> Building $< $(msg)"
+ @$(AS) $(ASFLAGS) -o $@ $<
+
+$(O)/%.o : $(host_cpu)/%.c
+ @echo " ---> Building $< $(msg)"
+ @$(COMPILE.c) $(OUTPUT_OPTION) $<
+$(O)/%.o %.o: %.c
+ @echo " ---> Building $< $(msg)"
+ @$(COMPILE.c) $(OUTPUT_OPTION) $<
+
+$(all_tests):
+ @echo " ---> Building Test $@ $(msg)"
+ @$(LINK.o) $(CFLAGS) $^ $(LDLIBS) -o $@
+
+$(bin_PROGRAMS): % : %_cli.c $(lib_name)
+ @echo " ---> Building Programs $@ $(msg)"
+ @$(LINK.o) $(CFLAGS) $^ $(LDLIBS) -o $@
+
+
+# Target to build lib files
+lib: $(lib_name)
+ifneq ($(lib_debug),1)
+ $(lib_name): DEBUG_$(AS)= # Don't put debug symbols in the lib
+ $(lib_name): DEBUG=
+ $(lib_name): DEFINES+=-D NDEBUG
+endif
+ifeq ($(lib_debug),1)
+ DEBUG+=-D DEBUG # Define DEBUG for macros
+endif
+
+#lib $(lib_name): $(lib_name)(${objs})
+$(lib_name): $(objs)
+ @echo " ---> Creating Lib $@"
+ @$(AR) $(ARFLAGS) $^
+ifneq ($(lib_debug),1)
+ @$(STRIP_$(CC))
+endif
+
+
+# Target for shared lib
+so_lib_name = bin/libisal.so
+so_lib_inst = $(notdir $(so_lib_name))
+so_lib_ver = $(so_lib_inst).$(version)
+soname = $(so_lib_inst).$(word 1, $(subst ., ,$(version)))
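+# e.g. version 2.30.0 yields so_lib_ver = libisal.so.2.30.0, soname = libisal.so.2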
+
+slib: $(so_lib_name)
+aobjs += $(addprefix $(O)/,$(patsubst %.asm,%.o,$(filter %.asm,$(notdir $(lsrc) $(lsrc_intrinsic)))))
+aobjs += $(addprefix $(O)/,$(patsubst %.S,%.o,$(filter %.S,$(notdir $(lsrc) $(lsrc_intrinsic)))))
+shared_objs += $(addprefix $(O)/shared_ver_,$(patsubst %.c,%.o,$(filter %.c,$(notdir $(lsrc) $(lsrc_intrinsic)))))
+
+$(O)/shared_ver_%.o: %.c
+ @echo " ---> Building shared $< $(msg)"
+ @$(COMPILE.c) $(OUTPUT_OPTION) $<
+$(O)/shared_ver_%.o: $(host_cpu)/%.c
+ @echo " ---> Building shared $< $(msg)"
+ @$(COMPILE.c) $(OUTPUT_OPTION) $<
+ifneq ($(lib_debug),1)
+ $(so_lib_name): DEBUG_$(AS)=
+ $(so_lib_name): DEBUG=
+ $(so_lib_name): DEFINES+=-D NDEBUG
+endif
+
+$(shared_objs): CFLAGS += -fPIC
+$(shared_objs) $(aobjs): | $(O)
+$(so_lib_name): LDFLAGS+=$(LDFLAGS_so)
+$(so_lib_name): $(shared_objs) $(aobjs)
+ @echo " ---> Creating Shared Lib $@"
+ @$(CC) $(CFLAGS) --shared $(LDFLAGS) -o $@ $^
+ @(cd $(@D); ln -f -s $(so_lib_inst) $(soname))
+
+
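+# Generate the umbrella header. The version macros come from splitting on
+# '.', e.g. '#define.ISAL_MAJOR_VERSION.2.30.0' re-printed from fields
+# 1, 2 and 3 becomes '#define ISAL_MAJOR_VERSION 2'.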
+isa-l.h:
+ @echo 'Building $@'
+ @echo '' >> $@
+ @echo '/**' >> $@
+ @echo ' * @file isa-l.h' >> $@
+ @echo ' * @brief Include for ISA-L library' >> $@
+ @echo ' */' >> $@
+ @echo '' >> $@
+ @echo '#ifndef _ISAL_H_' >> $@
+ @echo '#define _ISAL_H_' >> $@
+ @echo '' >> $@
+ @echo '#define.ISAL_MAJOR_VERSION.${version}' | ${AWK} -F . '{print $$1, $$2, $$3}' >> $@
+ @echo '#define.ISAL_MINOR_VERSION.${version}' | ${AWK} -F . '{print $$1, $$2, $$4}' >> $@
+ @echo '#define.ISAL_PATCH_VERSION.${version}' | ${AWK} -F . '{print $$1, $$2, $$5}' >> $@
+ @echo '#define ISAL_MAKE_VERSION(maj, min, patch) ((maj) * 0x10000 + (min) * 0x100 + (patch))' >> $@
+ @echo '#define ISAL_VERSION ISAL_MAKE_VERSION(ISAL_MAJOR_VERSION, ISAL_MINOR_VERSION, ISAL_PATCH_VERSION)' >> $@
+ @echo '' >> $@
+ @for unit in $(sort $(extern_hdrs)); do echo "#include <isa-l/$$unit>" | sed -e 's;include/;;' >> $@; done
+ @echo '#endif //_ISAL_H_' >> $@
+
+
+# Target for install
+prefix = /usr/local
+man1dir ?= $(prefix)/share/man/man1
+install_dirs = $(prefix)/lib $(prefix)/include/isa-l $(prefix)/bin $(man1dir)
+$(install_dirs): ; mkdir -p $@
+install: $(sort $(extern_hdrs)) | $(install_dirs) $(lib_name) $(so_lib_name) isa-l.h $(bin_PROGRAMS)
+ install -m 644 $(lib_name) $(prefix)/lib/libisal.a
+ install -m 644 $^ $(prefix)/include/isa-l/.
+ install -m 664 isa-l.h $(prefix)/include/.
+ install -m 664 include/types.h $(prefix)/include/isa-l/.
+ install -m 664 $(so_lib_name) $(prefix)/lib/$(so_lib_ver)
+ (cd $(prefix)/lib && ln -f -s $(so_lib_ver) $(soname) && ln -f -s $(so_lib_ver) $(so_lib_inst))
+ifeq ($(shell uname),Darwin)
+ (cd $(prefix)/lib && ln -f -s $(so_lib_ver) $(basename $(so_lib_inst)).dylib)
+ which glibtool && glibtool --mode=finish $(prefix)/lib
+else
+ which libtool && libtool --mode=finish $(prefix)/lib || \
+ echo 'Lib installed at $(prefix)/lib. Run system-dependent programs to add shared lib path.'
+endif
+ install -m 774 $(bin_PROGRAMS) $(prefix)/bin/.
+ install -m 664 $(dist_man_MANS) $(man1dir)/.
+
+uninstall:
+ $(RM) $(prefix)/lib/libisal.a
+ $(RM) $(prefix)/lib/$(soname)
+ $(RM) $(prefix)/lib/$(so_lib_ver)
+ $(RM) $(prefix)/lib/$(so_lib_inst)
+ $(RM) -r $(prefix)/include/isa-l
+ $(RM) $(prefix)/include/isa-l.h
+ $(RM) $(prefix)/lib/$(basename $(so_lib_inst)).dylib
+ $(RM) $(prefix)/bin/$(notdir $(bin_PROGRAMS))
+ $(RM) $(man1dir)/$(notdir $(dist_man_MANS))
+
+# Collect performance data
+rpt_name = perf_report_$(shell uname -n)_$(shell date +%y%m%d).perf
+
+perf_report:
+ echo Results for $(rpt_name) >> $(rpt_name)
+ $(MAKE) -f Makefile.unx -k perf | tee -a $(rpt_name)
+ @echo Summary:
+ -grep runtime $(rpt_name)
+
+
+clean:
+ @echo Cleaning up
+ @$(RM) -r $(CLEANFILES)
+
+
+doc: isa-l.h
+ (cat Doxyfile; echo 'PROJECT_NUMBER=$(version)') | doxygen -
+ $(MAKE) -C generated_doc/latex &> generated_doc/latex_build_api.log
+ cp generated_doc/latex/refman.pdf isa-l_api_$(version).pdf
+
diff --git a/src/isa-l/mem/Makefile.am b/src/isa-l/mem/Makefile.am
new file mode 100644
index 000000000..c864f66fd
--- /dev/null
+++ b/src/isa-l/mem/Makefile.am
@@ -0,0 +1,48 @@
+########################################################################
+# Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+include mem/aarch64/Makefile.am
+
+lsrc += mem/mem_zero_detect_base.c
+
+lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c
+lsrc_ppc64le += mem/mem_zero_detect_base_aliases.c
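+# base_aliases (and ppc64le) builds skip multibinary dispatch; the alias
+# file maps isal_zero_detect() directly to the base implementation.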
+
+lsrc_x86_64 += mem/mem_zero_detect_avx.asm \
+ mem/mem_zero_detect_sse.asm \
+ mem/mem_multibinary.asm
+
+extern_hdrs += include/mem_routines.h
+
+other_src += include/test.h \
+ include/types.h
+
+check_tests += mem/mem_zero_detect_test
+
+perf_tests += mem/mem_zero_detect_perf
diff --git a/src/isa-l/mem/aarch64/Makefile.am b/src/isa-l/mem/aarch64/Makefile.am
new file mode 100644
index 000000000..c18659872
--- /dev/null
+++ b/src/isa-l/mem/aarch64/Makefile.am
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+lsrc_aarch64 += \
+ mem/aarch64/mem_zero_detect_neon.S \
+ mem/aarch64/mem_multibinary_arm.S \
+ mem/aarch64/mem_aarch64_dispatcher.c
diff --git a/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c
new file mode 100644
index 000000000..0dfe3a3ae
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c
@@ -0,0 +1,39 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
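+/*
+ * Multibinary resolver: selects the NEON routine when the kernel reports
+ * ASIMD support in the hwcaps, otherwise falls back to the portable C
+ * version. The isal_zero_detect stub calls whichever is returned here.
+ */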
+DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(mem_zero_detect_neon);
+
+ return PROVIDER_BASIC(mem_zero_detect);
+
+}
diff --git a/src/isa-l/mem/aarch64/mem_multibinary_arm.S b/src/isa-l/mem/aarch64/mem_multibinary_arm.S
new file mode 100644
index 000000000..baac3ca38
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_multibinary_arm.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include <aarch64_multibinary.h>
+
+mbin_interface isal_zero_detect
+
diff --git a/src/isa-l/mem/aarch64/mem_zero_detect_neon.S b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S
new file mode 100644
index 000000000..6f93ff612
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S
@@ -0,0 +1,243 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.text
+.arch armv8-a
+
+/* int mem_zero_detect_neon(void *buf, size_t n) */
+
+// input: buf -> x0
+// input: n -> x1
+// output: -> x0 (true or false)
+
+.global mem_zero_detect_neon
+.type mem_zero_detect_neon, %function
+
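+// Strategy: OR blocks of the buffer together and test the result once per
+// block, since any non-zero byte makes the OR non-zero. Block size steps
+// down 384B -> 128B -> 64B -> 8B, then a byte-wise tail for the last 0-7.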
+mem_zero_detect_neon:
+ cmp x1, #(16*24-1)
+ b.ls .loop_16x24_end
+
+.loop_16x24: // 16x24 block loop
+
+ ldr q0, [x0]
+ ldr q1, [x0, #16]
+ ldr q2, [x0, #(16*2)]
+ ldr q3, [x0, #(16*3)]
+ ldr q4, [x0, #(16*4)]
+ ldr q5, [x0, #(16*5)]
+ ldr q6, [x0, #(16*6)]
+ ldr q7, [x0, #(16*7)]
+ ldr q16, [x0, #(16*8)]
+ ldr q17, [x0, #(16*9)]
+ ldr q18, [x0, #(16*10)]
+ ldr q19, [x0, #(16*11)]
+ ldr q20, [x0, #(16*12)]
+ ldr q21, [x0, #(16*13)]
+ ldr q22, [x0, #(16*14)]
+ ldr q23, [x0, #(16*15)]
+ ldr q24, [x0, #(16*16)]
+ ldr q25, [x0, #(16*17)]
+ ldr q26, [x0, #(16*18)]
+ ldr q27, [x0, #(16*19)]
+ ldr q28, [x0, #(16*20)]
+ ldr q29, [x0, #(16*21)]
+ ldr q30, [x0, #(16*22)]
+ ldr q31, [x0, #(16*23)]
+
+ add x0, x0, #(16*24)
+
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v4.16b, v4.16b, v5.16b
+ orr v6.16b, v6.16b, v7.16b
+ orr v16.16b, v16.16b, v17.16b
+ orr v18.16b, v18.16b, v19.16b
+ orr v20.16b, v20.16b, v21.16b
+ orr v22.16b, v22.16b, v23.16b
+ orr v24.16b, v24.16b, v25.16b
+ orr v26.16b, v26.16b, v27.16b
+ orr v28.16b, v28.16b, v29.16b
+ orr v30.16b, v30.16b, v31.16b
+
+ orr v0.16b, v0.16b, v2.16b
+ orr v4.16b, v4.16b, v6.16b
+ orr v16.16b, v16.16b, v18.16b
+ orr v20.16b, v20.16b, v22.16b
+ orr v24.16b, v24.16b, v26.16b
+ orr v28.16b, v28.16b, v30.16b
+
+ orr v0.16b, v0.16b, v4.16b
+ orr v16.16b, v16.16b, v20.16b
+ orr v24.16b, v24.16b, v28.16b
+
+ orr v0.16b, v0.16b, v16.16b
+ orr v0.16b, v0.16b, v24.16b
+
+ mov x3, v0.d[0]
+ mov x2, v0.d[1]
+ orr x2, x3, x2
+ cbnz x2, .fail_exit
+
+ // loop condition check
+ sub x1, x1, #(16*24)
+ cmp x1, #(16*24-1)
+ b.hi .loop_16x24
+
+.loop_16x24_end:
+ cmp x1, #(16*8-1)
+ b.ls .loop_16x8_end
+
+.loop_16x8: // 16x8 block loop
+ ldr q0, [x0]
+ ldr q1, [x0, #16]
+ ldr q2, [x0, #(16*2)]
+ ldr q3, [x0, #(16*3)]
+ ldr q4, [x0, #(16*4)]
+ ldr q5, [x0, #(16*5)]
+ ldr q6, [x0, #(16*6)]
+ ldr q7, [x0, #(16*7)]
+
+ add x0, x0, #(16*8)
+
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v4.16b, v4.16b, v5.16b
+ orr v6.16b, v6.16b, v7.16b
+
+ orr v0.16b, v0.16b, v2.16b
+ orr v4.16b, v4.16b, v6.16b
+ orr v0.16b, v0.16b, v4.16b
+
+ mov x3, v0.d[0]
+ mov x2, v0.d[1]
+ orr x2, x3, x2
+ cbnz x2, .fail_exit
+
+ sub x1, x1, #(16*8)
+ cmp x1, #(16*8-1)
+ b.hi .loop_16x8
+
+.loop_16x8_end:
+ cmp x1, #(8*8-1)
+ b.ls .loop_8x8_end
+
+.loop_8x8: // 8x8 block loop
+ ldp x2, x3, [x0]
+ ldp x4, x5, [x0, #16]
+ ldp x6, x7, [x0, #32]
+ ldp x8, x9, [x0, #48]
+
+ add x0, x0, #(8*8)
+
+ orr x2, x2, x3
+ orr x4, x4, x5
+ orr x6, x6, x7
+ orr x8, x8, x9
+ orr x2, x2, x4
+ orr x6, x6, x8
+ orr x2, x2, x6
+
+ cbnz x2, .fail_exit
+
+ sub x1, x1, #(8*8)
+ cmp x1, #(8*8-1)
+ b.hi .loop_8x8
+
+.loop_8x8_end:
+ cmp x1, #(8-1)
+ b.ls .handle_remainder
+
+.loop_8: // loop per 8bytes
+ ldr x2, [x0]
+ add x0, x0, #8
+ cbnz x2, .fail_exit
+
+ sub x1, x1, #8
+ cmp x1, #7
+ b.hi .loop_8
+
+.loop_8_end:
+
+ // check remaining bytes
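+	// (0..7 bytes): each case ORs leading single bytes, then falls into
+	// the widest load covering the rest (word for 4..7, halfword for 2..3)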
+.handle_remainder:
+ mov w2, #0
+
+ cmp x1, #0
+	beq	.handle_remainder_end
+ cmp x1, #1
+ beq .case1
+ cmp x1, #2
+ beq .case2
+ cmp x1, #3
+ beq .case3
+ cmp x1, #4
+ beq .case4
+ cmp x1, #5
+ beq .case5
+ cmp x1, #6
+ beq .case6
+
+.case7:	// len == 7 is the only case left, so control drops here directly
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case6:
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case5:
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case4:
+ ldr w3, [x0]
+ orr w2, w2, w3
+	b	.handle_remainder_end
+.case3:
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case2:
+ ldrh w3, [x0]
+ orr w2, w2, w3
+	b	.handle_remainder_end
+.case1:
+ ldrb w3, [x0]
+ orr w2, w2, w3
+
+.handle_remainder_end:
+ cbz w2, .pass_exit
+
+.fail_exit:
+ mov w0, #0xffffffff
+ ret
+
+.pass_exit:
+ mov w0, #0x0
+ ret
diff --git a/src/isa-l/mem/mem_multibinary.asm b/src/isa-l/mem/mem_multibinary.asm
new file mode 100644
index 000000000..38f63e22b
--- /dev/null
+++ b/src/isa-l/mem/mem_multibinary.asm
@@ -0,0 +1,42 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+default rel
+[bits 64]
+
+extern mem_zero_detect_avx
+extern mem_zero_detect_sse
+extern mem_zero_detect_base
+
+mbin_interface isal_zero_detect
+
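+; Slots: interface, base (C fallback), SSE, AVX, AVX2. No separate AVX2
+; routine exists, so the AVX version is registered for both upper slots.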
+mbin_dispatch_init5 isal_zero_detect, mem_zero_detect_base, mem_zero_detect_sse, mem_zero_detect_avx, mem_zero_detect_avx
diff --git a/src/isa-l/mem/mem_zero_detect_avx.asm b/src/isa-l/mem/mem_zero_detect_avx.asm
new file mode 100644
index 000000000..1b5de8415
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_avx.asm
@@ -0,0 +1,189 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 r10
+ %define return rax
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ %endmacro
+%endif
+
+%define src arg0
+%define len arg1
+%define ptr arg2
+%define pos return
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+mk_global mem_zero_detect_avx, function
+func(mem_zero_detect_avx)
+ FUNC_SAVE
+ mov pos, 0
+ sub len, 4*32
+ jle .mem_z_small_block
+
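+; len was biased by -4*32 above, so the loop runs while a full 128-byte
+; block remains past pos; .mem_z_last_block then re-reads the final 128
+; bytes, which may overlap data already checked.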
+.mem_z_loop:
+ vmovdqu ymm0, [src+pos]
+ vmovdqu ymm1, [src+pos+1*32]
+ vmovdqu ymm2, [src+pos+2*32]
+ vmovdqu ymm3, [src+pos+3*32]
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ vptest ymm2, ymm2
+ jnz .return_fail
+ vptest ymm3, ymm3
+ jnz .return_fail
+ add pos, 4*32
+ cmp pos, len
+ jl .mem_z_loop
+
+.mem_z_last_block:
+ vmovdqu ymm0, [src+len]
+ vmovdqu ymm1, [src+len+1*32]
+ vmovdqu ymm2, [src+len+2*32]
+ vmovdqu ymm3, [src+len+3*32]
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ vptest ymm2, ymm2
+ jnz .return_fail
+ vptest ymm3, ymm3
+ jnz .return_fail
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+
+.mem_z_small_block:
+ add len, 4*32
+ cmp len, 2*32
+ jl .mem_z_lt64
+ vmovdqu ymm0, [src]
+ vmovdqu ymm1, [src+32]
+ vmovdqu ymm2, [src+len-2*32]
+ vmovdqu ymm3, [src+len-1*32]
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ vptest ymm2, ymm2
+ jnz .return_fail
+ vptest ymm3, ymm3
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt64:
+ cmp len, 32
+ jl .mem_z_lt32
+ vmovdqu ymm0, [src]
+ vmovdqu ymm1, [src+len-32]
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ jmp .return_pass
+
+
+.mem_z_lt32:
+ cmp len, 16
+ jl .mem_z_lt16
+ vmovdqu xmm0, [src]
+ vmovdqu xmm1, [src+len-16]
+ vptest xmm0, xmm0
+ jnz .return_fail
+ vptest xmm1, xmm1
+ jnz .return_fail
+ jmp .return_pass
+
+
+.mem_z_lt16:
+ cmp len, 8
+ jl .mem_z_lt8
+ mov tmp, [src]
+ mov tmp3,[src+len-8]
+ or tmp, tmp3
+ test tmp, tmp
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt8:
+ cmp len, 0
+ je .return_pass
+.mem_z_1byte_loop:
+ mov tmpb, [src+pos]
+ cmp tmpb, 0
+ jnz .return_fail
+ add pos, 1
+ cmp pos, len
+ jl .mem_z_1byte_loop
+ jmp .return_pass
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
diff --git a/src/isa-l/mem/mem_zero_detect_base.c b/src/isa-l/mem/mem_zero_detect_base.c
new file mode 100644
index 000000000..235301658
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_base.c
@@ -0,0 +1,69 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include <stddef.h>
+#include "unaligned.h"
+
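+/*
+ * Portable fallback: returns 0 when the first n bytes of buf are all
+ * zero, -1 otherwise. Machine-word loads handle the bulk; the switch
+ * ORs the 0..7 trailing bytes into a single accumulator.
+ */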
+int mem_zero_detect_base(void *buf, size_t n)
+{
+ uint8_t *c = buf;
+ uintmax_t a = 0;
+
+ // Check buffer in native machine width comparisons
+ while (n >= sizeof(uintmax_t)) {
+ n -= sizeof(uintmax_t);
+ if (load_umax(c) != 0)
+ return -1;
+ c += sizeof(uintmax_t);
+ }
+
+ // Check remaining bytes
+ switch (n) {
+ case 7:
+ a |= *c++; // fall through to case 6,5,4
+ case 6:
+ a |= *c++; // fall through to case 5,4
+ case 5:
+ a |= *c++; // fall through to case 4
+ case 4:
+ a |= load_u32(c);
+ break;
+ case 3:
+ a |= *c++; // fall through to case 2
+ case 2:
+ a |= load_u16(c);
+ break;
+ case 1:
+ a |= *c;
+ break;
+ }
+
+ return (a == 0) ? 0 : -1;
+}
diff --git a/src/isa-l/mem/mem_zero_detect_base_aliases.c b/src/isa-l/mem/mem_zero_detect_base_aliases.c
new file mode 100644
index 000000000..8c75b06be
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_base_aliases.c
@@ -0,0 +1,38 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include "mem_routines.h"
+
+int mem_zero_detect_base(void *buf, size_t n);
+
+int isal_zero_detect(void *mem, size_t len)
+{
+ return mem_zero_detect_base(mem, len);
+}
diff --git a/src/isa-l/mem/mem_zero_detect_perf.c b/src/isa-l/mem/mem_zero_detect_perf.c
new file mode 100644
index 000000000..90a308862
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_perf.c
@@ -0,0 +1,60 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mem_routines.h"
+#include "test.h"
+#include "types.h"
+
+#define TEST_LEN 8*1024
+#define TEST_TYPE_STR "_warm"
+
+int main(int argc, char *argv[])
+{
+ int val = 0;
+ void *buf;
+ struct perf start;
+
+ printf("Test mem_zero_detect_perf %d bytes\n", TEST_LEN);
+
+ if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail\n");
+ return -1;
+ }
+
+ memset(buf, 0, TEST_LEN);
+ BENCHMARK(&start, BENCHMARK_TIME, val |= isal_zero_detect(buf, TEST_LEN));
+
+ printf("mem_zero_detect_perf" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_LEN);
+
+ return 0;
+}
diff --git a/src/isa-l/mem/mem_zero_detect_sse.asm b/src/isa-l/mem/mem_zero_detect_sse.asm
new file mode 100644
index 000000000..c84f0f034
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_sse.asm
@@ -0,0 +1,176 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 r10
+ %define return rax
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ %endmacro
+%endif
+
+%define src arg0
+%define len arg1
+%define ptr arg2
+%define pos return
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+mk_global mem_zero_detect_sse, function
+func(mem_zero_detect_sse)
+ FUNC_SAVE
+ mov pos, 0
+ sub len, 4*16
+ jle .mem_z_small_block
+
+.mem_z_loop:
+ movdqu xmm0, [src+pos]
+ movdqu xmm1, [src+pos+1*16]
+ movdqu xmm2, [src+pos+2*16]
+ movdqu xmm3, [src+pos+3*16]
+ ptest xmm0, xmm0
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ ptest xmm2, xmm2
+ jnz .return_fail
+ ptest xmm3, xmm3
+ jnz .return_fail
+ add pos, 4*16
+ cmp pos, len
+ jl .mem_z_loop
+
+.mem_z_last_block:
+ movdqu xmm0, [src+len]
+ movdqu xmm1, [src+len+1*16]
+ movdqu xmm2, [src+len+2*16]
+ movdqu xmm3, [src+len+3*16]
+ ptest xmm0, xmm0
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ ptest xmm2, xmm2
+ jnz .return_fail
+ ptest xmm3, xmm3
+ jnz .return_fail
+
+.return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+
+.mem_z_small_block:
+ add len, 4*16
+ cmp len, 2*16
+ jl .mem_z_lt32
+ movdqu xmm0, [src]
+ movdqu xmm1, [src+16]
+ movdqu xmm2, [src+len-2*16]
+ movdqu xmm3, [src+len-1*16]
+ ptest xmm0, xmm0
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ ptest xmm2, xmm2
+ jnz .return_fail
+ ptest xmm3, xmm3
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt32:
+ cmp len, 16
+ jl .mem_z_lt16
+ movdqu xmm0, [src]
+ movdqu xmm1, [src+len-16]
+ ptest xmm0, xmm0
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt16:
+ cmp len, 8
+ jl .mem_z_lt8
+ mov tmp, [src]
+ mov tmp3,[src+len-8]
+ or tmp, tmp3
+ test tmp, tmp
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt8:
+ cmp len, 0
+ je .return_pass
+.mem_z_1byte_loop:
+ mov tmpb, [src+pos]
+ cmp tmpb, 0
+ jnz .return_fail
+ add pos, 1
+ cmp pos, len
+ jl .mem_z_1byte_loop
+ jmp .return_pass
+
+.return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
diff --git a/src/isa-l/mem/mem_zero_detect_test.c b/src/isa-l/mem/mem_zero_detect_test.c
new file mode 100644
index 000000000..12d5f4bdb
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_test.c
@@ -0,0 +1,226 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "mem_routines.h"
+#include "types.h"
+
+#define TEST_MEM 10*1024*1024
+#define TEST_LEN 8*1024
+#define RAND_ALIGN 32
+#define BORDER_BYTES (5*RAND_ALIGN + 7)
+
+#ifndef RANDOMS
+# define RANDOMS 2000
+#endif
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+int main(int argc, char *argv[])
+{
+ int i, j, sign;
+ long long r, l;
+ void *buf;
+ unsigned char *a;
+ int failures = 0, ret_neg = 1;
+
+ printf("mem_zero_detect_test %d bytes, %d randoms, seed=0x%x ", TEST_MEM, RANDOMS,
+ TEST_SEED);
+ if (posix_memalign(&buf, 64, TEST_MEM)) {
+		printf("alloc error: Fail\n");
+ return -1;
+ }
+
+ srand(TEST_SEED);
+
+ // Test full zero buffer
+ memset(buf, 0, TEST_MEM);
+ failures = isal_zero_detect(buf, TEST_MEM);
+
+ if (failures) {
+ printf("Fail large buf test\n");
+ return failures;
+ }
+ putchar('.');
+
+ // Test small buffers
+ for (i = 0; i < TEST_LEN; i++) {
+ failures |= isal_zero_detect(buf, i);
+ if (failures) {
+ printf("Fail len=%d\n", i);
+ return failures;
+ }
+ }
+ putchar('.');
+
+ // Test small buffers near end of alloc region
+ a = buf;
+ for (i = 0; i < TEST_LEN; i++)
+ failures |= isal_zero_detect(&a[TEST_LEN - i], i);
+
+ if (failures) {
+ printf("Fail:\n");
+ return failures;
+ }
+ putchar('.');
+
+ // Test for detect non zero
+ a[TEST_MEM / 2] = 1;
+ ret_neg = isal_zero_detect(a, TEST_MEM);
+ if (ret_neg == 0) {
+ printf("Fail on not detect\n");
+ return -1;
+ }
+ a[TEST_MEM / 2] = 0;
+ putchar('.');
+
+ // Test various non-zero offsets
+ for (i = 0; i < BORDER_BYTES; i++) {
+ for (j = 0; j < CHAR_BIT; j++) {
+ a[i] = 1 << j;
+ ret_neg = isal_zero_detect(a, TEST_MEM);
+ if (ret_neg == 0) {
+ printf("Fail on not detect offsets %d, %d\n", i, j);
+ return -1;
+ }
+ a[i] = 0;
+ }
+ }
+ putchar('.');
+ fflush(0);
+
+ // Test random non-zero offsets
+ for (i = 0; i < RANDOMS; i++) {
+ r = rand();
+ r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1));
+ if (r >= TEST_LEN)
+ continue;
+
+ a[r] = 1 << (r & (CHAR_BIT - 1));
+ ret_neg = isal_zero_detect(a, TEST_MEM);
+ if (ret_neg == 0) {
+ printf("Fail on not detect rand %d, e=%lld\n", i, r);
+ return -1;
+ }
+ a[r] = 0;
+ }
+ putchar('.');
+ fflush(0);
+
+ // Test putting non-zero byte at end of buffer
+ for (i = 1; i < BORDER_BYTES; i++) {
+ for (j = 0; j < CHAR_BIT; j++) {
+ a[TEST_MEM - i] = 1 << j;
+ ret_neg = isal_zero_detect(a, TEST_MEM);
+ if (ret_neg == 0) {
+ printf("Fail on not detect rand offset=%d, idx=%d\n", i, j);
+ return -1;
+ }
+ a[TEST_MEM - i] = 0;
+ }
+ }
+ putchar('.');
+
+ // Test various size buffers and non-zero offsets
+ for (l = 1; l < TEST_LEN; l++) {
+ for (i = 0; i < l + BORDER_BYTES; i++) {
+ failures = isal_zero_detect(a, l);
+
+ if (failures) {
+ printf("Fail on detect non-zero with l=%lld\n", l);
+ return -1;
+ }
+
+ a[i] = 1;
+ ret_neg = isal_zero_detect(a, l);
+
+ if ((i < l) && (ret_neg == 0)) {
+ printf("Fail on non-zero buffer l=%lld err=%d\n", l, i);
+ return -1;
+ }
+ if ((i >= l) && (ret_neg != 0)) {
+ printf("Fail on bad pass detect l=%lld err=%d\n", l, i);
+ return -1;
+ }
+ a[i] = 0;
+ }
+ }
+ putchar('.');
+
+ // Test random test size and non-zero error offsets
+ for (i = 0; i < RANDOMS; i++) {
+ r = rand();
+ r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1));
+ l = r + 1 + (rand() & (CHAR_BIT - 1));
+ a[r] = 1 << (r & (CHAR_BIT - 1));
+ ret_neg = isal_zero_detect(a, l);
+ if (ret_neg == 0) {
+ printf("Fail on not detect rand %d, l=%lld, e=%lld\n", i, l, r);
+ return -1;
+ }
+ a[r] = 0;
+ }
+ putchar('.');
+ fflush(0);
+
+ // Test combinations of zero and non-zero buffers
+ for (i = 0; i < RANDOMS; i++) {
+ r = rand();
+ r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1));
+ sign = rand() & 1 ? 1 : -1;
+ l = r + sign * (rand() & (2 * RAND_ALIGN - 1));
+
+ if ((l >= TEST_LEN) || (l < 0) || (r >= TEST_LEN))
+ continue;
+
+ a[r] = 1 << (r & (CHAR_BIT - 1));
+ ret_neg = isal_zero_detect(a, l);
+
+ if ((r < l) && (ret_neg == 0)) {
+ printf("Fail on non-zero rand buffer %d, l=%lld, e=%lld\n", i, l, r);
+ return -1;
+ }
+ if ((r >= l) && (ret_neg != 0)) {
+ printf("Fail on bad pass zero detect rand %d, l=%lld, e=%lld\n", i, l,
+ r);
+ return -1;
+ }
+
+ a[r] = 0;
+ }
+ putchar('.');
+ fflush(0);
+
+ printf(failures == 0 ? " Pass\n" : " Fail\n");
+ return failures;
+}
diff --git a/src/isa-l/programs/Makefile.am b/src/isa-l/programs/Makefile.am
new file mode 100644
index 000000000..46f2a2306
--- /dev/null
+++ b/src/isa-l/programs/Makefile.am
@@ -0,0 +1,38 @@
+########################################################################
+# Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+
+bin_PROGRAMS += programs/igzip
+programs_igzip_SOURCES = programs/igzip_cli.c
+programs_igzip_LDADD = libisal.la
+dist_man_MANS = programs/igzip.1
+other_src += programs/igzip.1.h2m
+
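+# Regenerate the man page from the CLI's --help output; the leading '-'
+# lets the build continue when help2man is not installed.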
+programs/igzip.1: % : %.h2m programs/igzip_cli.c
+ -help2man -o $@ -i $< -N ./programs/igzip
diff --git a/src/isa-l/programs/igzip.1 b/src/isa-l/programs/igzip.1
new file mode 100644
index 000000000..fd20512df
--- /dev/null
+++ b/src/isa-l/programs/igzip.1
@@ -0,0 +1,87 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.11.
+.TH IGZIP "1" "November 2020" "igzip command line interface 2.30.0" "User Commands"
+.SH NAME
+igzip \- compress or decompress files similar to gzip
+.SH SYNOPSIS
+.B igzip
+[\fI\,options\/\fR] [\fI\,infiles\/\fR]
+.SH DESCRIPTION
+
+Compress or decompress files similar to gzip using the ISA-L fast deflate library.
+
+Output .gz files are compatible with gzip and [RFC-1952].
+
+Options are similar to gzip except --keep is default.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+help, print this message
+.TP
+\-#
+use compression level # with 0 <= # <= 3
+.TP
+\fB\-o\fR
+<file> output file
+.TP
+\fB\-c\fR, \fB\-\-stdout\fR
+write to stdout
+.TP
+\fB\-d\fR, \fB\-\-decompress\fR
+decompress file
+.TP
+\fB\-z\fR, \fB\-\-compress\fR
+compress file (default)
+.TP
+\fB\-f\fR, \fB\-\-force\fR
+overwrite output without prompting
+.TP
+\fB\-\-rm\fR
+remove source files after successful (de)compression
+.TP
+\fB\-k\fR, \fB\-\-keep\fR
+keep source files (default)
+.TP
+\fB\-S\fR, \fB\-\-suffix\fR <.suf>
+suffix to use while (de)compressing
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+show version number
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+verbose mode
+.TP
+\fB\-N\fR, \fB\-\-name\fR
+save/use file name and timestamp in compress/decompress
+.TP
+\fB\-n\fR, \fB\-\-no\-name\fR
+do not save/use file name and timestamp in compress/decompress
+.TP
+\fB\-t\fR, \fB\-\-test\fR
+test compressed file integrity
+.TP
+\fB\-T\fR, \fB\-\-threads\fR <n>
+use n threads to compress if enabled
+.TP
+\fB\-q\fR, \fB\-\-quiet\fR
+suppress warnings
+.PP
+with no infile, or when infile is \- , read standard input
+.SH EXAMPLES
+
+Make compressed file1.gz and file2.gz and keep file1 and file2.
+.RS
+.B igzip file1 file2
+.RE
+
+Piped compression and decompression.
+.RS
+.B igzip -c file.txt | igzip -d -c -
+.RE
+
+Streaming compression from output of tar, compress level 2.
+.RS
+.B tar cf - dir1 | igzip -2 > dir1.tar.gz
+.RE
+.SH "REPORTING BUGS"
+
+Report bugs to https://github.com/intel/isa-l/issues
diff --git a/src/isa-l/programs/igzip.1.h2m b/src/isa-l/programs/igzip.1.h2m
new file mode 100644
index 000000000..819cd2d45
--- /dev/null
+++ b/src/isa-l/programs/igzip.1.h2m
@@ -0,0 +1,31 @@
+[Name]
+igzip \- compress or decompress files similar to gzip
+
+[Description]
+
+Compress or decompress files similar to gzip using the ISA-L fast deflate library.
+
+Output .gz files are compatible with gzip and [RFC-1952].
+
+Options are similar to gzip except --keep is default.
+
+[Examples]
+
+Make compressed file1.gz and file2.gz and keep file1 and file2.
+.RS
+.B igzip file1 file2
+.RE
+
+Piped compression and decompression.
+.RS
+.B igzip -c file.txt | igzip -d -c -
+.RE
+
+Streaming compression from output of tar, compress level 2.
+.RS
+.B tar cf - dir1 | igzip -2 > dir1.tar.gz
+.RE
+
+[Reporting Bugs]
+
+Report bugs to https://github.com/intel/isa-l/issues
diff --git a/src/isa-l/programs/igzip_cli.c b/src/isa-l/programs/igzip_cli.c
new file mode 100644
index 000000000..53124af21
--- /dev/null
+++ b/src/isa-l/programs/igzip_cli.c
@@ -0,0 +1,1206 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#define _FILE_OFFSET_BITS 64
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <getopt.h>
+#include <sys/stat.h>
+#include <utime.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include "igzip_lib.h" /* Normally you use isa-l.h instead for external programs */
+
+#if defined (HAVE_THREADS)
+# include <pthread.h>
+# include "crc.h"
+#endif
+
+#if !defined (VERSION)
+# if defined (ISAL_VERSION)
+# define VERSION ISAL_VERSION
+# else
+# define VERSION "unknown version"
+# endif
+#endif
+
+#define BAD_OPTION 1
+#define BAD_LEVEL 1
+#define FILE_EXISTS 0
+#define MALLOC_FAILED -1
+#define FILE_OPEN_ERROR -2
+#define FILE_READ_ERROR -3
+#define FILE_WRITE_ERROR -4
+
+#define BUF_SIZE 1024
+#define BLOCK_SIZE (1024 * 1024)
+
+#define MAX_FILEPATH_BUF 4096
+
+#define UNIX 3
+
+#define NAME_DEFAULT 0
+#define NO_NAME 1
+#define YES_NAME 2
+
+#define NO_TEST 0
+#define TEST 1
+
+#define LEVEL_DEFAULT 2
+#define DEFAULT_SUFFIX_LEN 3
+char *default_suffixes[] = { ".gz", ".z" };
+int default_suffixes_lens[] = { 3, 2 };
+
+char stdin_file_name[] = "-";
+int stdin_file_name_len = 1;
+
+enum compression_modes {
+ COMPRESS_MODE,
+ DECOMPRESS_MODE
+};
+
+enum long_only_opt_val {
+ RM
+};
+
+enum log_types {
+ INFORM,
+ WARN,
+ ERROR,
+ VERBOSE
+};
+
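+/* Default level-buffer sizes for each compression level; levels compiled
+ * out of the library fall back to 0 (no level buffer available). */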
+int level_size_buf[10] = {
+#ifdef ISAL_DEF_LVL0_DEFAULT
+ ISAL_DEF_LVL0_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL1_DEFAULT
+ ISAL_DEF_LVL1_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL2_DEFAULT
+ ISAL_DEF_LVL2_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL3_DEFAULT
+ ISAL_DEF_LVL3_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL4_DEFAULT
+ ISAL_DEF_LVL4_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL5_DEFAULT
+ ISAL_DEF_LVL5_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL6_DEFAULT
+ ISAL_DEF_LVL6_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL7_DEFAULT
+ ISAL_DEF_LVL7_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL8_DEFAULT
+ ISAL_DEF_LVL8_DEFAULT,
+#else
+ 0,
+#endif
+#ifdef ISAL_DEF_LVL9_DEFAULT
+ ISAL_DEF_LVL9_DEFAULT,
+#else
+ 0,
+#endif
+};
+
+struct cli_options {
+ char *infile_name;
+ size_t infile_name_len;
+ char *outfile_name;
+ size_t outfile_name_len;
+ char *suffix;
+ size_t suffix_len;
+ int level;
+ int mode;
+ int use_stdout;
+ int remove;
+ int force;
+ int quiet_level;
+ int verbose_level;
+ int name;
+ int test;
+ int threads;
+ uint8_t *in_buf;
+ uint8_t *out_buf;
+ uint8_t *level_buf;
+ size_t in_buf_size;
+ size_t out_buf_size;
+ size_t level_buf_size;
+};
+
+struct cli_options global_options;
+
+void init_options(struct cli_options *options)
+{
+ options->infile_name = NULL;
+ options->infile_name_len = 0;
+ options->outfile_name = NULL;
+ options->outfile_name_len = 0;
+ options->suffix = NULL;
+ options->suffix_len = 0;
+ options->level = LEVEL_DEFAULT;
+ options->mode = COMPRESS_MODE;
+ options->use_stdout = false;
+ options->remove = false;
+ options->force = false;
+ options->quiet_level = 0;
+ options->verbose_level = 0;
+ options->name = NAME_DEFAULT;
+ options->test = NO_TEST;
+ options->in_buf = NULL;
+ options->out_buf = NULL;
+ options->level_buf = NULL;
+ options->in_buf_size = 0;
+ options->out_buf_size = 0;
+ options->level_buf_size = 0;
+ options->threads = 1;
+}
+
+int is_interactive(void)
+{
+ int ret;
+ ret = !global_options.force && !global_options.quiet_level && isatty(fileno(stdin));
+ return ret;
+}
+
+size_t get_filesize(FILE * fp)
+{
+ size_t file_size;
+ fpos_t pos, pos_curr;
+
+ fgetpos(fp, &pos_curr); /* Save current position */
+#if defined(_WIN32) || defined(_WIN64)
+ _fseeki64(fp, 0, SEEK_END);
+#else
+ fseeko(fp, 0, SEEK_END);
+#endif
+ fgetpos(fp, &pos);
+ file_size = *(size_t *)&pos;
+ fsetpos(fp, &pos_curr); /* Restore position */
+
+ return file_size;
+}
+
+uint32_t get_posix_filetime(FILE * fp)
+{
+ struct stat file_stats;
+ fstat(fileno(fp), &file_stats);
+ return file_stats.st_mtime;
+}
+
+uint32_t set_filetime(char *file_name, uint32_t posix_time)
+{
+ struct utimbuf new_time;
+ new_time.actime = posix_time;
+ new_time.modtime = posix_time;
+ return utime(file_name, &new_time);
+}
+
+void log_print(int log_type, char *format, ...)
+{
+ va_list args;
+ va_start(args, format);
+
+ switch (log_type) {
+ case INFORM:
+ vfprintf(stdout, format, args);
+ break;
+ case WARN:
+ if (global_options.quiet_level <= 0)
+ vfprintf(stderr, format, args);
+ break;
+ case ERROR:
+ if (global_options.quiet_level <= 1)
+ vfprintf(stderr, format, args);
+ break;
+ case VERBOSE:
+ if (global_options.verbose_level > 0)
+ vfprintf(stderr, format, args);
+ break;
+ }
+
+ va_end(args);
+}
+
+int usage(int exit_code)
+{
+ int log_type = exit_code ? WARN : INFORM;
+ log_print(log_type,
+ "Usage: igzip [options] [infiles]\n\n"
+ "Options:\n"
+ " -h, --help help, print this message\n"
+ " -# use compression level # with 0 <= # <= %d\n"
+ " -o <file> output file\n"
+ " -c, --stdout write to stdout\n"
+ " -d, --decompress decompress file\n"
+ " -z, --compress compress file (default)\n"
+ " -f, --force overwrite output without prompting\n"
+ " --rm remove source files after successful (de)compression\n"
+ " -k, --keep keep source files (default)\n"
+ " -S, --suffix <.suf> suffix to use while (de)compressing\n"
+ " -V, --version show version number\n"
+ " -v, --verbose verbose mode\n"
+ " -N, --name save/use file name and timestamp in compress/decompress\n"
+ " -n, --no-name do not save/use file name and timestamp in compress/decompress\n"
+ " -t, --test test compressed file integrity\n"
+ " -T, --threads <n> use n threads to compress if enabled\n"
+ " -q, --quiet suppress warnings\n\n"
+ "with no infile, or when infile is - , read standard input\n\n",
+ ISAL_DEF_MAX_LEVEL);
+
+ exit(exit_code);
+}
+
+void print_version(void)
+{
+ log_print(INFORM, "igzip command line interface %s\n", VERSION);
+}
+
+void *malloc_safe(size_t size)
+{
+ void *ptr = NULL;
+ if (size == 0)
+ return ptr;
+
+ ptr = malloc(size);
+ if (ptr == NULL) {
+ log_print(ERROR, "igzip: Failed to allocate memory\n");
+ exit(MALLOC_FAILED);
+ }
+
+ return ptr;
+}
+
+FILE *fopen_safe(char *file_name, char *mode)
+{
+ FILE *file;
+ int answer = 0, tmp;
+
+ /* Assumes write mode always starts with w */
+ if (mode[0] == 'w') {
+ if (access(file_name, F_OK) == 0) {
+ log_print(WARN, "igzip: %s already exists;", file_name);
+ if (is_interactive()) {
+ log_print(WARN, " do you wish to overwrite (y/n)?");
+ answer = getchar();
+
+ tmp = answer;
+ while (tmp != '\n' && tmp != EOF)
+ tmp = getchar();
+
+ if (answer != 'y' && answer != 'Y') {
+ log_print(WARN, " not overwritten\n");
+ return NULL;
+ }
+ } else if (!global_options.force) {
+ log_print(WARN, " not overwritten\n");
+ return NULL;
+ }
+
+ if (access(file_name, W_OK) != 0) {
+ log_print(ERROR, "igzip: %s: Permission denied\n", file_name);
+ return NULL;
+ }
+ }
+ }
+
+ /* Assumes read mode always starts with r */
+ if (mode[0] == 'r') {
+ if (access(file_name, F_OK) != 0) {
+ log_print(ERROR, "igzip: %s does not exist\n", file_name);
+ return NULL;
+ }
+
+ if (access(file_name, R_OK) != 0) {
+ log_print(ERROR, "igzip: %s: Permission denied\n", file_name);
+ return NULL;
+ }
+ }
+
+ file = fopen(file_name, mode);
+ if (!file) {
+ log_print(ERROR, "igzip: Failed to open %s\n", file_name);
+ return NULL;
+ }
+
+ return file;
+}
+
+size_t fread_safe(void *buf, size_t word_size, size_t buf_size, FILE * in, char *file_name)
+{
+ size_t read_size;
+ read_size = fread(buf, word_size, buf_size, in);
+ if (ferror(in)) {
+ log_print(ERROR, "igzip: Error encountered while reading file %s\n",
+ file_name);
+ exit(FILE_READ_ERROR);
+ }
+ return read_size;
+}
+
+size_t fwrite_safe(void *buf, size_t word_size, size_t buf_size, FILE * out, char *file_name)
+{
+ size_t write_size;
+ write_size = fwrite(buf, word_size, buf_size, out);
+ if (ferror(out)) {
+ log_print(ERROR, "igzip: Error encountered while writing to file %s\n",
+ file_name);
+ exit(FILE_WRITE_ERROR);
+ }
+ return write_size;
+}
+
+void open_in_file(FILE ** in, char *infile_name)
+{
+ *in = NULL;
+ if (infile_name == NULL)
+ *in = stdin;
+ else
+ *in = fopen_safe(infile_name, "rb");
+}
+
+void open_out_file(FILE ** out, char *outfile_name)
+{
+ *out = NULL;
+ if (global_options.use_stdout)
+ *out = stdout;
+ else if (outfile_name != NULL)
+ *out = fopen_safe(outfile_name, "wb");
+ else if (!isatty(fileno(stdout)) || global_options.force)
+ *out = stdout;
+ else {
+ log_print(WARN, "igzip: No output location. Use -c to output to terminal\n");
+ exit(FILE_OPEN_ERROR);
+ }
+}
+
+#if defined(HAVE_THREADS)
+
+#define MAX_THREADS 8
+#define MAX_JOBQUEUE 16 /* must be a power of 2 */
+
+enum job_status {
+ JOB_UNALLOCATED = 0,
+ JOB_ALLOCATED,
+ JOB_SUCCESS,
+ JOB_FAIL
+};
+
+struct thread_job {
+ uint8_t *next_in;
+ uint32_t avail_in;
+ uint8_t *next_out;
+ uint32_t avail_out;
+ uint32_t total_out;
+ uint32_t type;
+ uint32_t status;
+};
+struct thread_pool {
+ pthread_t threads[MAX_THREADS];
+ struct thread_job job[MAX_JOBQUEUE];
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int head;
+ int tail;
+ int queue;
+ int shutdown;
+};
+
+// Globals for thread pool
+struct thread_pool pool;
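+// The job array is a ring buffer with indices wrapping modulo MAX_JOBQUEUE:
+// pool_put_work() advances head as jobs are submitted, workers advance
+// queue as they claim jobs, and the writer advances tail once a job's
+// output has been flushed in order. Since pool is a zero-initialized
+// global, its mutex and condition variable also get valid all-zero static
+// initializers on glibc.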
+
+static inline int pool_has_space()
+{
+ return ((pool.head + 1) % MAX_JOBQUEUE) != pool.tail;
+}
+
+static inline int pool_has_work()
+{
+ return (pool.queue != pool.head);
+}
+
+int pool_get_work()
+{
+ assert(pool.queue != pool.head);
+ pool.queue = (pool.queue + 1) % MAX_JOBQUEUE;
+ return pool.queue;
+}
+
+int pool_put_work(struct isal_zstream *stream)
+{
+ pthread_mutex_lock(&pool.mutex);
+ if (!pool_has_space() || pool.shutdown) {
+ pthread_mutex_unlock(&pool.mutex);
+ return 1;
+ }
+ int idx = (pool.head + 1) % MAX_JOBQUEUE;
+ pool.job[idx].next_in = stream->next_in;
+ pool.job[idx].avail_in = stream->avail_in;
+ pool.job[idx].next_out = stream->next_out;
+ pool.job[idx].avail_out = stream->avail_out;
+ pool.job[idx].status = JOB_ALLOCATED;
+ pool.job[idx].type = stream->end_of_stream == 0 ? 0 : 1;
+ pool.head = idx;
+ pthread_cond_signal(&pool.cond);
+ pthread_mutex_unlock(&pool.mutex);
+ return 0;
+}
+
+void *thread_worker(void *none)
+{
+ struct isal_zstream wstream;
+ int check;
+ int work_idx;
+ int level = global_options.level;
+ int level_size = level_size_buf[level];
+ uint8_t *level_buf = malloc_safe(level_size);
+ log_print(VERBOSE, "Start worker\n");
+
+ while (!pool.shutdown) {
+ pthread_mutex_lock(&pool.mutex);
+ while (!pool_has_work() && !pool.shutdown) {
+ pthread_cond_wait(&pool.cond, &pool.mutex);
+ }
+ if (pool.shutdown) {
+ pthread_mutex_unlock(&pool.mutex);
+ continue;
+ }
+
+ work_idx = pool_get_work();
+ pthread_cond_signal(&pool.cond);
+ pthread_mutex_unlock(&pool.mutex);
+
+ isal_deflate_stateless_init(&wstream);
+ wstream.next_in = pool.job[work_idx].next_in;
+ wstream.next_out = pool.job[work_idx].next_out;
+ wstream.avail_in = pool.job[work_idx].avail_in;
+ wstream.avail_out = pool.job[work_idx].avail_out;
+ wstream.end_of_stream = pool.job[work_idx].type;
+ wstream.flush = FULL_FLUSH;
+ wstream.level = global_options.level;
+ wstream.level_buf = level_buf;
+ wstream.level_buf_size = level_size;
+
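+ /* FULL_FLUSH ends each job on a byte-aligned deflate block boundary
+ * and resets the history, so blocks compressed independently by
+ * different workers can be concatenated in order. */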
+ check = isal_deflate_stateless(&wstream);
+ log_print(VERBOSE, "Worker finished job %d, out=%d\n",
+ work_idx, wstream.total_out);
+
+ pool.job[work_idx].total_out = wstream.total_out;
+ pool.job[work_idx].status = JOB_SUCCESS + check; // complete or fail
+ if (check)
+ break;
+ }
+ free(level_buf);
+ log_print(VERBOSE, "Worker quit\n");
+ pthread_exit(NULL);
+}
+
+int pool_create()
+{
+ int i;
+ int nthreads = global_options.threads - 1;
+ pool.head = 0;
+ pool.tail = 0;
+ pool.queue = 0;
+ pool.shutdown = 0;
+ for (i = 0; i < nthreads; i++)
+ pthread_create(&pool.threads[i], NULL, thread_worker, NULL);
+
+ log_print(VERBOSE, "Created %d pool threads\n", nthreads);
+ return 0;
+}
+
+void pool_quit()
+{
+ int i;
+ pthread_mutex_lock(&pool.mutex);
+ pool.shutdown = 1;
+ pthread_mutex_unlock(&pool.mutex);
+ pthread_cond_broadcast(&pool.cond);
+ for (i = 0; i < global_options.threads - 1; i++)
+ pthread_join(pool.threads[i], NULL);
+ log_print(VERBOSE, "Deleted %d pool threads\n", i);
+}
+
+#endif // defined(HAVE_THREADS)
+
+int compress_file(void)
+{
+ FILE *in = NULL, *out = NULL;
+ unsigned char *inbuf = NULL, *outbuf = NULL, *level_buf = NULL;
+ size_t inbuf_size, outbuf_size;
+ int level_size = 0;
+ struct isal_zstream stream;
+ struct isal_gzip_header gz_hdr;
+ int ret, success = 0;
+
+ char *infile_name = global_options.infile_name;
+ char *outfile_name = global_options.outfile_name;
+ char *allocated_name = NULL;
+ char *suffix = global_options.suffix;
+ size_t infile_name_len = global_options.infile_name_len;
+ size_t outfile_name_len = global_options.outfile_name_len;
+ size_t suffix_len = global_options.suffix_len;
+
+ int level = global_options.level;
+
+ if (suffix == NULL) {
+ suffix = default_suffixes[0];
+ suffix_len = default_suffixes_lens[0];
+ }
+
+ if (infile_name_len == stdin_file_name_len &&
+ infile_name != NULL &&
+ memcmp(infile_name, stdin_file_name, infile_name_len) == 0) {
+ infile_name = NULL;
+ infile_name_len = 0;
+ }
+
+ if (outfile_name == NULL && infile_name != NULL && !global_options.use_stdout) {
+ outfile_name_len = infile_name_len + suffix_len;
+ allocated_name = malloc_safe(outfile_name_len + 1);
+ outfile_name = allocated_name;
+ strncpy(outfile_name, infile_name, infile_name_len + 1);
+ strncat(outfile_name, suffix, outfile_name_len + 1);
+ }
+
+ open_in_file(&in, infile_name);
+ if (in == NULL)
+ goto compress_file_cleanup;
+
+ if (infile_name_len != 0 && infile_name_len == outfile_name_len
+ && infile_name != NULL && outfile_name != NULL
+ && strncmp(infile_name, outfile_name, infile_name_len) == 0) {
+ log_print(ERROR, "igzip: Error input and output file names must differ\n");
+ goto compress_file_cleanup;
+ }
+
+ open_out_file(&out, outfile_name);
+ if (out == NULL)
+ goto compress_file_cleanup;
+
+ inbuf_size = global_options.in_buf_size;
+ outbuf_size = global_options.out_buf_size;
+
+ inbuf = global_options.in_buf;
+ outbuf = global_options.out_buf;
+ level_size = global_options.level_buf_size;
+ level_buf = global_options.level_buf;
+
+ isal_gzip_header_init(&gz_hdr);
+ if (global_options.name == NAME_DEFAULT || global_options.name == YES_NAME) {
+ gz_hdr.time = get_posix_filetime(in);
+ gz_hdr.name = infile_name;
+ }
+ gz_hdr.os = UNIX;
+ gz_hdr.name_buf_len = infile_name_len + 1;
+
+ isal_deflate_init(&stream);
+ stream.avail_in = 0;
+ stream.flush = NO_FLUSH;
+ stream.level = level;
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_size;
+ stream.gzip_flag = IGZIP_GZIP_NO_HDR;
+ stream.next_out = outbuf;
+ stream.avail_out = outbuf_size;
+
+ isal_write_gzip_header(&stream, &gz_hdr);
+
+ if (global_options.threads > 1) {
+#if defined(HAVE_THREADS)
+ int q;
+ int end_of_stream = 0;
+ uint32_t crc = 0;
+ uint64_t total_in = 0;
+
+ // Write the header
+ fwrite_safe(outbuf, 1, stream.total_out, out, outfile_name);
+
+ do {
+ size_t nread;
+ size_t inbuf_used = 0;
+ size_t outbuf_used = 0;
+ uint8_t *iptr = inbuf;
+ uint8_t *optr = outbuf;
+
+ for (q = 0; q < MAX_JOBQUEUE - 1; q++) {
+ inbuf_used += BLOCK_SIZE;
+ outbuf_used += 2 * BLOCK_SIZE;
+ if (inbuf_used > inbuf_size || outbuf_used > outbuf_size)
+ break;
+
+ nread = fread_safe(iptr, 1, BLOCK_SIZE, in, infile_name);
+ crc = crc32_gzip_refl(crc, iptr, nread);
+ end_of_stream = feof(in);
+ total_in += nread;
+ stream.next_in = iptr;
+ stream.next_out = optr;
+ stream.avail_in = nread;
+ stream.avail_out = 2 * BLOCK_SIZE;
+ stream.end_of_stream = end_of_stream;
+ ret = pool_put_work(&stream);
+ if (ret || end_of_stream)
+ break;
+
+ iptr += BLOCK_SIZE;
+ optr += 2 * BLOCK_SIZE;
+ }
+
+ while (pool.tail != pool.head) { // Unprocessed jobs
+ int t = (pool.tail + 1) % MAX_JOBQUEUE;
+ if (pool.job[t].status >= JOB_SUCCESS) { // Finished next
+ if (pool.job[t].status > JOB_SUCCESS) {
+ success = 0;
+ log_print(ERROR,
+ "igzip: Error encountered while compressing file %s\n",
+ infile_name);
+ goto compress_file_cleanup;
+ }
+ fwrite_safe(pool.job[t].next_out, 1,
+ pool.job[t].total_out, out, outfile_name);
+
+ pool.job[t].total_out = 0;
+ pool.job[t].status = 0;
+ pool.tail = t;
+ pthread_cond_broadcast(&pool.cond);
+ }
+ // Pick up a job while we wait
+ pthread_mutex_lock(&pool.mutex);
+ if (!pool_has_work()) {
+ pthread_mutex_unlock(&pool.mutex);
+ continue;
+ }
+
+ int work_idx = pool_get_work();
+ pthread_cond_signal(&pool.cond);
+ pthread_mutex_unlock(&pool.mutex);
+
+ isal_deflate_stateless_init(&stream);
+ stream.next_in = pool.job[work_idx].next_in;
+ stream.next_out = pool.job[work_idx].next_out;
+ stream.avail_in = pool.job[work_idx].avail_in;
+ stream.avail_out = pool.job[work_idx].avail_out;
+ stream.end_of_stream = pool.job[work_idx].type;
+ stream.flush = FULL_FLUSH;
+ stream.level = global_options.level;
+ stream.level_buf = level_buf;
+ stream.level_buf_size = level_size;
+ int check = isal_deflate_stateless(&stream);
+ log_print(VERBOSE, "Self finished job %d, out=%d\n",
+ work_idx, stream.total_out);
+ pool.job[work_idx].total_out = stream.total_out;
+ pool.job[work_idx].status = JOB_SUCCESS + check; // complete or fail
+ }
+ } while (!end_of_stream);
+
+ // Write gzip trailer
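+ // (CRC32 then ISIZE = input length mod 2^32, both little-endian;
+ // writing the low four bytes of total_in assumes a little-endian host.)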
+ fwrite_safe(&crc, sizeof(uint32_t), 1, out, outfile_name);
+ fwrite_safe(&total_in, sizeof(uint32_t), 1, out, outfile_name);
+
+#else // No compiled threading support but asked for threads > 1
+ assert(0); /* unreachable: without HAVE_THREADS, -T is ignored and threads stays 1 */
+#endif
+ } else { // Single thread
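+ /* Stateful loop: refill the input buffer whenever it drains, write
+ * out whatever isal_deflate() produced after each call, and finish
+ * once EOF is reached and the output buffer is no longer filled. */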
+ do {
+ if (stream.avail_in == 0) {
+ stream.next_in = inbuf;
+ stream.avail_in =
+ fread_safe(stream.next_in, 1, inbuf_size, in, infile_name);
+ stream.end_of_stream = feof(in);
+ }
+
+ if (stream.next_out == NULL) {
+ stream.next_out = outbuf;
+ stream.avail_out = outbuf_size;
+ }
+
+ ret = isal_deflate(&stream);
+
+ if (ret != COMP_OK) {
+ log_print(ERROR,
+ "igzip: Error encountered while compressing file %s\n",
+ infile_name);
+ goto compress_file_cleanup;
+ }
+
+ fwrite_safe(outbuf, 1, stream.next_out - outbuf, out, outfile_name);
+ stream.next_out = NULL;
+
+ } while (!feof(in) || stream.avail_out == 0);
+ }
+
+ success = 1;
+
+ compress_file_cleanup:
+ if (out != NULL && out != stdout)
+ fclose(out);
+
+ if (in != NULL && in != stdin) {
+ fclose(in);
+ if (success && global_options.remove)
+ remove(infile_name);
+ }
+
+ if (allocated_name != NULL)
+ free(allocated_name);
+
+ return (success == 0);
+}
+
+int decompress_file(void)
+{
+ FILE *in = NULL, *out = NULL;
+ unsigned char *inbuf = NULL, *outbuf = NULL;
+ size_t inbuf_size, outbuf_size;
+ struct inflate_state state;
+ struct isal_gzip_header gz_hdr;
+ const int terminal = 0, implicit = 1, stripped = 2;
+ int ret = 0, success = 0, outfile_type = terminal;
+
+ char *infile_name = global_options.infile_name;
+ char *outfile_name = global_options.outfile_name;
+ char *allocated_name = NULL;
+ char *suffix = global_options.suffix;
+ size_t infile_name_len = global_options.infile_name_len;
+ size_t outfile_name_len = global_options.outfile_name_len;
+ size_t suffix_len = global_options.suffix_len;
+ int suffix_index = 0;
+ uint32_t file_time;
+
+ // Allocate mem and setup to hold gzip header info
+ if (infile_name_len == stdin_file_name_len &&
+ infile_name != NULL &&
+ memcmp(infile_name, stdin_file_name, infile_name_len) == 0) {
+ infile_name = NULL;
+ infile_name_len = 0;
+ }
+
+ if (outfile_name == NULL && !global_options.use_stdout) {
+ if (infile_name != NULL) {
+ outfile_type = stripped;
+ while (suffix_index <
+ sizeof(default_suffixes) / sizeof(*default_suffixes)) {
+ if (suffix == NULL) {
+ suffix = default_suffixes[suffix_index];
+ suffix_len = default_suffixes_lens[suffix_index];
+ suffix_index++;
+ }
+
+ outfile_name_len = infile_name_len - suffix_len;
+ if (infile_name_len >= suffix_len
+ && memcmp(infile_name + outfile_name_len, suffix,
+ suffix_len) == 0)
+ break;
+ suffix = NULL;
+ suffix_len = 0;
+ }
+
+ if (suffix == NULL && global_options.test == NO_TEST) {
+ log_print(ERROR, "igzip: %s: unknown suffix -- ignored\n",
+ infile_name);
+ return 1;
+ }
+ }
+ if (global_options.name == YES_NAME) {
+ outfile_name_len = 0;
+ outfile_type = implicit;
+ }
+ if (outfile_type != terminal) {
+ allocated_name = malloc_safe(outfile_name_len >=
+ MAX_FILEPATH_BUF ? outfile_name_len +
+ 1 : MAX_FILEPATH_BUF);
+ outfile_name = allocated_name;
+ }
+ }
+
+ open_in_file(&in, infile_name);
+ if (in == NULL)
+ goto decompress_file_cleanup;
+
+ file_time = get_posix_filetime(in);
+
+ inbuf_size = global_options.in_buf_size;
+ outbuf_size = global_options.out_buf_size;
+ inbuf = global_options.in_buf;
+ outbuf = global_options.out_buf;
+
+ isal_gzip_header_init(&gz_hdr);
+ if (outfile_type == implicit) {
+ gz_hdr.name = outfile_name;
+ gz_hdr.name_buf_len = MAX_FILEPATH_BUF;
+ }
+
+ isal_inflate_init(&state);
+ state.crc_flag = ISAL_GZIP_NO_HDR_VER;
+ state.next_in = inbuf;
+ state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
+
+ // Actually read and save the header info
+ ret = isal_read_gzip_header(&state, &gz_hdr);
+ if (ret != ISAL_DECOMP_OK) {
+ log_print(ERROR, "igzip: Error invalid gzip header found for file %s\n",
+ infile_name);
+ goto decompress_file_cleanup;
+ }
+
+ if (outfile_type == implicit)
+ file_time = gz_hdr.time;
+
+ if (outfile_name != NULL && infile_name != NULL
+ && (outfile_type == stripped
+ || (outfile_type == implicit && outfile_name[0] == 0))) {
+ outfile_name_len = infile_name_len - suffix_len;
+ memcpy(outfile_name, infile_name, outfile_name_len);
+ outfile_name[outfile_name_len] = 0;
+ }
+
+ if (infile_name_len != 0 && infile_name_len == outfile_name_len
+ && infile_name != NULL && outfile_name != NULL
+ && strncmp(infile_name, outfile_name, infile_name_len) == 0) {
+ log_print(ERROR, "igzip: Error input and output file names must differ\n");
+ goto decompress_file_cleanup;
+ }
+
+ if (global_options.test == NO_TEST) {
+ open_out_file(&out, outfile_name);
+ if (out == NULL)
+ goto decompress_file_cleanup;
+ }
+
+ // Start reading in compressed data and decompress
+ do {
+ if (state.avail_in == 0) {
+ state.next_in = inbuf;
+ state.avail_in =
+ fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
+ }
+
+ state.next_out = outbuf;
+ state.avail_out = outbuf_size;
+
+ ret = isal_inflate(&state);
+ if (ret != ISAL_DECOMP_OK) {
+ log_print(ERROR,
+ "igzip: Error encountered while decompressing file %s\n",
+ infile_name);
+ goto decompress_file_cleanup;
+ }
+
+ if (out != NULL)
+ fwrite_safe(outbuf, 1, state.next_out - outbuf, out, outfile_name);
+
+ } while (state.block_state != ISAL_BLOCK_FINISH // while not done
+ && (!feof(in) || state.avail_out == 0) // and work to do
+ );
+
+ // Look for and decode any additional concatenated gzip files
+ if (!feof(in) && state.avail_in == 0) {
+ state.next_in = inbuf;
+ state.avail_in = fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
+ }
+
+ while (state.avail_in > 0 && state.next_in[0] == 31) {
+ // Look for magic numbers for gzip header. Follows the gzread() decision
+ // whether to treat as trailing junk
+ if (state.avail_in > 1 && state.next_in[1] != 139)
+ break;
+
+ isal_inflate_reset(&state);
+ state.crc_flag = ISAL_GZIP; // Let isal_inflate() process extra headers
+ do {
+ if (state.avail_in == 0 && !feof(in)) {
+ state.next_in = inbuf;
+ state.avail_in =
+ fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
+ }
+
+ state.next_out = outbuf;
+ state.avail_out = outbuf_size;
+
+ ret = isal_inflate(&state);
+ if (ret != ISAL_DECOMP_OK) {
+ log_print(ERROR,
+ "igzip: Error while decompressing extra concatenated"
+ "gzip files on %s\n", infile_name);
+ goto decompress_file_cleanup;
+ }
+
+ if (out != NULL)
+ fwrite_safe(outbuf, 1, state.next_out - outbuf, out,
+ outfile_name);
+
+ } while (state.block_state != ISAL_BLOCK_FINISH
+ && (!feof(in) || state.avail_out == 0));
+
+ if (!feof(in) && state.avail_in == 0) {
+ state.next_in = inbuf;
+ state.avail_in =
+ fread_safe(state.next_in, 1, inbuf_size, in, infile_name);
+ }
+ }
+
+ if (state.block_state != ISAL_BLOCK_FINISH)
+ log_print(ERROR, "igzip: Error %s does not contain a complete gzip file\n",
+ infile_name);
+ else
+ success = 1;
+
+ decompress_file_cleanup:
+ if (out != NULL && out != stdout) {
+ fclose(out);
+ if (success)
+ set_filetime(outfile_name, file_time);
+ }
+
+ if (in != NULL && in != stdin) {
+ fclose(in);
+ if (success && global_options.remove)
+ remove(infile_name);
+ }
+
+ if (allocated_name != NULL)
+ free(allocated_name);
+
+ return (success == 0);
+}
+
+int main(int argc, char *argv[])
+{
+ int c;
+ char optstring[] = "hcdz0123456789o:S:kfqVvNntT:";
+ int long_only_flag;
+ int ret = 0;
+ int bad_option = 0;
+ int bad_level = 0;
+ int bad_c = 0;
+
+ struct option long_options[] = {
+ {"help", no_argument, NULL, 'h'},
+ {"stdout", no_argument, NULL, 'c'},
+ {"to-stdout", no_argument, NULL, 'c'},
+ {"compress", no_argument, NULL, 'z'},
+ {"decompress", no_argument, NULL, 'd'},
+ {"uncompress", no_argument, NULL, 'd'},
+ {"keep", no_argument, NULL, 'k'},
+ {"rm", no_argument, &long_only_flag, RM},
+ {"suffix", no_argument, NULL, 'S'},
+ {"fast", no_argument, NULL, '1'},
+ {"best", no_argument, NULL, '0' + ISAL_DEF_MAX_LEVEL},
+ {"force", no_argument, NULL, 'f'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"version", no_argument, NULL, 'V'},
+ {"verbose", no_argument, NULL, 'v'},
+ {"no-name", no_argument, NULL, 'n'},
+ {"name", no_argument, NULL, 'N'},
+ {"test", no_argument, NULL, 't'},
+ {"threads", required_argument, NULL, 'T'},
+ /* Possible future extensions
+ {"recursive, no_argument, NULL, 'r'},
+ {"list", no_argument, NULL, 'l'},
+ {"benchmark", optional_argument, NULL, 'b'},
+ {"benchmark_end", required_argument, NULL, 'e'},
+ */
+ {0, 0, 0, 0}
+ };
+
+ init_options(&global_options);
+
+ opterr = 0;
+ while ((c = getopt_long(argc, argv, optstring, long_options, NULL)) != -1) {
+ if (c >= '0' && c <= '9') {
+ if (c > '0' + ISAL_DEF_MAX_LEVEL)
+ bad_level = 1;
+ else
+ global_options.level = c - '0';
+
+ continue;
+ }
+
+ switch (c) {
+ case 0:
+ switch (long_only_flag) {
+ case RM:
+ global_options.remove = true;
+ break;
+ default:
+ bad_option = 1;
+ bad_c = c;
+ break;
+ }
+ break;
+ case 'o':
+ global_options.outfile_name = optarg;
+ global_options.outfile_name_len = strlen(global_options.outfile_name);
+ break;
+ case 'c':
+ global_options.use_stdout = true;
+ break;
+ case 'z':
+ global_options.mode = COMPRESS_MODE;
+ break;
+ case 'd':
+ global_options.mode = DECOMPRESS_MODE;
+ break;
+ case 'S':
+ global_options.suffix = optarg;
+ global_options.suffix_len = strlen(global_options.suffix);
+ break;
+ case 'k':
+ global_options.remove = false;
+ break;
+ case 'f':
+ global_options.force = true;
+ break;
+ case 'q':
+ global_options.quiet_level++;
+ break;
+ case 'v':
+ global_options.verbose_level++;
+ break;
+ case 'V':
+ print_version();
+ return 0;
+ case 'N':
+ global_options.name = YES_NAME;
+ break;
+ case 'n':
+ global_options.name = NO_NAME;
+ break;
+ case 't':
+ global_options.test = TEST;
+ global_options.mode = DECOMPRESS_MODE;
+ break;
+ case 'T':
+#if defined(HAVE_THREADS)
+ c = atoi(optarg);
+ c = c > MAX_THREADS ? MAX_THREADS : c;
+ c = c < 1 ? 1 : c;
+ global_options.threads = c;
+#endif
+ break;
+ case 'h':
+ usage(0);
+ default:
+ bad_option = 1;
+ bad_c = optopt;
+ break;
+ }
+ }
+
+ if (bad_option) {
+ log_print(ERROR, "igzip: invalid option ");
+ if (bad_c)
+ log_print(ERROR, "-%c\n", bad_c);
+
+ else
+ log_print(ERROR, ("\n"));
+
+ usage(BAD_OPTION);
+ }
+
+ if (bad_level) {
+ log_print(ERROR, "igzip: invalid compression level\n");
+ usage(BAD_LEVEL);
+ }
+
+ if (global_options.outfile_name && optind < argc - 1) {
+ log_print(ERROR,
+ "igzip: An output file may be specified with only one input file\n");
+ return BAD_OPTION;
+ }
+
+ global_options.in_buf_size = BLOCK_SIZE;
+ global_options.out_buf_size = BLOCK_SIZE;
+
+#if defined(HAVE_THREADS)
+ if (global_options.threads > 1) {
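+ /* One BLOCK_SIZE input slice plus a 2*BLOCK_SIZE output slice per
+ * queued job; doubling the output gives headroom for incompressible
+ * data. */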
+ global_options.in_buf_size += (BLOCK_SIZE * MAX_JOBQUEUE);
+ global_options.out_buf_size += (BLOCK_SIZE * MAX_JOBQUEUE * 2);
+ pool_create();
+ }
+#endif
+ global_options.in_buf = malloc_safe(global_options.in_buf_size);
+ global_options.out_buf = malloc_safe(global_options.out_buf_size);
+ global_options.level_buf_size = level_size_buf[global_options.level];
+ global_options.level_buf = malloc_safe(global_options.level_buf_size);
+
+ if (global_options.mode == COMPRESS_MODE) {
+ if (optind >= argc)
+ ret |= compress_file();
+ while (optind < argc) {
+ global_options.infile_name = argv[optind];
+ global_options.infile_name_len = strlen(global_options.infile_name);
+ ret |= compress_file();
+ optind++;
+ }
+
+ } else if (global_options.mode == DECOMPRESS_MODE) {
+ if (optind >= argc)
+ ret |= decompress_file();
+ while (optind < argc) {
+ global_options.infile_name = argv[optind];
+ global_options.infile_name_len = strlen(global_options.infile_name);
+ ret |= decompress_file();
+ optind++;
+ }
+ }
+#if defined(HAVE_THREADS)
+ if (global_options.threads > 1)
+ pool_quit();
+#endif
+
+ free(global_options.in_buf);
+ free(global_options.out_buf);
+ free(global_options.level_buf);
+ return ret;
+}
diff --git a/src/isa-l/programs/igzip_cli_check.sh b/src/isa-l/programs/igzip_cli_check.sh
new file mode 100755
index 000000000..5d97763c6
--- /dev/null
+++ b/src/isa-l/programs/igzip_cli_check.sh
@@ -0,0 +1,261 @@
+#! /bin/bash
+set -o pipefail
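+# With pipefail, a pipeline's exit status is the last non-zero status of
+# any stage, so the compress/diff pipelines below fail if any stage fails.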
+
+CWD=$PWD
+SRC_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
+IGZIP="$SRC_DIR/igzip $@"
+TEST_DIR="/tmp/igzip_cli_test_$$/"
+TEST_FILE=$SRC_DIR/igzip
+DIFF="diff -q"
+
+mkdir -p $TEST_DIR
+cd $TEST_DIR
+
+cleanup ()
+{
+ cd $CWD
+ rm -rf $TEST_DIR
+ exit $1
+}
+
+clear_dir ()
+{
+ cd /tmp/
+ rm -rf $TEST_DIR
+ mkdir -p $TEST_DIR
+ cd $TEST_DIR
+}
+
+pass_check()
+{
+ if [ $1 -eq 0 ]; then
+ echo -e "\e[1;32mPass\e[0;39m: " $2
+ else
+ echo -e "\e[1;31mFail\e[0;39m: " $2
+ cleanup 1
+ fi
+}
+
+fail_check()
+{
+ if [ $1 -ne 0 ]; then
+ echo -e "\e[1;32mPass\e[0;39m: " $2
+ else
+ echo -e "\e[1;31mFail\e[0;39m: " $2
+ cleanup 1
+ fi
+}
+
+file1=tmp
+file2=jnk
+file3=blah
+bad_file=not_a_file
+dir=this_is_a_directory
+
+default_suffix=".gz"
+ds=$default_suffix
+gzip_standard_suffixes=(".gz" ".z")
+bad_suffix=".bad"
+custom_suffix=".custom"
+
+# Test basic compression and decompression
+ret=0
+cp $TEST_FILE $file1
+$IGZIP $file1 && rm $file1 || ret=1
+for suffix in ${gzip_standard_suffixes[@]}; do
+ if [ "$ds" != "$suffix" ]; then
+ cp -u $file1$ds $file1$suffix
+ fi
+ $IGZIP -d $file1$suffix && $DIFF $file1 $TEST_FILE || ret=1
+ rm $file1
+done
+pass_check $ret "Basic compression and decompression"
+clear_dir
+
+# Test piping
+cat $TEST_FILE | $IGZIP | $IGZIP -d | $DIFF $TEST_FILE - || ret=1
+cat $TEST_FILE | $IGZIP - | $IGZIP -d - | $DIFF $TEST_FILE - || ret=1
+pass_check $ret "Piping compression and decompression"
+
+# Test multiple concatenated gzip files
+ret=0
+(for i in `seq 3`; do $IGZIP -c $TEST_FILE ; done) | $IGZIP -t || ret=1
+pass_check $ret "Multiple gzip concatenated files"
+
+if command -V md5sum >/dev/null 2>&1; then
+ sum1=$((for i in `seq 15`; do $IGZIP -c $TEST_FILE; done) | $IGZIP -cd | md5sum)
+ sum2=$((for i in `seq 15`; do cat $TEST_FILE; done) | md5sum)
+ [[ "$sum1" == "$sum2" ]] && ret=0 || ret=1
+ pass_check $ret "Multiple large gzip concat test"
+ clear_dir
+else
+ echo "Skip: Multiple large gzip concat test"
+fi
+
+
+# Test outfile options
+$IGZIP $TEST_FILE -o $file2$ds && $IGZIP $file2$ds -d -o $file1 && \
+ test -f $file2$ds && test -f $file1 && $DIFF $TEST_FILE $file1
+pass_check $? "Setting outfile name"
+clear_dir
+
+# Not a file test
+ret=0
+$IGZIP $bad_file &> /dev/null && ret=1
+test -f $bad_file$ds && ret=1
+pass_check $ret "Bad file"
+clear_dir
+
+# Multiple files
+cp $TEST_FILE $file1 && cp $TEST_FILE $file2 && cp $TEST_FILE $file3 && \
+ $IGZIP $file1 $file2 $file3 && rm $file1 && rm $file2 && rm $file3 && \
+ $IGZIP -d $file1$ds $file2$ds $file3$ds && \
+ $DIFF $TEST_FILE $file1 && $DIFF $TEST_FILE $file2 && $DIFF $TEST_FILE $file3
+pass_check $? "Multiple files compression and decompression"
+clear_dir
+
+# Multiple files, one doesn't exist
+ret=0
+cp $TEST_FILE $file1 && cp $TEST_FILE $file2 || ret=1
+$IGZIP $file1 $bad_file $file2 &> /dev/null && ret=1
+rm $file1 && rm $file2 || ret=1
+$IGZIP -d $file1$ds $bad_file$ds $file2$ds &> /dev/null && ret=1
+$DIFF $TEST_FILE $file1 && $DIFF $TEST_FILE $file2 || ret=1
+pass_check $ret "Multiple files with a bad file"
+clear_dir
+
+# Custom suffix test
+cp $TEST_FILE $file1 && $IGZIP -S $custom_suffix $file1 && rm $file1 && \
+ $IGZIP -d -S $custom_suffix $file1$custom_suffix && $DIFF $TEST_FILE $file1
+pass_check $? "Custom suffix"
+
+# Bad suffix test
+ret=0
+cp $TEST_FILE $file1 && $IGZIP -S $bad_suffix $file1 && rm $file1 || ret=1
+$IGZIP -d $file1$custom_suffix &> /dev/null && ret=1
+pass_check $ret "Bad suffix"
+clear_dir
+
+# Remove file test
+ret=0
+cp $TEST_FILE $file1 && $IGZIP --rm $file1 || ret=1
+test -f $file1 && ret=1
+$IGZIP --rm -d $file1$ds || ret=1
+test -f $file1$ds && ret=1
+pass_check $ret "Remove file"
+clear_dir
+
+# Pass a directory negative test
+ret=0
+mkdir -p $dir || ret=1
+$IGZIP $dir &> /dev/null && ret=1
+clear_dir
+
+mkdir -p $dir$ds || ret=1
+$IGZIP -d $dir &> /dev/null && ret=1
+pass_check $ret "Compress/Decompress Directory without -r"
+clear_dir
+
+# Write permissions test
+cp $TEST_FILE $file1
+chmod 400 $file1
+chmod 500 $TEST_DIR
+$IGZIP $file1 &> /dev/null
+fail_check $? "don't have write permissions"
+chmod -R 700 $TEST_DIR
+clear_dir
+
+# Read permissions test
+cp $TEST_FILE $file1
+chmod 000 $file1
+$IGZIP $file1 &> /dev/null
+fail_check $? "don't have read permissions"
+clear_dir
+
+# File overwrite test -f
+ret=0
+cp $TEST_FILE $file1 && touch $file1$ds || ret=1
+yes | $IGZIP $file1 &> /dev/null && ret=1
+$IGZIP -f $file1 &> /dev/null && cp $file1$ds $file1 || ret=1
+yes | $IGZIP -d $file1 &> /dev/null && ret=1
+$IGZIP -df $file1$ds &> /dev/null && $DIFF $TEST_FILE $file1 || ret=1
+pass_check $ret "Existing file overwrite only with force"
+clear_dir
+
+# Quiet suppresses interactivity
+ret=0
+cp $TEST_FILE $file1 && touch $file1$ds || ret=1
+$IGZIP -q $file1 &> /dev/null && ret=1
+$IGZIP -dq $file1 &> /dev/null && ret=1
+pass_check $ret "Quiet will not overwrite"
+clear_dir
+
+# Input file and output file cannot be the same
+ret=0
+cp $TEST_FILE $file1 && $IGZIP $file1 -o $file1 &> /dev/null && ret=1
+$DIFF $TEST_FILE $file1 &> /dev/null || ret=1
+pass_check $ret "No in place compression"
+clear_dir
+
+# Input file and output file cannot be the same
+ret=0
+cp $TEST_FILE $file1 && $IGZIP $file1 -o $file1$ds &> /dev/null || ret=1
+$IGZIP -do $file1 $file1 &> /dev/null && ret=1
+$DIFF $TEST_FILE $file1 &> /dev/null || ret=1
+pass_check $ret "No in place decompression"
+clear_dir
+
+ret=0
+$IGZIP -n $TEST_FILE -o $file1$ds && $IGZIP -Nd $file1$ds && $DIFF $file1 $TEST_FILE || ret=1
+pass_check $ret "Decompress name with no-name info"
+clear_dir
+
+ret=0
+cp -p $TEST_FILE $file1 && sleep 1 &&\
+$IGZIP -N $file1 -o $file1$ds && $IGZIP -Nfqd $file1$ds || ret=1
+TIME_ORIG=$(stat --printf=\("%Y\n"\) $TEST_FILE)
+TIME_NEW=$(stat --printf=\("%Y\n"\) $file1)
+if [ "$TIME_ORIG" != "$TIME_NEW" ] ; then
+ ret=1
+fi
+pass_check $ret "Decompress with name info"
+clear_dir
+
+ret=0
+cp -p $TEST_FILE $file1 && touch $file2\\
+$IGZIP $file1 -o $file1$ds || ret=1
+$IGZIP -t $file1$ds || ret=1
+$IGZIP -t $file2 &> /dev/null && ret=1
+cp $file1$ds $file2 && $IGZIP -t $file2 || ret=1
+truncate -s -1 $file1$ds
+$IGZIP -t $file1$ds &> /dev/null && ret=1
+pass_check $ret "Test test"
+clear_dir
+
+# Large stream test with threading if enabled
+ret=0
+(for i in `seq 100`; do cat $TEST_FILE ; done) | $IGZIP -c -T 4 | $IGZIP -t || ret=1
+pass_check $ret "Large stream test"
+
+
+# Large stream tests with decompression and threading if enabled
+if command -V md5sum >/dev/null 2>&1 && command -V dd >/dev/null 2>&1; then
+ ret=0
+ dd if=<(for i in `seq 1000`; do cat $TEST_FILE; done) iflag=fullblock bs=1M count=201 2> out.stder | tee >(md5sum > out.sum1) \
+ | $IGZIP -c -T 4 | $IGZIP -d | md5sum > out.sum2 \
+ && $DIFF out.sum1 out.sum2 || ret=1
+ pass_check $ret "Large stream compresss test"
+ clear_dir
+
+ if test -e /dev/urandom; then
+ ret=0
+ dd if=/dev/urandom iflag=fullblock bs=1M count=200 2> out.stder | tee >(md5sum > out.sum3) \
+ | $IGZIP -c -T 2 | $IGZIP -d | md5sum > out.sum4 \
+ && $DIFF out.sum3 out.sum4 || ret=1
+ pass_check $ret "Large stream random data test"
+ clear_dir
+ fi
+fi
+
+echo "Passed all cli checks"
+cleanup 0
diff --git a/src/isa-l/raid/Makefile.am b/src/isa-l/raid/Makefile.am
new file mode 100644
index 000000000..5f98668d5
--- /dev/null
+++ b/src/isa-l/raid/Makefile.am
@@ -0,0 +1,67 @@
+########################################################################
+# Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+include raid/aarch64/Makefile.am
+
+lsrc += raid/raid_base.c
+
+lsrc_base_aliases += raid/raid_base_aliases.c
+lsrc_ppc64le += raid/raid_base_aliases.c
+
+lsrc_x86_64 += \
+ raid/xor_gen_sse.asm \
+ raid/pq_gen_sse.asm \
+ raid/xor_check_sse.asm \
+ raid/pq_check_sse.asm \
+ raid/pq_gen_avx.asm \
+ raid/xor_gen_avx.asm \
+ raid/pq_gen_avx2.asm \
+ raid/xor_gen_avx512.asm \
+ raid/pq_gen_avx512.asm \
+ raid/raid_multibinary.asm
+
+lsrc_x86_32 += \
+ raid/xor_gen_sse.asm \
+ raid/pq_gen_sse_i32.asm \
+ raid/xor_check_sse.asm \
+ raid/pq_check_sse_i32.asm \
+ raid/raid_multibinary_i32.asm
+
+
+extern_hdrs += include/raid.h
+
+other_src += include/test.h include/types.h
+
+check_tests += raid/xor_gen_test raid/pq_gen_test raid/xor_check_test raid/pq_check_test
+
+perf_tests += raid/xor_gen_perf raid/pq_gen_perf
+
+examples += raid/xor_example
+
+lsrc32 += xor_gen_sse.asm pq_gen_sse_i32.asm xor_check_sse.asm pq_check_sse_i32.asm raid_base.c
diff --git a/src/isa-l/raid/aarch64/Makefile.am b/src/isa-l/raid/aarch64/Makefile.am
new file mode 100644
index 000000000..d08c8d67a
--- /dev/null
+++ b/src/isa-l/raid/aarch64/Makefile.am
@@ -0,0 +1,36 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+lsrc_aarch64 += \
+ raid/aarch64/xor_gen_neon.S \
+ raid/aarch64/pq_gen_neon.S \
+ raid/aarch64/xor_check_neon.S \
+ raid/aarch64/pq_check_neon.S \
+ raid/aarch64/raid_multibinary_arm.S \
+ raid/aarch64/raid_aarch64_dispatcher.c
diff --git a/src/isa-l/raid/aarch64/pq_check_neon.S b/src/isa-l/raid/aarch64/pq_check_neon.S
new file mode 100644
index 000000000..55ad79829
--- /dev/null
+++ b/src/isa-l/raid/aarch64/pq_check_neon.S
@@ -0,0 +1,341 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.text
+
+.global pq_check_neon
+.type pq_check_neon, %function
+
+/* int pq_check_neon(int vects, int len, void **src) */
+
+/* arguments */
+w_vects .req w0 /* MUST be >= 3 */
+x_vects .req x0
+w_len .req w1 /* MUST be a multiple of 16 bytes */
+x_len .req x1
+x_src .req x2
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_dst_p .req x3
+x_dst_q .req x4
+x_dst_q_end .req x5
+w_col .req w6
+x_col .req x6
+x_src_ptr .req x7
+x_src_ptr_end .req x9
+x_src_last .req x10
+x_srcn .req x11
+w_min .req w12
+/* vectors */
+/* v0 ~ v7 : temporary p */
+/* v8 ~ v15: temporary q */
+/* v16 ~ v23: next 128 bytes */
+v_mask0 .req v24
+v_mask1 .req v25
+v_mask2 .req v26
+v_mask3 .req v27
+v_gf8poly .req v28
+v_0x80 .req v29
+
+/*
+ * src_ptr_end -->
+ * -------+----------+
+ * . | src[0] |
+ * . +----------+ +------------------+
+ * src_ptr --> | src[1] | - srcn -> | buffer |
+ * . +----------+ +------------------+
+ * . | ...... |
+ * . +----------+
+ * . | src[v-4] |
+ * -------+----------+ src_last +------------------+
+ * src --> | src[v-3] | ---------> | buffer |
+ * +----------+ +------------------+
+ * | src[v-2] | - dst_p -> | buffer |
+ * +----------+ +------------------+
+ * | src[v-1] | - dst_q -> | buffer | dst_q_end
+ * +----------+ +------------------+
+ */
+
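+/*
+ * The q accumulation below is, per byte, a GF(2^8) multiply-by-2 followed
+ * by an xor with the next source: q = gf_mul2(q) ^ src, where
+ * gf_mul2(b) = (b << 1) ^ ((b & 0x80) ? 0x1D : 0) over polynomial 0x11D.
+ * cmhs against v_0x80 yields an all-ones byte wherever the top bit is
+ * set; and-ing with v_gf8poly selects the 0x1D reduction term.
+ */
+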
+pq_check_neon:
+ sub x_src_ptr_end, x_src, #8
+
+ sub w_vects, w_vects, #3
+ add x_src, x_src, x_vects, lsl #3
+
+ ldr x_src_last, [x_src]
+ ldp x_dst_p, x_dst_q, [x_src, #8]
+
+ add x_dst_q_end, x_dst_q, x_len
+
+ mov w_min, #-1
+ mov w_col, #0
+ movi v_gf8poly.16b, #0x1D
+ movi v_0x80.16b, #0x80
+
+.Lloop128_init:
+ /* less than 128 bytes? */
+ cmp w_len, #128
+ blo .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_dst_q_end, x_dst_q_end, #128
+
+ /* batch process (vects-2)*128 bytes */
+ /* v0~v7: p; v8~v15: q; v16~v23: in */
+.Lloop128:
+ ldr q0, [x_src_last, #16*0]
+ ldr q1, [x_src_last, #16*1]
+ ldr q2, [x_src_last, #16*2]
+ ldr q3, [x_src_last, #16*3]
+ ldr q4, [x_src_last, #16*4]
+ ldr q5, [x_src_last, #16*5]
+ ldr q6, [x_src_last, #16*6]
+ ldr q7, [x_src_last, #16*7]
+ add x_src_last, x_src_last, #128
+
+ mov v8.16b, v0.16b
+ mov v9.16b, v1.16b
+ mov v10.16b, v2.16b
+ mov v11.16b, v3.16b
+ mov v12.16b, v4.16b
+ mov v13.16b, v5.16b
+ mov v14.16b, v6.16b
+ mov v15.16b, v7.16b
+
+ cbz w_vects, .Lloop128_vects_end
+
+ sub x_src_ptr, x_src, #8
+.Lloop128_vects:
+ ldr x_srcn, [x_src_ptr], #-8
+ add x_srcn, x_srcn, x_col
+ cmp x_src_ptr, x_src_ptr_end
+
+ ldr q16, [x_srcn, #16*0]
+ ldr q17, [x_srcn, #16*1]
+ ldr q18, [x_srcn, #16*2]
+ ldr q19, [x_srcn, #16*3]
+ ldr q20, [x_srcn, #16*4]
+ ldr q21, [x_srcn, #16*5]
+ ldr q22, [x_srcn, #16*6]
+ ldr q23, [x_srcn, #16*7]
+
+ eor v0.16b, v0.16b, v16.16b
+ eor v1.16b, v1.16b, v17.16b
+ eor v2.16b, v2.16b, v18.16b
+ eor v3.16b, v3.16b, v19.16b
+ eor v4.16b, v4.16b, v20.16b
+ eor v5.16b, v5.16b, v21.16b
+ eor v6.16b, v6.16b, v22.16b
+ eor v7.16b, v7.16b, v23.16b
+
+ cmhs v_mask0.16b, v8.16b, v_0x80.16b
+ cmhs v_mask1.16b, v9.16b, v_0x80.16b
+ cmhs v_mask2.16b, v10.16b, v_0x80.16b
+ cmhs v_mask3.16b, v11.16b, v_0x80.16b
+ and v_mask0.16b, v_mask0.16b, v_gf8poly.16b
+ and v_mask1.16b, v_mask1.16b, v_gf8poly.16b
+ and v_mask2.16b, v_mask2.16b, v_gf8poly.16b
+ and v_mask3.16b, v_mask3.16b, v_gf8poly.16b
+ shl v8.16b, v8.16b, #1
+ shl v9.16b, v9.16b, #1
+ shl v10.16b, v10.16b, #1
+ shl v11.16b, v11.16b, #1
+ eor v8.16b, v8.16b, v_mask0.16b
+ eor v9.16b, v9.16b, v_mask1.16b
+ eor v10.16b, v10.16b, v_mask2.16b
+ eor v11.16b, v11.16b, v_mask3.16b
+ eor v8.16b, v8.16b, v16.16b
+ eor v9.16b, v9.16b, v17.16b
+ eor v10.16b, v10.16b, v18.16b
+ eor v11.16b, v11.16b, v19.16b
+
+ cmhs v_mask0.16b, v12.16b, v_0x80.16b
+ cmhs v_mask1.16b, v13.16b, v_0x80.16b
+ cmhs v_mask2.16b, v14.16b, v_0x80.16b
+ cmhs v_mask3.16b, v15.16b, v_0x80.16b
+ and v_mask0.16b, v_mask0.16b, v_gf8poly.16b
+ and v_mask1.16b, v_mask1.16b, v_gf8poly.16b
+ and v_mask2.16b, v_mask2.16b, v_gf8poly.16b
+ and v_mask3.16b, v_mask3.16b, v_gf8poly.16b
+ shl v12.16b, v12.16b, #1
+ shl v13.16b, v13.16b, #1
+ shl v14.16b, v14.16b, #1
+ shl v15.16b, v15.16b, #1
+ eor v12.16b, v12.16b, v_mask0.16b
+ eor v13.16b, v13.16b, v_mask1.16b
+ eor v14.16b, v14.16b, v_mask2.16b
+ eor v15.16b, v15.16b, v_mask3.16b
+ eor v12.16b, v12.16b, v20.16b
+ eor v13.16b, v13.16b, v21.16b
+ eor v14.16b, v14.16b, v22.16b
+ eor v15.16b, v15.16b, v23.16b
+
+ bne .Lloop128_vects
+
+.Lloop128_vects_end:
+ /* v16~v23: true p, q */
+ ldr q16, [x_dst_p, #16*0]
+ ldr q17, [x_dst_p, #16*1]
+ ldr q18, [x_dst_p, #16*2]
+ ldr q19, [x_dst_p, #16*3]
+ ldr q20, [x_dst_p, #16*4]
+ ldr q21, [x_dst_p, #16*5]
+ ldr q22, [x_dst_p, #16*6]
+ ldr q23, [x_dst_p, #16*7]
+
+ cmeq v0.16b, v0.16b, v16.16b
+ cmeq v1.16b, v1.16b, v17.16b
+ cmeq v2.16b, v2.16b, v18.16b
+ cmeq v3.16b, v3.16b, v19.16b
+ cmeq v4.16b, v4.16b, v20.16b
+ cmeq v5.16b, v5.16b, v21.16b
+ cmeq v6.16b, v6.16b, v22.16b
+ cmeq v7.16b, v7.16b, v23.16b
+
+ ldr q16, [x_dst_q, #16*0]
+ ldr q17, [x_dst_q, #16*1]
+ ldr q18, [x_dst_q, #16*2]
+ ldr q19, [x_dst_q, #16*3]
+ ldr q20, [x_dst_q, #16*4]
+ ldr q21, [x_dst_q, #16*5]
+ ldr q22, [x_dst_q, #16*6]
+ ldr q23, [x_dst_q, #16*7]
+
+ and v0.16b, v0.16b, v1.16b
+ and v2.16b, v2.16b, v3.16b
+ and v4.16b, v4.16b, v5.16b
+ and v6.16b, v6.16b, v7.16b
+ and v0.16b, v0.16b, v2.16b
+ and v4.16b, v4.16b, v6.16b
+ and v0.16b, v0.16b, v4.16b
+
+ cmeq v8.16b, v8.16b, v16.16b
+ cmeq v9.16b, v9.16b, v17.16b
+ cmeq v10.16b, v10.16b, v18.16b
+ cmeq v11.16b, v11.16b, v19.16b
+ cmeq v12.16b, v12.16b, v20.16b
+ cmeq v13.16b, v13.16b, v21.16b
+ cmeq v14.16b, v14.16b, v22.16b
+ cmeq v15.16b, v15.16b, v23.16b
+
+ and v8.16b, v8.16b, v9.16b
+ and v10.16b, v10.16b, v11.16b
+ and v12.16b, v12.16b, v13.16b
+ and v14.16b, v14.16b, v15.16b
+ and v8.16b, v8.16b, v10.16b
+ and v12.16b, v12.16b, v14.16b
+ and v8.16b, v8.16b, v12.16b
+
+ and v0.16b, v0.16b, v8.16b
+
+ uminv b0, v0.16b
+ umov w_min, v0.b[0]
+ cbz w_min, .Lloop128_end
+
+ add x_dst_p, x_dst_p, #128
+ add x_dst_q, x_dst_q, #128
+ cmp x_dst_q, x_dst_q_end
+ add w_col, w_col, #128
+ bls .Lloop128
+
+.Lloop128_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ cbz w_min, .Lerror
+
+ add x_dst_q_end, x_dst_q_end, #128
+
+.Lloop16_init:
+ tst w_len, #0x7F
+ beq .Lloop16_end
+ sub x_dst_q_end, x_dst_q_end, #16
+
+ /* batch process (vects-2)*16 bytes */
+ /* v0: p; v1: q; v2: in; v3: mask */
+.Lloop16:
+ ldr q0, [x_src_last], #16
+ mov v1.16b, v0.16b
+
+ cbz w_vects, .Lloop16_vects_end
+
+ sub x_src_ptr, x_src, #8
+.Lloop16_vects:
+ ldr x_srcn, [x_src_ptr], #-8
+ ldr q2, [x_srcn, x_col]
+ cmp x_src_ptr, x_src_ptr_end
+
+ eor v0.16b, v0.16b, v2.16b
+
+ cmhs v3.16b, v1.16b, v_0x80.16b
+ and v3.16b, v3.16b, v_gf8poly.16b
+
+ shl v1.16b, v1.16b, #1
+ eor v1.16b, v1.16b, v2.16b
+ eor v1.16b, v1.16b, v3.16b
+
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ /* v4: true p; v5: true q */
+ ldr q4, [x_dst_p], #16
+ ldr q5, [x_dst_q], #16
+ cmp x_dst_q, x_dst_q_end
+
+ cmeq v0.16b, v0.16b, v4.16b
+ cmeq v1.16b, v1.16b, v5.16b
+ and v0.16b, v0.16b, v1.16b
+
+ uminv b0, v0.16b
+ umov w_min, v0.b[0]
+ cbz w_min, .Lerror
+
+ add w_col, w_col, #16
+ bls .Lloop16
+
+.Lloop16_end:
+ mov w_ret, #0
+ ret
+
+.Lerror:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/raid/aarch64/pq_gen_neon.S b/src/isa-l/raid/aarch64/pq_gen_neon.S
new file mode 100644
index 000000000..f60ad1211
--- /dev/null
+++ b/src/isa-l/raid/aarch64/pq_gen_neon.S
@@ -0,0 +1,282 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.text
+
+.global pq_gen_neon
+.type pq_gen_neon, %function
+
+/* int pq_gen_neon(int vects, int len, void **src) */
+
+/* arguments */
+w_vects .req w0 /* MUST be >= 3 */
+x_vects .req x0
+w_len .req w1 /* MUST be a multiple of 16 bytes */
+x_len .req x1
+x_src .req x2
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_dst_p .req x3
+x_dst_q .req x4
+x_dst_q_end .req x5
+w_col .req w6
+x_col .req x6
+x_src_ptr .req x7
+x_src_ptr_end .req x9
+x_src_last .req x10
+x_srcn .req x11
+/* vectors */
+/* v0 ~ v7 : temporary p */
+/* v8 ~ v15: temporary q */
+/* v16 ~ v23: next 128 bytes */
+v_mask0 .req v24
+v_mask1 .req v25
+v_mask2 .req v26
+v_mask3 .req v27
+v_gf8poly .req v28
+v_0x80 .req v29
+
+/*
+ * src_ptr_end -->
+ * -------+----------+
+ * . | src[0] |
+ * . +----------+ +------------------+
+ * src_ptr --> | src[1] | - srcn -> | buffer |
+ * . +----------+ +------------------+
+ * . | ...... |
+ * . +----------+
+ * . | src[v-4] |
+ * -------+----------+ src_last +------------------+
+ * src --> | src[v-3] | ---------> | buffer |
+ * +----------+ +------------------+
+ * | src[v-2] | - dst_p -> | buffer |
+ * +----------+ +------------------+
+ * | src[v-1] | - dst_q -> | buffer | dst_q_end
+ * +----------+ +------------------+
+ */
+
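+/*
+ * The q parity is built by Horner's rule: for each source vector,
+ * q = gf_mul2(q) ^ src per byte, with gf_mul2(b) = (b << 1) ^
+ * ((b & 0x80) ? 0x1D : 0) in GF(2^8) over polynomial 0x11D. cmhs against
+ * v_0x80 yields an all-ones byte wherever the top bit is set; and-ing
+ * with v_gf8poly selects the 0x1D reduction term.
+ */
+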
+pq_gen_neon:
+ sub x_src_ptr_end, x_src, #8
+
+ sub w_vects, w_vects, #3
+ add x_src, x_src, x_vects, lsl #3
+
+ ldr x_src_last, [x_src]
+ ldp x_dst_p, x_dst_q, [x_src, #8]
+
+ add x_dst_q_end, x_dst_q, x_len
+
+ mov w_col, #0
+ movi v_gf8poly.16b, #0x1D
+ movi v_0x80.16b, #0x80
+
+.Lloop128_init:
+ /* less than 128 bytes? */
+ cmp w_len, #128
+ blo .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_dst_q_end, x_dst_q_end, #128
+
+ /* batch process (vects-2)*128 bytes */
+ /* v0~v7: p; v8~v15: q; v16~v23: in */
+.Lloop128:
+ ldr q0, [x_src_last, #16*0]
+ ldr q1, [x_src_last, #16*1]
+ ldr q2, [x_src_last, #16*2]
+ ldr q3, [x_src_last, #16*3]
+ ldr q4, [x_src_last, #16*4]
+ ldr q5, [x_src_last, #16*5]
+ ldr q6, [x_src_last, #16*6]
+ ldr q7, [x_src_last, #16*7]
+ add x_src_last, x_src_last, #128
+
+ mov v8.16b, v0.16b
+ mov v9.16b, v1.16b
+ mov v10.16b, v2.16b
+ mov v11.16b, v3.16b
+ mov v12.16b, v4.16b
+ mov v13.16b, v5.16b
+ mov v14.16b, v6.16b
+ mov v15.16b, v7.16b
+
+ cbz w_vects, .Lloop128_vects_end
+
+ sub x_src_ptr, x_src, #8
+.Lloop128_vects:
+ ldr x_srcn, [x_src_ptr], #-8
+ add x_srcn, x_srcn, x_col
+ cmp x_src_ptr, x_src_ptr_end
+
+ ldr q16, [x_srcn, #16*0]
+ ldr q17, [x_srcn, #16*1]
+ ldr q18, [x_srcn, #16*2]
+ ldr q19, [x_srcn, #16*3]
+ ldr q20, [x_srcn, #16*4]
+ ldr q21, [x_srcn, #16*5]
+ ldr q22, [x_srcn, #16*6]
+ ldr q23, [x_srcn, #16*7]
+
+ eor v0.16b, v0.16b, v16.16b
+ eor v1.16b, v1.16b, v17.16b
+ eor v2.16b, v2.16b, v18.16b
+ eor v3.16b, v3.16b, v19.16b
+ eor v4.16b, v4.16b, v20.16b
+ eor v5.16b, v5.16b, v21.16b
+ eor v6.16b, v6.16b, v22.16b
+ eor v7.16b, v7.16b, v23.16b
+
+ cmhs v_mask0.16b, v8.16b, v_0x80.16b
+ cmhs v_mask1.16b, v9.16b, v_0x80.16b
+ cmhs v_mask2.16b, v10.16b, v_0x80.16b
+ cmhs v_mask3.16b, v11.16b, v_0x80.16b
+ and v_mask0.16b, v_mask0.16b, v_gf8poly.16b
+ and v_mask1.16b, v_mask1.16b, v_gf8poly.16b
+ and v_mask2.16b, v_mask2.16b, v_gf8poly.16b
+ and v_mask3.16b, v_mask3.16b, v_gf8poly.16b
+ shl v8.16b, v8.16b, #1
+ shl v9.16b, v9.16b, #1
+ shl v10.16b, v10.16b, #1
+ shl v11.16b, v11.16b, #1
+ eor v8.16b, v8.16b, v_mask0.16b
+ eor v9.16b, v9.16b, v_mask1.16b
+ eor v10.16b, v10.16b, v_mask2.16b
+ eor v11.16b, v11.16b, v_mask3.16b
+ eor v8.16b, v8.16b, v16.16b
+ eor v9.16b, v9.16b, v17.16b
+ eor v10.16b, v10.16b, v18.16b
+ eor v11.16b, v11.16b, v19.16b
+
+ cmhs v_mask0.16b, v12.16b, v_0x80.16b
+ cmhs v_mask1.16b, v13.16b, v_0x80.16b
+ cmhs v_mask2.16b, v14.16b, v_0x80.16b
+ cmhs v_mask3.16b, v15.16b, v_0x80.16b
+ and v_mask0.16b, v_mask0.16b, v_gf8poly.16b
+ and v_mask1.16b, v_mask1.16b, v_gf8poly.16b
+ and v_mask2.16b, v_mask2.16b, v_gf8poly.16b
+ and v_mask3.16b, v_mask3.16b, v_gf8poly.16b
+ shl v12.16b, v12.16b, #1
+ shl v13.16b, v13.16b, #1
+ shl v14.16b, v14.16b, #1
+ shl v15.16b, v15.16b, #1
+ eor v12.16b, v12.16b, v_mask0.16b
+ eor v13.16b, v13.16b, v_mask1.16b
+ eor v14.16b, v14.16b, v_mask2.16b
+ eor v15.16b, v15.16b, v_mask3.16b
+ eor v12.16b, v12.16b, v20.16b
+ eor v13.16b, v13.16b, v21.16b
+ eor v14.16b, v14.16b, v22.16b
+ eor v15.16b, v15.16b, v23.16b
+
+ bne .Lloop128_vects
+
+.Lloop128_vects_end:
+ str q0, [x_dst_p, #16*0]
+ str q1, [x_dst_p, #16*1]
+ str q2, [x_dst_p, #16*2]
+ str q3, [x_dst_p, #16*3]
+ str q4, [x_dst_p, #16*4]
+ str q5, [x_dst_p, #16*5]
+ str q6, [x_dst_p, #16*6]
+ str q7, [x_dst_p, #16*7]
+
+ str q8, [x_dst_q, #16*0]
+ str q9, [x_dst_q, #16*1]
+ str q10, [x_dst_q, #16*2]
+ str q11, [x_dst_q, #16*3]
+ str q12, [x_dst_q, #16*4]
+ str q13, [x_dst_q, #16*5]
+ str q14, [x_dst_q, #16*6]
+ str q15, [x_dst_q, #16*7]
+
+ add x_dst_p, x_dst_p, #128
+ add x_dst_q, x_dst_q, #128
+ cmp x_dst_q, x_dst_q_end
+ add w_col, w_col, #128
+ bls .Lloop128
+
+.Lloop128_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ add x_dst_q_end, x_dst_q_end, #128
+
+.Lloop16_init:
+ tst w_len, #0x7F
+ beq .Lloop16_end
+ sub x_dst_q_end, x_dst_q_end, #16
+
+ /* batch process (vects-2)*16 bytes */
+ /* v0: p; v1: q; v2: in; v3: mask */
+.Lloop16:
+ ldr q0, [x_src_last], #16
+ mov v1.16b, v0.16b
+
+ cbz w_vects, .Lloop16_vects_end
+
+ sub x_src_ptr, x_src, #8
+.Lloop16_vects:
+ ldr x_srcn, [x_src_ptr], #-8
+ ldr q2, [x_srcn, x_col]
+ cmp x_src_ptr, x_src_ptr_end
+
+ eor v0.16b, v0.16b, v2.16b
+
+ cmhs v3.16b, v1.16b, v_0x80.16b
+ and v3.16b, v3.16b, v_gf8poly.16b
+
+ shl v1.16b, v1.16b, #1
+ eor v1.16b, v1.16b, v2.16b
+ eor v1.16b, v1.16b, v3.16b
+
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ str q0, [x_dst_p], #16
+ str q1, [x_dst_q], #16
+ cmp x_dst_q, x_dst_q_end
+ add w_col, w_col, #16
+ bls .Lloop16
+
+.Lloop16_end:
+ mov w_ret, #0
+ ret
diff --git a/src/isa-l/raid/aarch64/raid_aarch64_dispatcher.c b/src/isa-l/raid/aarch64/raid_aarch64_dispatcher.c
new file mode 100644
index 000000000..c81bd8c98
--- /dev/null
+++ b/src/isa-l/raid/aarch64/raid_aarch64_dispatcher.c
@@ -0,0 +1,61 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(xor_gen)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(xor_gen_neon);
+ return PROVIDER_BASIC(xor_gen);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(xor_check)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(xor_check_neon);
+ return PROVIDER_BASIC(xor_check);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(pq_gen)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(pq_gen_neon);
+ return PROVIDER_BASIC(pq_gen);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(pq_check)
+{
+ if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+ return PROVIDER_INFO(pq_check_neon);
+ return PROVIDER_BASIC(pq_check);
+
+}
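
Each dispatcher above follows the same pattern: probe the Linux hwcaps once and bind either the NEON implementation or the portable base version. A self-contained sketch of that selection logic (function names are illustrative; the actual binding is done through the mbin_interface stubs declared in raid_multibinary_arm.S below):

#include <sys/auxv.h>
#include <asm/hwcap.h>

typedef int (*xor_gen_fn)(int vects, int len, void **array);

extern int xor_gen_base(int vects, int len, void **array); /* portable C path */
extern int xor_gen_neon(int vects, int len, void **array); /* NEON path       */

static xor_gen_fn resolve_xor_gen(void)
{
	/* HWCAP_ASIMD advertises Advanced SIMD (NEON) support on aarch64. */
	if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
		return xor_gen_neon;
	return xor_gen_base;
}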
diff --git a/src/isa-l/raid/aarch64/raid_multibinary_arm.S b/src/isa-l/raid/aarch64/raid_multibinary_arm.S
new file mode 100644
index 000000000..0316239ec
--- /dev/null
+++ b/src/isa-l/raid/aarch64/raid_multibinary_arm.S
@@ -0,0 +1,36 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "aarch64_multibinary.h"
+
+
+mbin_interface xor_gen
+mbin_interface xor_check
+mbin_interface pq_gen
+mbin_interface pq_check
diff --git a/src/isa-l/raid/aarch64/xor_check_neon.S b/src/isa-l/raid/aarch64/xor_check_neon.S
new file mode 100644
index 000000000..95cb7d1d1
--- /dev/null
+++ b/src/isa-l/raid/aarch64/xor_check_neon.S
@@ -0,0 +1,271 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.text
+
+.global xor_check_neon
+.type xor_check_neon, %function
+
+/* int xor_check_neon(int vects, int len, void **src) */
+
+/* arguments */
+w_vects		.req	w0  /* must be >= 2 */
+x_vects .req x0
+w_len .req w1
+x_len .req x1
+x_src .req x2
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+w_in .req w1 /* share w_len */
+x_src0 .req x3
+x_src0_end .req x4
+w_len256 .req w5 /* share w_len16 */
+x_len256 .req x5
+w_len16 .req w5
+x_len16 .req x5
+w_col .req w6
+x_col .req x6
+x_src_ptr .req x7
+x_srcn .req x9
+x_src_ptr_end .req x10
+w_xor .req w11
+/* v0 ~ v15: temporary results */
+/* v16 ~ v31: next 256 bytes */
+
+/*
+ * +----------+ +------------------+
+ * src --> | src[0] | - src0 -> | buffer | src0_end
+ * --------+----------+ +------------------+
+ * . | ...... |
+ * . +----------+ +------------------+
+ * src_ptr ~~> | src[n] | - srcn ~> | buffer |
+ * . +----------+ +------------------+
+ * . | ...... |
+ * . +----------+
+ * . | src[v-1] |
+ * --------+----------+
+ * src_ptr_end -->
+ */
+
+xor_check_neon:
+ add x_src_ptr_end, x_src, x_vects, lsl #3
+ ldr x_src0, [x_src]
+ add x_src0_end, x_src0, x_len
+
+ sub w_vects, w_vects, #1
+ mov w_col, #0
+ mov w_xor, #0
+
+.Lloop256_init:
+ /* len256 = len - len%256; len %= 256 */
+ mov w_len256, w_len
+ and w_len, w_len, #0xFF
+ sub w_len256, w_len256, w_len
+
+	/* less than 256 bytes? */
+ cbz w_len256, .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src0_end, x_src0_end, #256
+
+ /* batch process vects*256 bytes */
+.Lloop256:
+ ldr q0, [x_src0, #16*0]
+ ldr q1, [x_src0, #16*1]
+ ldr q2, [x_src0, #16*2]
+ ldr q3, [x_src0, #16*3]
+ ldr q4, [x_src0, #16*4]
+ ldr q5, [x_src0, #16*5]
+ ldr q6, [x_src0, #16*6]
+ ldr q7, [x_src0, #16*7]
+ ldr q8, [x_src0, #16*8]
+ ldr q9, [x_src0, #16*9]
+ ldr q10, [x_src0, #16*10]
+ ldr q11, [x_src0, #16*11]
+ ldr q12, [x_src0, #16*12]
+ ldr q13, [x_src0, #16*13]
+ ldr q14, [x_src0, #16*14]
+ ldr q15, [x_src0, #16*15]
+ add x_src0, x_src0, #256
+
+ cbz w_vects, .Lloop256_vects_end
+
+ add x_src_ptr, x_src, #8
+.Lloop256_vects:
+ ldr x_srcn, [x_src_ptr], #8
+ add x_srcn, x_srcn, x_col
+ cmp x_src_ptr, x_src_ptr_end
+
+ ldr q16, [x_srcn, #16*0]
+ ldr q17, [x_srcn, #16*1]
+ ldr q18, [x_srcn, #16*2]
+ ldr q19, [x_srcn, #16*3]
+ ldr q20, [x_srcn, #16*4]
+ ldr q21, [x_srcn, #16*5]
+ ldr q22, [x_srcn, #16*6]
+ ldr q23, [x_srcn, #16*7]
+ ldr q24, [x_srcn, #16*8]
+ ldr q25, [x_srcn, #16*9]
+ ldr q26, [x_srcn, #16*10]
+ ldr q27, [x_srcn, #16*11]
+ ldr q28, [x_srcn, #16*12]
+ ldr q29, [x_srcn, #16*13]
+ ldr q30, [x_srcn, #16*14]
+ ldr q31, [x_srcn, #16*15]
+
+ eor v0.16b, v0.16b, v16.16b
+ eor v1.16b, v1.16b, v17.16b
+ eor v2.16b, v2.16b, v18.16b
+ eor v3.16b, v3.16b, v19.16b
+ eor v4.16b, v4.16b, v20.16b
+ eor v5.16b, v5.16b, v21.16b
+ eor v6.16b, v6.16b, v22.16b
+ eor v7.16b, v7.16b, v23.16b
+ eor v8.16b, v8.16b, v24.16b
+ eor v9.16b, v9.16b, v25.16b
+ eor v10.16b, v10.16b, v26.16b
+ eor v11.16b, v11.16b, v27.16b
+ eor v12.16b, v12.16b, v28.16b
+ eor v13.16b, v13.16b, v29.16b
+ eor v14.16b, v14.16b, v30.16b
+ eor v15.16b, v15.16b, v31.16b
+
+ bne .Lloop256_vects
+
+.Lloop256_vects_end:
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v4.16b, v4.16b, v5.16b
+ orr v6.16b, v6.16b, v7.16b
+ orr v8.16b, v8.16b, v9.16b
+ orr v10.16b, v10.16b, v11.16b
+ orr v12.16b, v12.16b, v13.16b
+ orr v14.16b, v14.16b, v15.16b
+ orr v0.16b, v0.16b, v2.16b
+ orr v4.16b, v4.16b, v6.16b
+ orr v8.16b, v8.16b, v10.16b
+ orr v12.16b, v12.16b, v14.16b
+ orr v0.16b, v0.16b, v4.16b
+ orr v8.16b, v8.16b, v12.16b
+ orr v0.16b, v0.16b, v8.16b
+ umaxv b0, v0.16b
+ umov w_xor, v0.b[0]
+ cbnz w_xor, .Lloop256_end
+
+ cmp x_src0, x_src0_end
+ add w_col, w_col, #256
+ bls .Lloop256
+
+.Lloop256_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ cbnz w_xor, .Lerror
+
+ add x_src0_end, x_src0_end, #256
+
+.Lloop16_init:
+ /* len16 = len - len%16; len %= 16 */
+ mov w_len16, w_len
+ and w_len, w_len, #0xF
+ sub w_len16, w_len16, w_len
+
+ /* less than 16 bytes? */
+ cbz w_len16, .Lloop1_init
+
+ sub x_src0_end, x_src0_end, #16
+
+ /* batch process vects*16 bytes */
+.Lloop16:
+ ldr q0, [x_src0], #16
+ cbz w_vects, .Lloop16_vects_end
+
+ add x_src_ptr, x_src, #8
+.Lloop16_vects:
+ ldr x_srcn, [x_src_ptr], #8
+ cmp x_src_ptr, x_src_ptr_end
+ ldr q1, [x_srcn, x_col]
+ eor v0.16b, v0.16b, v1.16b
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ umaxv b0, v0.16b
+ umov w_xor, v0.b[0]
+ cbnz w_xor, .Lerror
+ cmp x_src0, x_src0_end
+ add w_col, w_col, #16
+ bls .Lloop16
+
+.Lloop16_end:
+ add x_src0_end, x_src0_end, #16
+
+.Lloop1_init:
+ cbnz w_len, .Lloop1
+ mov w_ret, #0
+ ret
+
+ /* batch process vects*1 bytes */
+.Lloop1:
+ ldrb w_xor, [x_src0], #1
+ cbz w_vects, .Lloop1_vects_end
+
+ add x_src_ptr, x_src, #8
+.Lloop1_vects:
+ ldr x_srcn, [x_src_ptr], #8
+ cmp x_src_ptr, x_src_ptr_end
+ ldrb w_in, [x_srcn, x_col]
+ eor w_xor, w_xor, w_in
+ bne .Lloop1_vects
+
+.Lloop1_vects_end:
+ cbnz w_xor, .Lerror
+ cmp x_src0, x_src0_end
+ add w_col, w_col, #1
+ bne .Lloop1
+
+.Lloop1_end:
+ mov w_ret, #0
+ ret
+
+.Lerror:
+ mov w_ret, #1
+ ret
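
Semantically, xor_check_neon returns 0 only when the byte-wise XOR across all vects buffers is zero at every offset; the orr reduction tree plus umaxv/umov above is how 256 bytes are tested per pass. A plain-C reference for that contract (a sketch, not the isa-l base implementation):

/* Sketch of the xor_check contract: 0 iff every column XORs to zero. */
static int xor_check_ref(int vects, int len, void **array)
{
	unsigned char **src = (unsigned char **)array;

	for (int i = 0; i < len; i++) {
		unsigned char x = 0;
		for (int v = 0; v < vects; v++)
			x ^= src[v][i];
		if (x != 0)
			return 1; /* parity mismatch */
	}
	return 0;
}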
diff --git a/src/isa-l/raid/aarch64/xor_gen_neon.S b/src/isa-l/raid/aarch64/xor_gen_neon.S
new file mode 100644
index 000000000..00f65a2ef
--- /dev/null
+++ b/src/isa-l/raid/aarch64/xor_gen_neon.S
@@ -0,0 +1,264 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.text
+
+.global xor_gen_neon
+.type xor_gen_neon, %function
+
+/* int xor_gen_neon(int vects, int len, void **src) */
+
+/* arguments */
+w_vects		.req	w0  /* must be >= 2 */
+x_vects .req x0
+w_len .req w1
+x_len .req x1
+x_src .req x2
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+w_in .req w1 /* share w_len */
+x_src0 .req x3
+x_src0_end .req x4
+w_len256 .req w5 /* share w_len16, w_xor */
+x_len256 .req x5
+w_len16 .req w5
+x_len16 .req x5
+w_xor .req w5
+w_col .req w6
+x_col .req x6
+x_src_ptr .req x7
+x_srcn .req x9
+x_dst .req x10
+x_dst_ptr .req x11
+/* v0 ~ v15: temporary results */
+/* v16 ~ v31: next 256 bytes */
+
+/*
+ * +----------+ +------------------+
+ * src --> | src[0] | - src0 -> | buffer | src0_end
+ * --------+----------+ +------------------+
+ * . | ...... |
+ * . +----------+ +------------------+
+ * src_ptr ~~> | src[n] | - srcn ~> | buffer |
+ * . +----------+ +------------------+
+ * . | ...... |
+ * . +----------+
+ * . | src[v-2] |
+ * --------+----------+ +------------------+
+ * dst_ptr --> | src[v-1] | -- dst --> | buffer |
+ * +----------+ +------------------+
+ */
+
+xor_gen_neon:
+ add x_dst_ptr, x_src, x_vects, lsl #3
+ ldr x_dst, [x_dst_ptr, #-8]!
+ ldr x_src0, [x_src]
+ add x_src0_end, x_src0, x_len
+
+ sub w_vects, w_vects, #2
+ mov w_col, #0
+
+.Lloop256_init:
+ /* len256 = len - len%256; len %= 256 */
+ mov w_len256, w_len
+ and w_len, w_len, #0xFF
+ sub w_len256, w_len256, w_len
+
+	/* less than 256 bytes? */
+ cbz w_len256, .Lloop16_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src0_end, x_src0_end, #256
+
+ /* batch process (vects-1)*256 bytes */
+.Lloop256:
+ ldr q0, [x_src0, #16*0]
+ ldr q1, [x_src0, #16*1]
+ ldr q2, [x_src0, #16*2]
+ ldr q3, [x_src0, #16*3]
+ ldr q4, [x_src0, #16*4]
+ ldr q5, [x_src0, #16*5]
+ ldr q6, [x_src0, #16*6]
+ ldr q7, [x_src0, #16*7]
+ ldr q8, [x_src0, #16*8]
+ ldr q9, [x_src0, #16*9]
+ ldr q10, [x_src0, #16*10]
+ ldr q11, [x_src0, #16*11]
+ ldr q12, [x_src0, #16*12]
+ ldr q13, [x_src0, #16*13]
+ ldr q14, [x_src0, #16*14]
+ ldr q15, [x_src0, #16*15]
+ add x_src0, x_src0, #256
+
+ cbz w_vects, .Lloop256_vects_end
+
+ add x_src_ptr, x_src, #8
+.Lloop256_vects:
+ ldr x_srcn, [x_src_ptr], #8
+ add x_srcn, x_srcn, x_col
+ cmp x_src_ptr, x_dst_ptr
+
+ ldr q16, [x_srcn, #16*0]
+ ldr q17, [x_srcn, #16*1]
+ ldr q18, [x_srcn, #16*2]
+ ldr q19, [x_srcn, #16*3]
+ ldr q20, [x_srcn, #16*4]
+ ldr q21, [x_srcn, #16*5]
+ ldr q22, [x_srcn, #16*6]
+ ldr q23, [x_srcn, #16*7]
+ ldr q24, [x_srcn, #16*8]
+ ldr q25, [x_srcn, #16*9]
+ ldr q26, [x_srcn, #16*10]
+ ldr q27, [x_srcn, #16*11]
+ ldr q28, [x_srcn, #16*12]
+ ldr q29, [x_srcn, #16*13]
+ ldr q30, [x_srcn, #16*14]
+ ldr q31, [x_srcn, #16*15]
+
+ eor v0.16b, v0.16b, v16.16b
+ eor v1.16b, v1.16b, v17.16b
+ eor v2.16b, v2.16b, v18.16b
+ eor v3.16b, v3.16b, v19.16b
+ eor v4.16b, v4.16b, v20.16b
+ eor v5.16b, v5.16b, v21.16b
+ eor v6.16b, v6.16b, v22.16b
+ eor v7.16b, v7.16b, v23.16b
+ eor v8.16b, v8.16b, v24.16b
+ eor v9.16b, v9.16b, v25.16b
+ eor v10.16b, v10.16b, v26.16b
+ eor v11.16b, v11.16b, v27.16b
+ eor v12.16b, v12.16b, v28.16b
+ eor v13.16b, v13.16b, v29.16b
+ eor v14.16b, v14.16b, v30.16b
+ eor v15.16b, v15.16b, v31.16b
+
+ bne .Lloop256_vects
+
+.Lloop256_vects_end:
+ str q0, [x_dst, #16*0]
+ str q1, [x_dst, #16*1]
+ str q2, [x_dst, #16*2]
+ str q3, [x_dst, #16*3]
+ str q4, [x_dst, #16*4]
+ str q5, [x_dst, #16*5]
+ str q6, [x_dst, #16*6]
+ str q7, [x_dst, #16*7]
+ str q8, [x_dst, #16*8]
+ str q9, [x_dst, #16*9]
+ str q10, [x_dst, #16*10]
+ str q11, [x_dst, #16*11]
+ str q12, [x_dst, #16*12]
+ str q13, [x_dst, #16*13]
+ str q14, [x_dst, #16*14]
+ str q15, [x_dst, #16*15]
+
+ cmp x_src0, x_src0_end
+ add x_dst, x_dst, #256
+ add w_col, w_col, #256
+ bls .Lloop256
+
+.Lloop256_end:
+ /* restore d8 ~ d15 */
+ ldp d8, d9, [sp]
+ ldp d10, d11, [sp, #16]
+ ldp d12, d13, [sp, #32]
+ ldp d14, d15, [sp, #48]
+ add sp, sp, #64
+
+ add x_src0_end, x_src0_end, #256
+
+.Lloop16_init:
+ /* len16 = len - len%16; len %= 16 */
+ mov w_len16, w_len
+ and w_len, w_len, #0xF
+ sub w_len16, w_len16, w_len
+
+ /* less than 16 bytes? */
+ cbz w_len16, .Lloop1_init
+
+ sub x_src0_end, x_src0_end, #16
+
+ /* batch process (vects-1)*16 bytes */
+.Lloop16:
+ ldr q0, [x_src0], #16
+ cbz w_vects, .Lloop16_vects_end
+
+ add x_src_ptr, x_src, #8
+.Lloop16_vects:
+ ldr x_srcn, [x_src_ptr], #8
+ cmp x_src_ptr, x_dst_ptr
+ ldr q1, [x_srcn, x_col]
+ eor v0.16b, v0.16b, v1.16b
+ bne .Lloop16_vects
+
+.Lloop16_vects_end:
+ cmp x_src0, x_src0_end
+ str q0, [x_dst], #16
+ add w_col, w_col, #16
+ bls .Lloop16
+
+.Lloop16_end:
+ add x_src0_end, x_src0_end, #16
+
+.Lloop1_init:
+ cbnz w_len, .Lloop1
+ mov w_ret, #0
+ ret
+
+ /* batch process (vects-1)*1 bytes */
+.Lloop1:
+ ldrb w_xor, [x_src0], #1
+ cbz w_vects, .Lloop1_vects_end
+
+ add x_src_ptr, x_src, #8
+.Lloop1_vects:
+ ldr x_srcn, [x_src_ptr], #8
+ cmp x_src_ptr, x_dst_ptr
+ ldrb w_in, [x_srcn, x_col]
+ eor w_xor, w_xor, w_in
+ bne .Lloop1_vects
+
+.Lloop1_vects_end:
+ cmp x_src0, x_src0_end
+ strb w_xor, [x_dst], #1
+ add w_col, w_col, #1
+ bne .Lloop1
+
+.Lloop1_end:
+ mov w_ret, #0
+ ret
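
xor_gen_neon's contract is the generating counterpart: the last pointer in the array receives the XOR of the first vects-1 buffers, which is why the function peels the destination off with the pre-decrement load of x_dst at entry. A scalar sketch of the same contract:

/* Sketch of the xor_gen contract: dst = XOR of the first vects-1 buffers. */
static int xor_gen_ref(int vects, int len, void **array)
{
	unsigned char **src = (unsigned char **)array;
	unsigned char *dst = src[vects - 1];

	for (int i = 0; i < len; i++) {
		unsigned char x = 0;
		for (int v = 0; v < vects - 1; v++)
			x ^= src[v][i];
		dst[i] = x;
	}
	return 0;
}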
diff --git a/src/isa-l/raid/pq_check_sse.asm b/src/isa-l/raid/pq_check_sse.asm
new file mode 100644
index 000000000..f2bc8a6cd
--- /dev/null
+++ b/src/isa-l/raid/pq_check_sse.asm
@@ -0,0 +1,277 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized pq check of N source vectors using SSE3
+;;; int pq_check_sse(int vects, int len, void **array)
+
+;;; Checks that the P and Q parity held in the last two pointers of **array
+;;; match parity recomputed from the N (vects-2) sources.
+;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmp3 r10
+ %define return rax
+ %define stack_size 7*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm15, 6*16
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+	movdqa	xmm15, [rsp + 6*16]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos return
+
+%define xp1 xmm0
+%define xq1 xmm1
+%define xtmp1 xmm2
+%define xs1 xmm3
+
+%define xp2 xmm4
+%define xq2 xmm5
+%define xtmp2 xmm6
+%define xs2 xmm7
+
+%define xp3 xmm8
+%define xq3 xmm9
+%define xtmp3 xmm10
+%define xs3 xmm11
+
+%define xpoly xmm15
+
+;;; Use Non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movdqa
+ %define XSTR movntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+mk_global pq_check_sse, function
+func(pq_check_sse)
+ FUNC_SAVE
+ sub vec, 3 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (16-1) ;Check alignment of length
+ jnz return_fail
+ mov pos, 0
+ movdqa xpoly, [poly]
+ cmp len, 48
+ jl loop16
+
+len_aligned_48bytes:
+ sub len, 48 ;Do end of vec first and run backward
+
+loop48:
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
+ XLDR xp2, [ptr+pos+16] ;Initialize xp2 with P2 src + 16B ahead
+	XLDR	xp3, [ptr+pos+32]	;Initialize xp3 with P3 src + 32B ahead
+ pxor xq1, xq1 ;q1 = 0
+ pxor xq2, xq2 ;q2 = 0
+ pxor xq3, xq3 ;q3 = 0
+
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
+ XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
+
+next_vect:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ pxor xp1, xs1 ; p1 ^= s1
+ pxor xp2, xs2 ; p2 ^= s2
+	pxor	xp3, xs3		; p3 ^= s3
+ pxor xq1, xs1 ; q1 ^= s1
+ pxor xq2, xs2 ; q2 ^= s2
+ pxor xq3, xs3 ; q3 ^= s3
+ pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
+ pxor xtmp2, xtmp2 ; xtmp2 = 0
+ pxor xtmp3, xtmp3 ; xtmp3 = 0
+ pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
+ pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
+ pcmpgtb xtmp3, xq3 ; xtmp3 = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp1 = poly or 0x00
+ pand xtmp2, xpoly ; xtmp2 = poly or 0x00
+ pand xtmp3, xpoly ; xtmp3 = poly or 0x00
+ XLDR xs1, [ptr+pos] ; Get next vector (source data1)
+ XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
+ XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
+ paddb xq1, xq1 ; q1 = q1<<1
+ paddb xq2, xq2 ; q2 = q2<<1
+ paddb xq3, xq3 ; q3 = q3<<1
+ pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
+ pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
+ pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
+ jg next_vect ; Loop for each vect except 0
+
+ pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
+ pxor xq1, xs1 ;q1 ^= 1 * s1[0]
+ pxor xp2, xs2 ;p2 ^= s2[0]
+ pxor xq2, xs2 ;q2 ^= 1 * s2[0]
+ pxor xp3, xs3 ;p3 ^= s3[0]
+ pxor xq3, xs3 ;q3 ^= 1 * s3[0]
+
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ XLDR xtmp1, [tmp+pos] ;re-init xq1 with Q1 src
+ XLDR xtmp2, [tmp+pos+16] ;re-init xq2 with Q2 src + 16B ahead
+	XLDR	xtmp3, [tmp+pos+32]	;re-init xq3 with Q3 src + 32B ahead
+
+ pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
+ pxor xq2, xtmp2
+ pxor xq3, xtmp3
+
+ por xp1, xq1 ;Confirm that all P&Q parity are 0
+ por xp1, xp2
+ por xp1, xq2
+ por xp1, xp3
+ por xp1, xq3
+ ptest xp1, xp1
+ jnz return_fail
+ add pos, 48
+ cmp pos, len
+ jle loop48
+
+
+ ;; ------------------------------
+ ;; Do last 16 or 32 Bytes remaining
+ add len, 48
+ cmp pos, len
+ je return_pass
+
+loop16:
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
+ pxor xq1, xq1 ;q = 0
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+
+next_vect16:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ pxor xq1, xs1 ; q ^= s
+ pxor xtmp1, xtmp1 ; xtmp = 0
+ pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp = poly or 0x00
+ pxor xp1, xs1 ; p ^= s
+ paddb xq1, xq1 ; q = q<<1
+ pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
+ XLDR xs1, [ptr+pos] ; Get next vector (source data)
+ jg next_vect16 ; Loop for each vect except 0
+
+ pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
+ pxor xq1, xs1 ;q ^= 1 * s[0]
+
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ XLDR xtmp1, [tmp+pos] ;re-init tmp with Q1 src
+ pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
+
+ por xp1, xq1 ;Confirm that all P&Q parity are = 0
+ ptest xp1, xp1
+ jnz return_fail
+ add pos, 16
+ cmp pos, len
+ jl loop16
+
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+poly:
+dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
+
+;;; func core, ver, snum
+slversion pq_check_sse, 00, 06, 0033
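
The x86 version expresses the same GF(2^8) doubling with a different idiom: pcmpgtb of an all-zero register against q is a signed compare, yielding 0xff exactly in byte lanes whose top bit is set, which then gates the 0x1d polynomial through pand; paddb q,q supplies the byte-wise left shift SSE has no dedicated instruction for. One lane in C, in the order the next_vect loop above applies it (a sketch):

#include <stdint.h>

/* One byte lane of the pxor/pcmpgtb/pand/paddb/pxor sequence (sketch). */
static inline uint8_t sse_q_step(uint8_t q, uint8_t s)
{
	q ^= s;                                       /* pxor xq, xs      */
	uint8_t mask = ((int8_t)q < 0) ? 0x1d : 0x00; /* pcmpgtb + pand   */
	return (uint8_t)((uint8_t)(q + q) ^ mask);    /* paddb, then pxor */
}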
diff --git a/src/isa-l/raid/pq_check_sse_i32.asm b/src/isa-l/raid/pq_check_sse_i32.asm
new file mode 100644
index 000000000..3271c035a
--- /dev/null
+++ b/src/isa-l/raid/pq_check_sse_i32.asm
@@ -0,0 +1,282 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized pq check of N source vectors using SSE3
+;;; int pq_check_sse(int vects, int len, void **array)
+
+;;; Checks that the P and Q parity held in the last two pointers of **array
+;;; match parity recomputed from the N (vects-2) sources.
+;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define PS 8
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define return rax
+ %define PS 8
+ %define tmp r11
+ %define stack_size 2*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ add rsp, stack_size
+ %endmacro
+
+
+%elifidn __OUTPUT_FORMAT__, elf32
+ %define arg0 edx
+ %define arg1 ecx
+ %define return eax
+ %define PS 4
+ %define func(x) x: endbranch
+ %define arg(x) [ebp+8+PS*x]
+	%define	arg2 edi	; must save/restore
+ %define arg3 esi
+ %define tmp ebx
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ push esi
+ push edi
+ push ebx
+ mov arg0, arg(0)
+ mov arg1, arg(1)
+ mov arg2, arg(2)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+	mov	esp, ebp	;restore esp (ebp is the frame pointer)
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos return
+
+%define xp1 xmm0
+%define xq1 xmm1
+%define xtmp1 xmm2
+%define xs1 xmm3
+
+%define xp2 xmm4
+%define xq2 xmm5
+%define xtmp2 xmm6
+%define xs2 xmm7
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+ %define xpoly xmm15
+%elifidn PS,4 ; 32-bit code
+ %define xpoly [poly]
+%endif
+
+;;; Use Non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+%endif
+
+section .text
+
+align 16
+mk_global pq_check_sse, function
+func(pq_check_sse)
+ FUNC_SAVE
+ sub vec, 3 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (16-1) ;Check alignment of length
+ jnz return_fail
+ mov pos, 0
+%ifidn PS,8
+ movdqa xpoly, [poly] ;For 64-bit, load poly into high xmm reg
+%endif
+ cmp len, 32
+ jl loop16
+
+len_aligned_32bytes:
+ sub len, 32 ;Do end of vec first and run backward
+
+loop32:
+ mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
+ mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
+ XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
+ XLDR xp2, [ptr+pos+16] ;Initialize xp2 with P2 src + 16B ahead
+ pxor xq1, xq1 ;q1 = 0
+ pxor xq2, xq2 ;q2 = 0
+
+ mov ptr, [arg2+vec*PS] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
+
+next_vect:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*PS] ; get pointer to next vect
+ pxor xp1, xs1 ; p1 ^= s1
+ pxor xp2, xs2 ; p2 ^= s2
+ pxor xq1, xs1 ; q1 ^= s1
+ pxor xq2, xs2 ; q2 ^= s2
+ pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
+ pxor xtmp2, xtmp2 ; xtmp2 = 0
+ pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
+ pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp1 = poly or 0x00
+ pand xtmp2, xpoly ; xtmp2 = poly or 0x00
+ XLDR xs1, [ptr+pos] ; Get next vector (source data1)
+ XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
+ paddb xq1, xq1 ; q1 = q1<<1
+ paddb xq2, xq2 ; q2 = q2<<1
+ pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
+ pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
+ jg next_vect ; Loop for each vect except 0
+
+ pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
+ pxor xq1, xs1 ;q1 ^= 1 * s1[0]
+ pxor xp2, xs2 ;p2 ^= s2[0]
+ pxor xq2, xs2 ;q2 ^= 1 * s2[0]
+
+ mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
+ XLDR xtmp1, [tmp+pos] ;re-init xq1 with Q1 src
+ XLDR xtmp2, [tmp+pos+16] ;re-init xq2 with Q2 src + 16B ahead
+
+ pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
+ pxor xq2, xtmp2
+
+ por xp1, xq1 ;Confirm that all P&Q parity are 0
+ por xp1, xp2
+ por xp1, xq2
+ ptest xp1, xp1
+ jnz return_fail
+ add pos, 32
+ cmp pos, len
+ jle loop32
+
+
+ ;; ------------------------------
+ ;; Do last 16 Bytes remaining
+ add len, 32
+ cmp pos, len
+ je return_pass
+
+loop16:
+ mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
+ mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
+ XLDR xp1, [ptr+pos] ;Initialize xp1 with P1 src
+ pxor xq1, xq1 ;q = 0
+ mov ptr, [arg2+vec*PS] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+
+next_vect16:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*PS] ; get pointer to next vect
+ pxor xq1, xs1 ; q ^= s
+ pxor xtmp1, xtmp1 ; xtmp = 0
+ pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp = poly or 0x00
+ pxor xp1, xs1 ; p ^= s
+ paddb xq1, xq1 ; q = q<<1
+ pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
+ XLDR xs1, [ptr+pos] ; Get next vector (source data)
+ jg next_vect16 ; Loop for each vect except 0
+
+ pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
+ pxor xq1, xs1 ;q ^= 1 * s[0]
+
+ mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
+ XLDR xtmp1, [tmp+pos] ;re-init tmp with Q1 src
+ pxor xq1, xtmp1 ;xq1 = q1 calculated ^ q1 saved
+
+ por xp1, xq1 ;Confirm that all P&Q parity are = 0
+ ptest xp1, xp1
+ jnz return_fail
+ add pos, 16
+ cmp pos, len
+ jl loop16
+
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+poly:
+dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
+
+;;; func core, ver, snum
+slversion pq_check_sse, 00, 06, 0033
diff --git a/src/isa-l/raid/pq_check_test.c b/src/isa-l/raid/pq_check_test.c
new file mode 100644
index 000000000..27d0203d2
--- /dev/null
+++ b/src/isa-l/raid/pq_check_test.c
@@ -0,0 +1,304 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include "raid.h"
+#include "types.h"
+
+#define TEST_SOURCES 16
+#define TEST_LEN 1024
+#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+int ref_multi_pq(int vects, int len, void **array)
+{
+ int i, j;
+ unsigned char p, q, s;
+ unsigned char **src = (unsigned char **)array;
+
+ for (i = 0; i < len; i++) {
+ q = p = src[vects - 3][i];
+
+ for (j = vects - 4; j >= 0; j--) {
+ p ^= s = src[j][i];
+			q = s ^ ((q << 1) ^ ((q & 0x80) ? 0x1d : 0));	// multiply q by 2 in GF(2^8), then add s
+ }
+
+ src[vects - 2][i] = p; // second to last pointer is p
+ src[vects - 1][i] = q; // last pointer is q
+ }
+ return 0;
+}
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, k, ret, fail = 0;
+ void *buffs[TEST_SOURCES + 2];
+ char c;
+ char *tmp_buf[TEST_SOURCES + 2];
+ int serr, lerr;
+
+ printf("Test pq_check_test %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
+
+ srand(TEST_SEED);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES + 2; i++) {
+ void *buf;
+ if (posix_memalign(&buf, 16, TEST_LEN)) {
+			printf("alloc error: Fail\n");
+ return 1;
+ }
+ buffs[i] = buf;
+ }
+
+ // Test of all zeros
+ for (i = 0; i < TEST_SOURCES + 2; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+ ref_multi_pq(TEST_SOURCES + 2, TEST_LEN, buffs);
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
+ if (ret != 0) {
+ fail++;
+ printf("\nfail zero test %d\n", ret);
+ }
+
+ ((char *)(buffs[0]))[TEST_LEN - 2] = 0x7; // corrupt buffer
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
+ if (ret == 0) {
+ fail++;
+ printf("\nfail corrupt buffer test %d\n", ret);
+ }
+ ((char *)(buffs[0]))[TEST_LEN - 2] = 0; // un-corrupt buffer
+
+ // Test corrupted buffer any location on all sources
+ for (j = 0; j < TEST_SOURCES + 2; j++) {
+ for (i = TEST_LEN - 1; i >= 0; i--) {
+ ((char *)buffs[j])[i] = 0x5; // corrupt buffer
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
+ if (ret == 0) {
+ fail++;
+ printf("\nfail corrupt zero buffer test j=%d, i=%d\n", j, i);
+ return 1;
+ }
+ ((char *)buffs[j])[i] = 0; // un-corrupt buffer
+ }
+ putchar('.');
+ }
+
+ // Test rand1
+ for (i = 0; i < TEST_SOURCES + 2; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ ref_multi_pq(TEST_SOURCES + 2, TEST_LEN, buffs);
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
+ if (ret != 0) {
+ fail++;
+ printf("fail first rand test %d\n", ret);
+ }
+
+ c = ((char *)(buffs[0]))[TEST_LEN - 2];
+ ((char *)(buffs[0]))[TEST_LEN - 2] = c ^ 0x1;
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
+ if (ret == 0) {
+ fail++;
+ printf("\nFail corrupt buffer test, passed when should have failed\n");
+ }
+ ((char *)(buffs[0]))[TEST_LEN - 2] = c; // un-corrupt buffer
+
+ // Test corrupted buffer any location on all sources w/ random data
+ for (j = 0; j < TEST_SOURCES + 2; j++) {
+ for (i = TEST_LEN - 1; i >= 0; i--) {
+ // Check it still passes
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
+ if (ret != 0) { // should pass
+ fail++;
+ printf
+ ("\nFail rand test with un-corrupted buffer j=%d, i=%d\n",
+ j, i);
+ return 1;
+ }
+ c = ((char *)buffs[j])[i];
+ ((char *)buffs[j])[i] = c ^ 1; // corrupt buffer
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN, buffs);
+ if (ret == 0) { // Check it now fails
+ fail++;
+ printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
+ return 1;
+ }
+ ((char *)buffs[j])[i] = c; // un-corrupt buffer
+ }
+ putchar('.');
+ }
+
+ // Test various number of sources, full length
+ for (j = 4; j <= TEST_SOURCES + 2; j++) {
+ // New random data
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ // Generate p,q parity for this number of sources
+ ref_multi_pq(j, TEST_LEN, buffs);
+
+ // Set errors up in each source and len position
+ for (i = 0; i < j; i++) {
+ for (k = 0; k < TEST_LEN; k++) {
+ // See if it still passes
+ ret = pq_check(j, TEST_LEN, buffs);
+ if (ret != 0) { // Should pass
+ printf("\nfail rand fixed len test %d sources\n", j);
+ fail++;
+ return 1;
+ }
+
+ c = ((char *)buffs[i])[k];
+ ((char *)buffs[i])[k] = c ^ 1; // corrupt buffer
+
+ ret = pq_check(j, TEST_LEN, buffs);
+ if (ret == 0) { // Should fail
+ printf
+ ("\nfail rand fixed len test corrupted buffer %d sources\n",
+ j);
+ fail++;
+ return 1;
+ }
+ ((char *)buffs[i])[k] = c; // un-corrupt buffer
+ }
+ }
+ putchar('.');
+ }
+
+ fflush(0);
+
+ // Test various number of sources and len
+ k = 16;
+ while (k <= TEST_LEN) {
+ char *tmp;
+ for (j = 4; j <= TEST_SOURCES + 2; j++) {
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], k);
+
+ // Generate p,q parity for this number of sources
+ ref_multi_pq(j, k, buffs);
+
+ // Inject errors at various source and len positions
+ for (lerr = 0; lerr < k; lerr++) {
+ for (serr = 0; serr < j; serr++) {
+ // See if it still passes
+ ret = pq_check(j, k, buffs);
+ if (ret != 0) { // Should pass
+ printf
+ ("\nfail rand var src, len test %d sources, len=%d\n",
+ j, k);
+ fail++;
+ return 1;
+ }
+
+ tmp = (char *)buffs[serr];
+ c = tmp[lerr];
+ ((char *)buffs[serr])[lerr] = c ^ 1; // corrupt buffer
+
+ ret = pq_check(j, k, buffs);
+ if (ret == 0) { // Should fail
+ printf
+ ("\nfail rand var src, len test corrupted buffer "
+ "%d sources, len=%d, ret=%d\n", j, k,
+ ret);
+ fail++;
+ return 1;
+ }
+ ((char *)buffs[serr])[lerr] = c; // un-corrupt buffer
+ }
+ }
+ putchar('.');
+ fflush(0);
+ }
+ k += 16;
+ }
+
+ // Test at the end of buffer
+ for (i = 0; i < TEST_LEN; i += 16) {
+ for (j = 0; j < TEST_SOURCES + 2; j++) {
+ rand_buffer(buffs[j], TEST_LEN - i);
+ tmp_buf[j] = (char *)buffs[j] + i;
+ }
+
+ pq_gen_base(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
+
+ // Test good data
+ ret = pq_check(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
+ if (ret != 0) {
+ printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
+ fail++;
+ return 1;
+ }
+ // Test bad data
+ for (serr = 0; serr < TEST_SOURCES + 2; serr++) {
+ for (lerr = 0; lerr < (TEST_LEN - i); lerr++) {
+ c = tmp_buf[serr][lerr];
+ tmp_buf[serr][lerr] = c ^ 1;
+
+ ret =
+ pq_check(TEST_SOURCES + 2, TEST_LEN - i, (void *)tmp_buf);
+ if (ret == 0) {
+ printf("fail end test corrupted buffer - "
+ "offset: %d, len: %d, ret: %d\n", i,
+ TEST_LEN - i, ret);
+ fail++;
+ return 1;
+ }
+
+ tmp_buf[serr][lerr] = c;
+ }
+ }
+
+ putchar('.');
+ fflush(0);
+ }
+
+ if (fail == 0)
+ printf("Pass\n");
+
+ return fail;
+
+}
diff --git a/src/isa-l/raid/pq_gen_avx.asm b/src/isa-l/raid/pq_gen_avx.asm
new file mode 100644
index 000000000..db4bcfb1c
--- /dev/null
+++ b/src/isa-l/raid/pq_gen_avx.asm
@@ -0,0 +1,254 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized pq of N source vectors using AVX
+;;; int pq_gen_avx(int vects, int len, void **array)
+
+;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
+;;; (**array). Last two pointers are the P and Q destinations respectively.
+;;; Vectors must be aligned to 16 bytes. Length must be 16 byte aligned.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmp3 r10
+ %define return rax
+ %define stack_size 8*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqa [rsp + 0*16], xmm6
+ vmovdqa [rsp + 1*16], xmm7
+ vmovdqa [rsp + 2*16], xmm8
+ vmovdqa [rsp + 3*16], xmm9
+ vmovdqa [rsp + 4*16], xmm10
+ vmovdqa [rsp + 5*16], xmm11
+ vmovdqa [rsp + 6*16], xmm14
+ vmovdqa [rsp + 7*16], xmm15
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqa xmm6, [rsp + 0*16]
+ vmovdqa xmm7, [rsp + 1*16]
+ vmovdqa xmm8, [rsp + 2*16]
+ vmovdqa xmm9, [rsp + 3*16]
+ vmovdqa xmm10, [rsp + 4*16]
+ vmovdqa xmm11, [rsp + 5*16]
+ vmovdqa xmm14, [rsp + 6*16]
+ vmovdqa xmm15, [rsp + 7*16]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos rax
+
+%define xp1 xmm0
+%define xq1 xmm1
+%define xtmp1 xmm2
+%define xs1 xmm3
+
+%define xp2 xmm4
+%define xq2 xmm5
+%define xtmp2 xmm6
+%define xs2 xmm7
+
+%define xp3 xmm8
+%define xq3 xmm9
+%define xtmp3 xmm10
+%define xs3 xmm11
+
+%define xzero xmm14
+%define xpoly xmm15
+
+;;; Use Non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+%else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+mk_global pq_gen_avx, function
+func(pq_gen_avx)
+ FUNC_SAVE
+ sub vec, 3 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (16-1) ;Check alignment of length
+ jnz return_fail
+ mov pos, 0
+ vmovdqa xpoly, [poly]
+ vpxor xzero, xzero, xzero
+ cmp len, 48
+ jl loop16
+
+len_aligned_48bytes:
+ sub len, 48 ;Len points to last block
+
+loop48:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
+ XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
+ vpxor xp1, xp1, xp1 ;p1 = 0
+ vpxor xp2, xp2, xp2 ;p2 = 0
+ vpxor xp3, xp3, xp3 ;p3 = 0
+ vpxor xq1, xq1, xq1 ;q1 = 0
+ vpxor xq2, xq2, xq2 ;q2 = 0
+ vpxor xq3, xq3, xq3 ;q3 = 0
+
+next_vect:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ vpxor xq1, xq1, xs1 ; q1 ^= s1
+ vpxor xq2, xq2, xs2 ; q2 ^= s2
+ vpxor xq3, xq3, xs3 ; q3 ^= s3
+ vpxor xp1, xp1, xs1 ; p1 ^= s1
+ vpxor xp2, xp2, xs2 ; p2 ^= s2
+	vpxor	xp3, xp3, xs3	; p3 ^= s3
+ vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
+ vpblendvb xtmp2, xzero, xpoly, xq2 ; xtmp2 = poly or 0x00
+ vpblendvb xtmp3, xzero, xpoly, xq3 ; xtmp3 = poly or 0x00
+ XLDR xs1, [ptr+pos] ; Get next vector (source data1)
+ XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
+ XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
+ vpaddb xq1, xq1, xq1 ; q1 = q1<<1
+ vpaddb xq2, xq2, xq2 ; q2 = q2<<1
+ vpaddb xq3, xq3, xq3 ; q3 = q3<<1
+ vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
+ vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
+ vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
+ jg next_vect ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ vpxor xp1, xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
+ vpxor xq1, xq1, xs1 ;q1 ^= 1 * s1[0]
+ vpxor xp2, xp2, xs2 ;p2 ^= s2[0]
+ vpxor xq2, xq2, xs2 ;q2 ^= 1 * s2[0]
+ vpxor xp3, xp3, xs3 ;p3 ^= s3[0]
+ vpxor xq3, xq3, xs3 ;q3 ^= 1 * s3[0]
+ XSTR [ptr+pos], xp1 ;Write parity P1 vector
+ XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
+ XSTR [ptr+pos+32], xp3 ;Write parity P3 vector
+ XSTR [tmp+pos], xq1 ;Write parity Q1 vector
+ XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
+ XSTR [tmp+pos+32], xq3 ;Write parity Q3 vector
+ add pos, 48
+ cmp pos, len
+ jle loop48
+
+ ;; ------------------------------
+ ;; Do last 16 or 32 Bytes remaining
+ add len, 48
+ cmp pos, len
+ je return_pass
+
+loop16:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ vpxor xp1, xp1, xp1 ;p = 0
+ vpxor xq1, xq1, xq1 ;q = 0
+
+next_vect16:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ vpxor xq1, xq1, xs1 ; q1 ^= s1
+ vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
+ vpxor xp1, xp1, xs1 ; p ^= s
+ vpaddb xq1, xq1, xq1 ; q = q<<1
+ vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
+ XLDR xs1, [ptr+pos] ; Get next vector (source data)
+ jg next_vect16 ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ vpxor xp1, xp1, xs1 ;p ^= s[0] - last source is already loaded
+ vpxor xq1, xq1, xs1 ;q ^= 1 * s[0]
+ XSTR [ptr+pos], xp1 ;Write parity P vector
+ XSTR [tmp+pos], xq1 ;Write parity Q vector
+ add pos, 16
+ cmp pos, len
+ jl loop16
+
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+poly:
+dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
+
+;;; func core, ver, snum
+slversion pq_gen_avx, 02, 0a, 0039
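
The AVX variant replaces the pcmpgtb/pand pair with a single vpblendvb, which selects the polynomial byte wherever q's sign bit is set. An intrinsics sketch of the inner q step in the order the next_vect loop above performs it (assumes SSE4.1/AVX intrinsics; illustrative, not the isa-l source):

#include <immintrin.h>

/* Intrinsics model of the vpblendvb-based q step above (sketch). */
static inline __m128i avx_q_step(__m128i q, __m128i s)
{
	const __m128i poly = _mm_set1_epi8(0x1d);
	const __m128i zero = _mm_setzero_si128();

	q = _mm_xor_si128(q, s);                       /* vpxor: q ^= s          */
	__m128i mask = _mm_blendv_epi8(zero, poly, q); /* poly where bit7 of q   */
	q = _mm_add_epi8(q, q);                        /* vpaddb: byte-wise << 1 */
	return _mm_xor_si128(q, mask);                 /* reduce mod 0x11d       */
}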
diff --git a/src/isa-l/raid/pq_gen_avx2.asm b/src/isa-l/raid/pq_gen_avx2.asm
new file mode 100644
index 000000000..a0bf0cc40
--- /dev/null
+++ b/src/isa-l/raid/pq_gen_avx2.asm
@@ -0,0 +1,256 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized pq of N source vectors using AVX2
+;;; int pq_gen_avx2(int vects, int len, void **array)
+
+;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
+;;; (**array). Last two pointers are the P and Q destinations respectively.
+;;; Vectors must be aligned to 32 bytes. Length must be 32 byte aligned.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmp3 r10
+ %define return rax
+ %define stack_size 8*32 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+	;; Until a save_ymm256 macro is defined
+ vmovdqu [rsp + 0*32], ymm6
+ vmovdqu [rsp + 1*32], ymm7
+ vmovdqu [rsp + 2*32], ymm8
+ vmovdqu [rsp + 3*32], ymm9
+ vmovdqu [rsp + 4*32], ymm10
+ vmovdqu [rsp + 5*32], ymm11
+ vmovdqu [rsp + 6*32], ymm14
+ vmovdqu [rsp + 7*32], ymm15
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqu ymm6, [rsp + 0*32]
+ vmovdqu ymm7, [rsp + 1*32]
+ vmovdqu ymm8, [rsp + 2*32]
+ vmovdqu ymm9, [rsp + 3*32]
+ vmovdqu ymm10, [rsp + 4*32]
+ vmovdqu ymm11, [rsp + 5*32]
+ vmovdqu ymm14, [rsp + 6*32]
+ vmovdqu ymm15, [rsp + 7*32]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos rax
+
+%define xp1 ymm0
+%define xq1 ymm1
+%define xtmp1 ymm2
+%define xs1 ymm3
+
+%define xp2 ymm4
+%define xq2 ymm5
+%define xtmp2 ymm6
+%define xs2 ymm7
+
+%define xp3 ymm8
+%define xq3 ymm9
+%define xtmp3 ymm10
+%define xs3 ymm11
+
+%define xzero ymm14
+%define xpoly ymm15
+
+;;; Use Non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+%else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+mk_global pq_gen_avx2, function
+func(pq_gen_avx2)
+ FUNC_SAVE
+ sub vec, 3 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (32-1) ;Check alignment of length
+ jnz return_fail
+ mov pos, 0
+ vmovdqa xpoly, [poly]
+ vpxor xzero, xzero, xzero
+ cmp len, 96
+ jl loop32
+
+len_aligned_96bytes:
+ sub len, 3*32 ;Len points to last block
+
+loop96:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ XLDR xs2, [ptr+pos+32] ;Preload last vector (source)
+ XLDR xs3, [ptr+pos+64] ;Preload last vector (source)
+ vpxor xp1, xp1, xp1 ;p1 = 0
+ vpxor xp2, xp2, xp2 ;p2 = 0
+ vpxor xp3, xp3, xp3 ;p3 = 0
+ vpxor xq1, xq1, xq1 ;q1 = 0
+ vpxor xq2, xq2, xq2 ;q2 = 0
+ vpxor xq3, xq3, xq3 ;q3 = 0
+
+next_vect:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ vpxor xq1, xq1, xs1 ; q1 ^= s1
+ vpxor xq2, xq2, xs2 ; q2 ^= s2
+ vpxor xq3, xq3, xs3 ; q3 ^= s3
+ vpxor xp1, xp1, xs1 ; p1 ^= s1
+ vpxor xp2, xp2, xs2 ; p2 ^= s2
+	vpxor	xp3, xp3, xs3	; p3 ^= s3
+ vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
+ vpblendvb xtmp2, xzero, xpoly, xq2 ; xtmp2 = poly or 0x00
+ vpblendvb xtmp3, xzero, xpoly, xq3 ; xtmp3 = poly or 0x00
+ XLDR xs1, [ptr+pos] ; Get next vector (source data1)
+ XLDR xs2, [ptr+pos+32] ; Get next vector (source data2)
+ XLDR xs3, [ptr+pos+64] ; Get next vector (source data3)
+ vpaddb xq1, xq1, xq1 ; q1 = q1<<1
+ vpaddb xq2, xq2, xq2 ; q2 = q2<<1
+ vpaddb xq3, xq3, xq3 ; q3 = q3<<1
+ vpxor xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
+ vpxor xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
+ vpxor xq3, xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
+ jg next_vect ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ vpxor xp1, xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
+ vpxor xq1, xq1, xs1 ;q1 ^= 1 * s1[0]
+ vpxor xp2, xp2, xs2 ;p2 ^= s2[0]
+ vpxor xq2, xq2, xs2 ;q2 ^= 1 * s2[0]
+ vpxor xp3, xp3, xs3 ;p3 ^= s3[0]
+ vpxor xq3, xq3, xs3 ;q3 ^= 1 * s3[0]
+ XSTR [ptr+pos], xp1 ;Write parity P1 vector
+ XSTR [ptr+pos+32], xp2 ;Write parity P2 vector
+ XSTR [ptr+pos+64], xp3 ;Write parity P3 vector
+ XSTR [tmp+pos], xq1 ;Write parity Q1 vector
+ XSTR [tmp+pos+32], xq2 ;Write parity Q2 vector
+ XSTR [tmp+pos+64], xq3 ;Write parity Q3 vector
+ add pos, 3*32
+ cmp pos, len
+ jle loop96
+
+ ;; ------------------------------
+	;; Do last 32 or 64 bytes remaining
+ add len, 3*32
+ cmp pos, len
+ je return_pass
+
+loop32:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ vpxor xp1, xp1, xp1 ;p = 0
+ vpxor xq1, xq1, xq1 ;q = 0
+
+next_vect32:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ vpxor xq1, xq1, xs1 ; q1 ^= s1
+ vpblendvb xtmp1, xzero, xpoly, xq1 ; xtmp1 = poly or 0x00
+ vpxor xp1, xp1, xs1 ; p ^= s
+ vpaddb xq1, xq1, xq1 ; q = q<<1
+ vpxor xq1, xq1, xtmp1 ; q = q<<1 ^ poly_masked
+ XLDR xs1, [ptr+pos] ; Get next vector (source data)
+ jg next_vect32 ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ vpxor xp1, xp1, xs1 ;p ^= s[0] - last source is already loaded
+ vpxor xq1, xq1, xs1 ;q ^= 1 * s[0]
+ XSTR [ptr+pos], xp1 ;Write parity P vector
+ XSTR [tmp+pos], xq1 ;Write parity Q vector
+ add pos, 32
+ cmp pos, len
+ jl loop32
+
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 32
+poly:
+dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
+dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
+
+;;; func core, ver, snum
+slversion pq_gen_avx2, 04, 03, 0041
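The inner loops above compute Q with the classic RAID-6 recurrence q = s ^ (2 * q) over GF(2^8) with polynomial 0x1D: vpaddb doubles every byte (q + q == q << 1) and vpblendvb selects the polynomial wherever the byte's sign bit was set before the shift. A minimal scalar sketch of one step, for reference only (not part of the library):

    #include <stdint.h>

    /* One Q-accumulation step: q' = (2 * q in GF(2^8)) ^ s.
     * Multiplying by 2 modulo x^8 + x^4 + x^3 + x^2 + 1 is a left
     * shift plus an XOR with 0x1d when bit 7 was set -- the scalar
     * form of the vpblendvb/vpaddb/vpxor triple above, which does
     * this for 32 bytes at once. */
    static uint8_t q_step(uint8_t q, uint8_t s)
    {
        uint8_t mask = (q & 0x80) ? 0x1d : 0x00;   /* vpblendvb */
        return (uint8_t)((q << 1) ^ mask) ^ s;     /* vpaddb + vpxor */
    }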
diff --git a/src/isa-l/raid/pq_gen_avx512.asm b/src/isa-l/raid/pq_gen_avx512.asm
new file mode 100644
index 000000000..179ad5c28
--- /dev/null
+++ b/src/isa-l/raid/pq_gen_avx512.asm
@@ -0,0 +1,235 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized pq of N source vectors using AVX512
+;;; int pq_gen_avx512(int vects, int len, void **array)
+
+;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
+;;; (**array). Last two pointers are the P and Q destinations respectively.
+;;; Vectors must be aligned to 64 bytes if NO_NT_LDST is not defined.
+;;; Length must be a multiple of 32 bytes.
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmp3 r10
+ %define return rax
+ %define stack_size 4*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqu [rsp + 0*16], xmm6
+ vmovdqu [rsp + 1*16], xmm7
+ vmovdqu [rsp + 2*16], xmm8
+ vmovdqu [rsp + 3*16], xmm9
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ vmovdqu xmm6, [rsp + 0*16]
+ vmovdqu xmm7, [rsp + 1*16]
+ vmovdqu xmm8, [rsp + 2*16]
+ vmovdqu xmm9, [rsp + 3*16]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos rax
+
+%define xp1 zmm0
+%define xq1 zmm1
+%define xtmp1 zmm2
+%define xs1 zmm3
+
+%define xp2 zmm4
+%define xq2 zmm5
+%define xtmp2 zmm6
+%define xs2 zmm7
+
+%define xzero zmm8
+%define xpoly zmm9
+
+%define xp1y ymm0
+%define xq1y ymm1
+%define xtmp1y ymm2
+%define xs1y ymm3
+%define xzeroy ymm8
+%define xpolyy ymm9
+
+%define NO_NT_LDST
+;;; Use non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR vmovdqu8 ;u8
+ %define XSTR vmovdqu8
+%else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+mk_global pq_gen_avx512, function
+func(pq_gen_avx512)
+ FUNC_SAVE
+ sub vec, 3 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (32-1) ;Check alignment of length
+ jnz return_fail
+ mov pos, 0
+ mov tmp, 0x1d
+ vpbroadcastb xpoly, tmp
+ vpxorq xzero, xzero, xzero
+ cmp len, 128
+ jl loop32
+
+len_aligned_32bytes:
+ sub len, 2*64 ;Len points to last block
+
+loop128:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ XLDR xs2, [ptr+pos+64] ;Preload last vector (source)
+ vpxorq xp1, xp1, xp1 ;p1 = 0
+ vpxorq xp2, xp2, xp2 ;p2 = 0
+ vpxorq xq1, xq1, xq1 ;q1 = 0
+ vpxorq xq2, xq2, xq2 ;q2 = 0
+
+next_vect:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ vpxorq xq1, xq1, xs1 ; q1 ^= s1
+ vpxorq xq2, xq2, xs2 ; q2 ^= s2
+ vpxorq xp1, xp1, xs1 ; p1 ^= s1
+ vpxorq xp2, xp2, xs2 ; p2 ^= s2
+ vpcmpb k1, xq1, xzero, 1
+ vpcmpb k2, xq2, xzero, 1
+ vpblendmb xtmp1 {k1}, xzero, xpoly
+ vpblendmb xtmp2 {k2}, xzero, xpoly
+ XLDR xs1, [ptr+pos] ; Get next vector (source data1)
+ XLDR xs2, [ptr+pos+64] ; Get next vector (source data2)
+ vpaddb xq1, xq1, xq1 ; q1 = q1<<1
+ vpaddb xq2, xq2, xq2 ; q2 = q2<<1
+ vpxorq xq1, xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
+ vpxorq xq2, xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
+ jg next_vect ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ vpxorq xp1, xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
+ vpxorq xq1, xq1, xs1 ;q1 ^= 1 * s1[0]
+ vpxorq xp2, xp2, xs2 ;p2 ^= s2[0]
+ vpxorq xq2, xq2, xs2 ;q2 ^= 1 * s2[0]
+ XSTR [ptr+pos], xp1 ;Write parity P1 vector
+ XSTR [ptr+pos+64], xp2 ;Write parity P2 vector
+ XSTR [tmp+pos], xq1 ;Write parity Q1 vector
+ XSTR [tmp+pos+64], xq2 ;Write parity Q2 vector
+ add pos, 2*64
+ cmp pos, len
+ jle loop128
+
+ ;; ------------------------------
+	;; Do last 32, 64 or 96 bytes remaining
+ add len, 2*64
+ cmp pos, len
+ je return_pass
+
+loop32:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1y, [ptr+pos] ;Preload last vector (source)
+ vpxorq xp1y, xp1y, xp1y ;p = 0
+ vpxorq xq1y, xq1y, xq1y ;q = 0
+
+next_vect32:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ vpxorq xq1y, xq1y, xs1y ; q1 ^= s1
+ vpblendvb xtmp1y, xzeroy, xpolyy, xq1y ; xtmp1 = poly or 0x00
+ vpxorq xp1y, xp1y, xs1y ; p ^= s
+ vpaddb xq1y, xq1y, xq1y ; q = q<<1
+ vpxorq xq1y, xq1y, xtmp1y ; q = q<<1 ^ poly_masked
+ XLDR xs1y, [ptr+pos] ; Get next vector (source data)
+ jg next_vect32 ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ vpxorq xp1y, xp1y, xs1y ;p ^= s[0] - last source is already loaded
+ vpxorq xq1y, xq1y, xs1y ;q ^= 1 * s[0]
+ XSTR [ptr+pos], xp1y ;Write parity P vector
+ XSTR [tmp+pos], xq1y ;Write parity Q vector
+ add pos, 32
+ cmp pos, len
+ jl loop32
+
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
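With AVX-512 the byte select moves into mask registers: vpcmpb with predicate 1 (signed less-than) sets a k-mask bit for every byte of q that is negative as int8, i.e. has bit 7 set, and vpblendmb then picks the polynomial or zero per lane. A hedged scalar model of just that mask step (illustrative, mirroring the AVX2 note above):

    #include <stdint.h>

    /* Scalar model of vpcmpb k, q, zero, 1 followed by
     * vpblendmb {k}, zero, poly: a byte that is negative as int8
     * (bit 7 set) selects 0x1d, any other byte selects 0x00. */
    static uint8_t poly_mask_avx512(uint8_t q)
    {
        return ((int8_t)q < 0) ? 0x1d : 0x00;
    }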
diff --git a/src/isa-l/raid/pq_gen_perf.c b/src/isa-l/raid/pq_gen_perf.c
new file mode 100644
index 000000000..7315c82b3
--- /dev/null
+++ b/src/isa-l/raid/pq_gen_perf.c
@@ -0,0 +1,88 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include<stdio.h>
+#include<stdint.h>
+#include<string.h>
+#include<stdlib.h>
+#include<sys/time.h>
+#include "raid.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 10
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
+
+int main(int argc, char *argv[])
+{
+ int i;
+ void *buffs[TEST_SOURCES + 2];
+ struct perf start;
+
+ printf("Test pq_gen_perf %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES + 2; i++) {
+ int ret;
+ void *buf;
+ ret = posix_memalign(&buf, 64, TEST_LEN);
+ if (ret) {
+			printf("alloc error: Fail\n");
+ return 1;
+ }
+ buffs[i] = buf;
+ }
+
+ // Setup data
+ for (i = 0; i < TEST_SOURCES + 2; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+	// Run benchmark
+ BENCHMARK(&start, BENCHMARK_TIME, pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs));
+ printf("pq_gen" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_MEM);
+
+ return 0;
+}
diff --git a/src/isa-l/raid/pq_gen_sse.asm b/src/isa-l/raid/pq_gen_sse.asm
new file mode 100644
index 000000000..b6d51481b
--- /dev/null
+++ b/src/isa-l/raid/pq_gen_sse.asm
@@ -0,0 +1,258 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized pq of N source vectors using SSE3
+;;; int pq_gen_sse(int vects, int len, void **array)
+
+;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
+;;; (**array). Last two pointers are the P and Q destinations respectively.
+;;; Vectors must be aligned to 16 bytes. Length must be a multiple of 16 bytes.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmp3 r10
+ %define return rax
+ %define stack_size 7*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm15, 6*16
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm15, [rsp + 6*16]
+ add rsp, stack_size
+ %endmacro
+%endif
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos rax
+
+%define xp1 xmm0
+%define xq1 xmm1
+%define xtmp1 xmm2
+%define xs1 xmm3
+
+%define xp2 xmm4
+%define xq2 xmm5
+%define xtmp2 xmm6
+%define xs2 xmm7
+
+%define xp3 xmm8
+%define xq3 xmm9
+%define xtmp3 xmm10
+%define xs3 xmm11
+
+%define xpoly xmm15
+
+;;; Use non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+mk_global pq_gen_sse, function
+func(pq_gen_sse)
+ FUNC_SAVE
+ sub vec, 3 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (16-1) ;Check alignment of length
+ jnz return_fail
+ mov pos, 0
+ movdqa xpoly, [poly]
+ cmp len, 48
+ jl loop16
+
+len_aligned_32bytes:
+ sub len, 48 ;Len points to last block
+
+loop48:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
+ XLDR xs3, [ptr+pos+32] ;Preload last vector (source)
+ pxor xp1, xp1 ;p1 = 0
+ pxor xp2, xp2 ;p2 = 0
+ pxor xp3, xp3 ;p3 = 0
+ pxor xq1, xq1 ;q1 = 0
+ pxor xq2, xq2 ;q2 = 0
+ pxor xq3, xq3 ;q3 = 0
+
+next_vect:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ pxor xq1, xs1 ; q1 ^= s1
+ pxor xq2, xs2 ; q2 ^= s2
+ pxor xq3, xs3 ; q3 ^= s3
+ pxor xp1, xs1 ; p1 ^= s1
+ pxor xp2, xs2 ; p2 ^= s2
+	pxor	xp3, xs3		; p3 ^= s3
+ pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
+ pxor xtmp2, xtmp2 ; xtmp2 = 0
+ pxor xtmp3, xtmp3 ; xtmp3 = 0
+ pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
+ pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
+ pcmpgtb xtmp3, xq3 ; xtmp3 = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp1 = poly or 0x00
+ pand xtmp2, xpoly ; xtmp2 = poly or 0x00
+ pand xtmp3, xpoly ; xtmp3 = poly or 0x00
+ XLDR xs1, [ptr+pos] ; Get next vector (source data1)
+ XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
+ XLDR xs3, [ptr+pos+32] ; Get next vector (source data3)
+ paddb xq1, xq1 ; q1 = q1<<1
+ paddb xq2, xq2 ; q2 = q2<<1
+ paddb xq3, xq3 ; q3 = q3<<1
+ pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
+ pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
+ pxor xq3, xtmp3 ; q3 = q3<<1 ^ poly_masked
+ jg next_vect ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
+ pxor xq1, xs1 ;q1 ^= 1 * s1[0]
+ pxor xp2, xs2 ;p2 ^= s2[0]
+ pxor xq2, xs2 ;q2 ^= 1 * s2[0]
+ pxor xp3, xs3 ;p3 ^= s3[0]
+ pxor xq3, xs3 ;q3 ^= 1 * s3[0]
+ XSTR [ptr+pos], xp1 ;Write parity P1 vector
+ XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
+ XSTR [ptr+pos+32], xp3 ;Write parity P3 vector
+ XSTR [tmp+pos], xq1 ;Write parity Q1 vector
+ XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
+ XSTR [tmp+pos+32], xq3 ;Write parity Q3 vector
+ add pos, 48
+ cmp pos, len
+ jle loop48
+
+ ;; ------------------------------
+ ;; Do last 16 or 32 Bytes remaining
+ add len, 48
+ cmp pos, len
+ je return_pass
+
+loop16:
+ mov ptr, [arg2+vec*8] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ pxor xp1, xp1 ;p = 0
+ pxor xq1, xq1 ;q = 0
+
+next_vect16:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*8] ; get pointer to next vect
+ pxor xq1, xs1 ; q1 ^= s1
+ pxor xtmp1, xtmp1 ; xtmp = 0
+ pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp = poly or 0x00
+ pxor xp1, xs1 ; p ^= s
+ paddb xq1, xq1 ; q = q<<1
+ pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
+ XLDR xs1, [ptr+pos] ; Get next vector (source data)
+ jg next_vect16 ; Loop for each vect except 0
+
+ mov ptr, [arg2+8+vec*8] ;Get address of P parity vector
+ mov tmp, [arg2+(2*8)+vec*8] ;Get address of Q parity vector
+ pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
+ pxor xq1, xs1 ;q ^= 1 * s[0]
+ XSTR [ptr+pos], xp1 ;Write parity P vector
+ XSTR [tmp+pos], xq1 ;Write parity Q vector
+ add pos, 16
+ cmp pos, len
+ jl loop16
+
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+poly:
+dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
+
+;;; func core, ver, snum
+slversion pq_gen_sse, 00, 09, 0032
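Without a byte blend instruction, the SSE path builds the same polynomial mask by signed comparison against zero: pcmpgtb computes 0 > q per byte, which is all-ones exactly where bit 7 of q is set, and pand then confines the polynomial to those lanes. A scalar sketch of the pxor/pcmpgtb/pand idiom (reference only):

    #include <stdint.h>

    /* xtmp = 0; pcmpgtb xtmp, q; pand xtmp, poly -- per byte: */
    static uint8_t poly_mask_sse(uint8_t q)
    {
        uint8_t m = ((int8_t)q < 0) ? 0xff : 0x00;  /* 0 > q, signed */
        return m & 0x1d;                            /* keep poly     */
    }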
diff --git a/src/isa-l/raid/pq_gen_sse_i32.asm b/src/isa-l/raid/pq_gen_sse_i32.asm
new file mode 100644
index 000000000..8dabb783f
--- /dev/null
+++ b/src/isa-l/raid/pq_gen_sse_i32.asm
@@ -0,0 +1,264 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized pq of N source vectors using SSE3
+;;; int pq_gen_sse(int vects, int len, void **array)
+
+;;; Generates P+Q parity vector from N (vects-2) sources in array of pointers
+;;; (**array). Last two pointers are the P and Q destinations respectively.
+;;; Vectors must be aligned to 16 bytes. Length must be a multiple of 16 bytes.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define PS 8
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define return rax
+ %define PS 8
+ %define tmp r10
+ %define stack_size 2*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ add rsp, stack_size
+ %endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf32
+ %define arg0 edx
+ %define arg1 ecx
+ %define return eax
+ %define PS 4
+ %define func(x) x: endbranch
+ %define arg(x) [ebp+8+PS*x]
+ %define arg2 edi ; must sav/restore
+ %define arg3 esi
+ %define tmp ebx
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ push esi
+ push edi
+ push ebx
+ mov arg0, arg(0)
+ mov arg1, arg(1)
+ mov arg2, arg(2)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+	mov	esp, ebp		;restore esp (frame pointer in use)
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos return
+
+%define xp1 xmm0
+%define xq1 xmm1
+%define xtmp1 xmm2
+%define xs1 xmm3
+
+%define xp2 xmm4
+%define xq2 xmm5
+%define xtmp2 xmm6
+%define xs2 xmm7
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+ %define xpoly xmm15
+%elifidn PS,4 ; 32-bit code
+ %define xpoly [poly]
+%endif
+
+;;; Use non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+%endif
+
+section .text
+
+align 16
+mk_global pq_gen_sse, function
+func(pq_gen_sse)
+ FUNC_SAVE
+ sub vec, 3 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (16-1) ;Check alignment of length
+ jnz return_fail
+ mov pos, 0
+%ifidn PS,8
+ movdqa xpoly, [poly] ;For 64-bit, load poly into high xmm reg
+%endif
+ cmp len, 32
+ jl loop16
+
+len_aligned_32bytes:
+ sub len, 32 ;Do end of vec first and run backward
+
+loop32:
+ mov ptr, [arg2+vec*PS] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ XLDR xs2, [ptr+pos+16] ;Preload last vector (source)
+ pxor xp1, xp1 ;p1 = 0
+ pxor xq1, xq1 ;q1 = 0
+ pxor xp2, xp2 ;p2 = 0
+ pxor xq2, xq2 ;q2 = 0
+
+next_vect:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*PS] ; get pointer to next vect
+ pxor xq1, xs1 ; q1 ^= s1
+ pxor xq2, xs2 ; q2 ^= s2
+ pxor xp1, xs1 ; p1 ^= s1
+ pxor xp2, xs2 ; p2 ^= s2
+ pxor xtmp1, xtmp1 ; xtmp1 = 0 - for compare to 0
+ pxor xtmp2, xtmp2 ; xtmp2 = 0
+ pcmpgtb xtmp1, xq1 ; xtmp1 = mask 0xff or 0x00 if bit7 set
+ pcmpgtb xtmp2, xq2 ; xtmp2 = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp1 = poly or 0x00
+ pand xtmp2, xpoly ; xtmp2 = poly or 0x00
+ XLDR xs1, [ptr+pos] ; Get next vector (source data1)
+ XLDR xs2, [ptr+pos+16] ; Get next vector (source data2)
+ paddb xq1, xq1 ; q1 = q1<<1
+ paddb xq2, xq2 ; q2 = q2<<1
+ pxor xq1, xtmp1 ; q1 = q1<<1 ^ poly_masked
+ pxor xq2, xtmp2 ; q2 = q2<<1 ^ poly_masked
+ jg next_vect ; Loop for each vect except 0
+
+ mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
+ mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
+ pxor xp1, xs1 ;p1 ^= s1[0] - last source is already loaded
+ pxor xq1, xs1 ;q1 ^= 1 * s1[0]
+ pxor xp2, xs2 ;p2 ^= s2[0]
+ pxor xq2, xs2 ;q2 ^= 1 * s2[0]
+ XSTR [ptr+pos], xp1 ;Write parity P1 vector
+ XSTR [ptr+pos+16], xp2 ;Write parity P2 vector
+ XSTR [tmp+pos], xq1 ;Write parity Q1 vector
+ XSTR [tmp+pos+16], xq2 ;Write parity Q2 vector
+ add pos, 32
+ cmp pos, len
+ jle loop32
+
+ ;; ------------------------------
+ ;; Do last 16 Bytes remaining
+ add len, 32
+ cmp pos, len
+ je return_pass
+
+loop16:
+ mov ptr, [arg2+vec*PS] ;Fetch last source pointer
+ mov tmp, vec ;Set tmp to point back to last vector
+ XLDR xs1, [ptr+pos] ;Preload last vector (source)
+ pxor xp1, xp1 ;p = 0
+ pxor xq1, xq1 ;q = 0
+
+next_vect16:
+ sub tmp, 1 ;Inner loop for each source vector
+ mov ptr, [arg2+tmp*PS] ; get pointer to next vect
+ pxor xq1, xs1 ; q1 ^= s1
+ pxor xtmp1, xtmp1 ; xtmp = 0
+ pcmpgtb xtmp1, xq1 ; xtmp = mask 0xff or 0x00 if bit7 set
+ pand xtmp1, xpoly ; xtmp = poly or 0x00
+ pxor xp1, xs1 ; p ^= s
+ paddb xq1, xq1 ; q = q<<1
+ pxor xq1, xtmp1 ; q = q<<1 ^ poly_masked
+ XLDR xs1, [ptr+pos] ; Get next vector (source data)
+ jg next_vect16 ; Loop for each vect except 0
+
+ mov ptr, [arg2+PS+vec*PS] ;Get address of P parity vector
+ mov tmp, [arg2+(2*PS)+vec*PS] ;Get address of Q parity vector
+ pxor xp1, xs1 ;p ^= s[0] - last source is already loaded
+ pxor xq1, xs1 ;q ^= 1 * s[0]
+ XSTR [ptr+pos], xp1 ;Write parity P vector
+ XSTR [tmp+pos], xq1 ;Write parity Q vector
+ add pos, 16
+ cmp pos, len
+ jl loop16
+
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+align 16
+poly:
+dq 0x1d1d1d1d1d1d1d1d, 0x1d1d1d1d1d1d1d1d
+
+;;; func core, ver, snum
+slversion pq_gen_sse, 00, 08, 0032
diff --git a/src/isa-l/raid/pq_gen_test.c b/src/isa-l/raid/pq_gen_test.c
new file mode 100644
index 000000000..3469f7e50
--- /dev/null
+++ b/src/isa-l/raid/pq_gen_test.c
@@ -0,0 +1,194 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include<stdio.h>
+#include<stdint.h>
+#include<string.h>
+#include<stdlib.h>
+#include<limits.h>
+#include "raid.h"
+#include "types.h"
+
+#define TEST_SOURCES 16
+#define TEST_LEN 1024
+#define TEST_MEM ((TEST_SOURCES + 2)*(TEST_LEN))
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+int dump(unsigned char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", buf[i++]);
+ if (i % 16 == 0)
+ printf("\n");
+ }
+ printf("\n");
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, k, ret, fail = 0;
+ void *buffs[TEST_SOURCES + 2]; // Pointers to src and dest
+ char *tmp_buf[TEST_SOURCES + 2];
+
+ printf("Test pq_gen_test ");
+
+ srand(TEST_SEED);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES + 2; i++) {
+ void *buf;
+ ret = posix_memalign(&buf, 32, TEST_LEN);
+ if (ret) {
+			printf("alloc error: Fail\n");
+ return 1;
+ }
+ buffs[i] = buf;
+ }
+
+ // Test of all zeros
+ for (i = 0; i < TEST_SOURCES + 2; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+ pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
+
+ for (i = 0; i < TEST_LEN; i++) {
+ if (((char *)buffs[TEST_SOURCES])[i] != 0)
+ fail++;
+ }
+
+ for (i = 0; i < TEST_LEN; i++) {
+ if (((char *)buffs[TEST_SOURCES + 1])[i] != 0)
+ fail++;
+ }
+
+ if (fail > 0) {
+ printf("fail zero test %d\n", fail);
+ return 1;
+ } else
+ putchar('.');
+
+ // Test rand1
+ for (i = 0; i < TEST_SOURCES + 2; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ ret = pq_gen(TEST_SOURCES + 2, TEST_LEN, buffs);
+ fail |= pq_check_base(TEST_SOURCES + 2, TEST_LEN, buffs);
+
+ if (fail > 0) {
+ int t;
+ printf(" Fail rand test1 fail=%d, ret=%d\n", fail, ret);
+ for (t = 0; t < TEST_SOURCES + 2; t++)
+ dump(buffs[t], 15);
+
+ printf(" reference function p,q\n");
+ pq_gen_base(TEST_SOURCES + 2, TEST_LEN, buffs);
+ for (t = TEST_SOURCES; t < TEST_SOURCES + 2; t++)
+ dump(buffs[t], 15);
+
+ return 1;
+ } else
+ putchar('.');
+
+ // Test various number of sources
+ for (j = 4; j <= TEST_SOURCES + 2; j++) {
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ pq_gen(j, TEST_LEN, buffs);
+ fail |= pq_check_base(j, TEST_LEN, buffs);
+
+ if (fail > 0) {
+ printf("fail rand test %d sources\n", j);
+ return 1;
+ } else
+ putchar('.');
+ }
+
+ fflush(0);
+
+ // Test various number of sources and len
+ k = 0;
+ while (k <= TEST_LEN) {
+ for (j = 4; j <= TEST_SOURCES + 2; j++) {
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], k);
+
+ ret = pq_gen(j, k, buffs);
+ fail |= pq_check_base(j, k, buffs);
+
+ if (fail > 0) {
+ printf("fail rand test %d sources, len=%d, fail="
+ "%d, ret=%d\n", j, k, fail, ret);
+ return 1;
+ }
+ }
+ putchar('.');
+ k += 32;
+ }
+
+ // Test at the end of buffer
+ k = 0;
+ while (k <= TEST_LEN) {
+ for (j = 0; j < (TEST_SOURCES + 2); j++) {
+ rand_buffer(buffs[j], TEST_LEN - k);
+ tmp_buf[j] = (char *)buffs[j] + k;
+ }
+
+ ret = pq_gen(TEST_SOURCES + 2, TEST_LEN - k, (void *)tmp_buf);
+ fail |= pq_check_base(TEST_SOURCES + 2, TEST_LEN - k, (void *)tmp_buf);
+
+ if (fail > 0) {
+ printf("fail end test - offset: %d, len: %d, fail: %d, "
+ "ret: %d\n", k, TEST_LEN - k, fail, ret);
+ return 1;
+ }
+
+ putchar('.');
+ fflush(0);
+ k += 32;
+ }
+
+ if (!fail)
+ printf(" done: Pass\n");
+
+ return fail;
+}
diff --git a/src/isa-l/raid/raid_base.c b/src/isa-l/raid/raid_base.c
new file mode 100644
index 000000000..e066eb851
--- /dev/null
+++ b/src/isa-l/raid/raid_base.c
@@ -0,0 +1,147 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <limits.h>
+#include <stdint.h>
+
+#if __WORDSIZE == 64 || _WIN64 || __x86_64__
+# define notbit0 0xfefefefefefefefeULL
+# define bit7 0x8080808080808080ULL
+# define gf8poly 0x1d1d1d1d1d1d1d1dULL
+#else
+# define notbit0 0xfefefefeUL
+# define bit7 0x80808080UL
+# define gf8poly 0x1d1d1d1dUL
+#endif
+
+int pq_gen_base(int vects, int len, void **array)
+{
+ int i, j;
+ unsigned long p, q, s;
+ unsigned long **src = (unsigned long **)array;
+ int blocks = len / sizeof(long);
+
+ for (i = 0; i < blocks; i++) {
+ q = p = src[vects - 3][i];
+
+ for (j = vects - 4; j >= 0; j--) {
+ p ^= s = src[j][i];
+			q = s ^ (((q << 1) & notbit0) ^	// shift each byte left by 1
+				 ((((q & bit7) << 1) - ((q & bit7) >> 7))	// expand each set bit7 to a 0xff byte
+				  & gf8poly));	// apply poly to those bytes
+ }
+
+ src[vects - 2][i] = p; // second to last pointer is p
+ src[vects - 1][i] = q; // last pointer is q
+ }
+ return 0;
+}
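pq_gen_base widens the same recurrence to sizeof(long) bytes per iteration with a branchless SWAR mask: with b = q & bit7, the term (b << 1) - (b >> 7) turns each set bit 7 into a full 0xff byte (0x100 minus 0x01, the carry into the next lane cancelled by the borrow of the subtraction) and leaves clear lanes at 0x00. A small self-checking example of that identity (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    #define BIT7 0x8080808080808080ULL

    /* Verify: for b = q & BIT7, (b << 1) - (b >> 7) is 0xff in every
     * byte whose bit 7 was set in q and 0x00 in every other byte. */
    int main(void)
    {
        uint64_t q = 0x80007f80ff010080ULL;     /* arbitrary sample */
        uint64_t b = q & BIT7;
        uint64_t mask = (b << 1) - (b >> 7);

        for (int i = 0; i < 8; i++) {
            uint8_t qb = (uint8_t)(q >> (8 * i));
            uint8_t mb = (uint8_t)(mask >> (8 * i));
            assert(mb == ((qb & 0x80) ? 0xff : 0x00));
        }
        return 0;
    }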
+
+int pq_check_base(int vects, int len, void **array)
+{
+ int i, j;
+ unsigned char p, q, s;
+ unsigned char **src = (unsigned char **)array;
+
+ for (i = 0; i < len; i++) {
+ q = p = src[vects - 3][i];
+
+ for (j = vects - 4; j >= 0; j--) {
+ s = src[j][i];
+ p ^= s;
+
+			// multiply q by 2 in GF(2^8)
+ q = s ^ ((q << 1) ^ ((q & 0x80) ? 0x1d : 0));
+ }
+
+		if (src[vects - 2][i] != p)	// second to last pointer is p
+			return i | 1;	// fail: P mismatch
+		if (src[vects - 1][i] != q)	// last pointer is q
+			return i | 2;	// fail: Q mismatch
+ }
+ return 0;
+}
+
+int xor_gen_base(int vects, int len, void **array)
+{
+ int i, j;
+ unsigned char parity;
+ unsigned char **src = (unsigned char **)array;
+
+ for (i = 0; i < len; i++) {
+ parity = src[0][i];
+ for (j = 1; j < vects - 1; j++)
+ parity ^= src[j][i];
+
+ src[vects - 1][i] = parity; // last pointer is dest
+
+ }
+
+ return 0;
+}
+
+int xor_check_base(int vects, int len, void **array)
+{
+ int i, j, fail = 0;
+
+ unsigned char parity;
+ unsigned char **src = (unsigned char **)array;
+
+ for (i = 0; i < len; i++) {
+ parity = 0;
+ for (j = 0; j < vects; j++)
+ parity ^= src[j][i];
+
+ if (parity != 0) {
+ fail = 1;
+ break;
+ }
+ }
+ if (fail && len > 0)
+ return len;
+ return fail;
+}
+
+struct slver {
+ unsigned short snum;
+ unsigned char ver;
+ unsigned char core;
+};
+
+struct slver pq_gen_base_slver_0001012a;
+struct slver pq_gen_base_slver = { 0x012a, 0x01, 0x00 };
+
+struct slver xor_gen_base_slver_0001012b;
+struct slver xor_gen_base_slver = { 0x012b, 0x01, 0x00 };
+
+struct slver pq_check_base_slver_0001012c;
+struct slver pq_check_base_slver = { 0x012c, 0x01, 0x00 };
+
+struct slver xor_check_base_slver_0001012d;
+struct slver xor_check_base_slver = { 0x012d, 0x01, 0x00 };
diff --git a/src/isa-l/raid/raid_base_aliases.c b/src/isa-l/raid/raid_base_aliases.c
new file mode 100644
index 000000000..f81792a00
--- /dev/null
+++ b/src/isa-l/raid/raid_base_aliases.c
@@ -0,0 +1,50 @@
+/**********************************************************************
+ Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "raid.h"
+
+int pq_gen(int vects, int len, void **array)
+{
+ return pq_gen_base(vects, len, array);
+}
+
+int pq_check(int vects, int len, void **array)
+{
+ return pq_check_base(vects, len, array);
+}
+
+int xor_gen(int vects, int len, void **array)
+{
+ return xor_gen_base(vects, len, array);
+}
+
+int xor_check(int vects, int len, void **array)
+{
+ return xor_check_base(vects, len, array);
+}
diff --git a/src/isa-l/raid/raid_multibinary.asm b/src/isa-l/raid/raid_multibinary.asm
new file mode 100644
index 000000000..47ef1e369
--- /dev/null
+++ b/src/isa-l/raid/raid_multibinary.asm
@@ -0,0 +1,143 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+default rel
+[bits 64]
+
+extern pq_gen_base
+extern pq_gen_sse
+extern pq_gen_avx
+extern pq_gen_avx2
+
+extern xor_gen_base
+extern xor_gen_sse
+extern xor_gen_avx
+
+extern pq_check_base
+extern pq_check_sse
+
+extern xor_check_base
+extern xor_check_sse
+
+%ifdef HAVE_AS_KNOWS_AVX512
+ extern xor_gen_avx512
+ extern pq_gen_avx512
+%endif
+
+mbin_interface xor_gen
+mbin_interface pq_gen
+
+
+mbin_dispatch_init6 xor_gen, xor_gen_base, xor_gen_sse, xor_gen_avx, xor_gen_avx, xor_gen_avx512
+mbin_dispatch_init6 pq_gen, pq_gen_base, pq_gen_sse, pq_gen_avx, pq_gen_avx2, pq_gen_avx512
+
+section .data
+
+xor_check_dispatched:
+ dq xor_check_mbinit
+pq_check_dispatched:
+ dq pq_check_mbinit
+
+section .text
+
+;;;;
+; pq_check multibinary function
+;;;;
+mk_global pq_check, function
+pq_check_mbinit:
+ endbranch
+ call pq_check_dispatch_init
+pq_check:
+ endbranch
+ jmp qword [pq_check_dispatched]
+
+pq_check_dispatch_init:
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ push rsi
+ lea rsi, [pq_check_base WRT_OPT] ; Default
+
+ mov eax, 1
+ cpuid
+ test ecx, FLAG_CPUID1_ECX_SSE4_1
+ lea rbx, [pq_check_sse WRT_OPT]
+ cmovne rsi, rbx
+
+ mov [pq_check_dispatched], rsi
+ pop rsi
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+ ret
+
+
+;;;;
+; xor_check multibinary function
+;;;;
+mk_global xor_check, function
+xor_check_mbinit:
+ endbranch
+ call xor_check_dispatch_init
+xor_check:
+ endbranch
+ jmp qword [xor_check_dispatched]
+
+xor_check_dispatch_init:
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ push rsi
+ lea rsi, [xor_check_base WRT_OPT] ; Default
+
+ mov eax, 1
+ cpuid
+ test ecx, FLAG_CPUID1_ECX_SSE4_1
+ lea rbx, [xor_check_sse WRT_OPT]
+ cmovne rsi, rbx
+
+ mov [xor_check_dispatched], rsi
+ pop rsi
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+ ret
+
+;;; func core, ver, snum
+slversion xor_gen, 00, 03, 0126
+slversion xor_check, 00, 03, 0127
+slversion pq_gen, 00, 03, 0128
+slversion pq_check, 00, 03, 0129
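pq_check and xor_check above implement lazy binding by hand: the public symbol jumps through a data-section pointer that initially targets an *_mbinit stub, the stub runs CPUID once and rewrites the pointer to the best implementation, and every later call goes straight through. A hedged C equivalent of the pattern (function and helper names here are illustrative, not the library's):

    /* Lazy one-time dispatch, mirroring pq_check_mbinit /
     * pq_check_dispatch_init. has_sse41() stands in for the CPUID
     * test of FLAG_CPUID1_ECX_SSE4_1; the impls are stubs. */
    static int pq_check_base_impl(int v, int n, void **a) { (void)v; (void)n; (void)a; return 0; }
    static int pq_check_sse_impl(int v, int n, void **a)  { (void)v; (void)n; (void)a; return 0; }

    static int has_sse41(void) { return 1; }    /* placeholder for CPUID */

    static int pq_check_init(int v, int n, void **a);
    static int (*pq_check_fn)(int, int, void **) = pq_check_init;

    static int pq_check_init(int v, int n, void **a)
    {
        /* First call: select and patch the pointer, then tail-call
         * so the first caller also gets real work done. */
        pq_check_fn = has_sse41() ? pq_check_sse_impl : pq_check_base_impl;
        return pq_check_fn(v, n, a);
    }

    int my_pq_check(int vects, int len, void **array)
    {
        return pq_check_fn(vects, len, array);  /* jmp [pq_check_dispatched] */
    }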
diff --git a/src/isa-l/raid/raid_multibinary_i32.asm b/src/isa-l/raid/raid_multibinary_i32.asm
new file mode 100644
index 000000000..eee7fd5a1
--- /dev/null
+++ b/src/isa-l/raid/raid_multibinary_i32.asm
@@ -0,0 +1,52 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+[bits 32]
+
+extern xor_gen_base
+extern xor_gen_sse
+extern pq_gen_base
+extern pq_gen_sse
+extern xor_check_base
+extern xor_check_sse
+extern pq_check_base
+extern pq_check_sse
+
+mbin_interface xor_gen
+mbin_interface pq_gen
+mbin_interface xor_check
+mbin_interface pq_check
+
+mbin_dispatch_init5 xor_gen, xor_gen_base, xor_gen_sse, xor_gen_sse, xor_gen_sse
+mbin_dispatch_init5 pq_gen, pq_gen_base, pq_gen_sse, pq_gen_sse, pq_gen_sse
+mbin_dispatch_init5 xor_check, xor_check_base, xor_check_sse, xor_check_sse, xor_check_sse
+mbin_dispatch_init5 pq_check, pq_check_base, pq_check_sse, pq_check_sse, pq_check_sse
diff --git a/src/isa-l/raid/xor_check_sse.asm b/src/isa-l/raid/xor_check_sse.asm
new file mode 100644
index 000000000..a5fe0b2e0
--- /dev/null
+++ b/src/isa-l/raid/xor_check_sse.asm
@@ -0,0 +1,285 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized xor check of N source vectors using SSE
+;;; int xor_check_sse(int vects, int len, void **array)
+
+;;; Checks that the xor of all N vectors in the array of pointers
+;;; (**array), including the last (parity) pointer, is zero.
+;;; Vectors must be aligned to 16 bytes. Length can be any value.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 rax
+ %define tmp2.b al
+ %define tmp3 arg4
+ %define return rax
+ %define PS 8
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define return rax
+ %define tmp2 rax
+ %define tmp2.b al
+ %define PS 8
+ %define tmp r11
+ %define tmp3 r10
+ %define stack_size 2*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ add rsp, stack_size
+ %endmacro
+
+
+%elifidn __OUTPUT_FORMAT__, elf32
+ %define arg0 arg(0)
+ %define arg1 ecx
+ %define tmp2 eax
+ %define tmp2.b al
+ %define tmp3 edx
+ %define return eax
+ %define PS 4
+ %define func(x) x: endbranch
+ %define arg(x) [ebp+8+PS*x]
+ %define arg2 edi ; must sav/restore
+ %define arg3 esi
+ %define tmp ebx
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ mov arg2, arg(2)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ mov esp, ebp ;if has frame pointer
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos tmp3
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+;;; Use non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+%endif
+
+section .text
+
+align 16
+mk_global xor_check_sse, function
+func(xor_check_sse)
+ FUNC_SAVE
+%ifidn PS,8 ;64-bit code
+ sub vec, 1 ; Keep as offset to last source
+%else ;32-bit code
+ mov tmp, arg(0) ; Update vec length arg to last source
+ sub tmp, 1
+ mov arg(0), tmp
+%endif
+
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (128-1) ;Check alignment of length
+ jnz len_not_aligned
+
+
+len_aligned_128bytes:
+ sub len, 128
+ mov pos, 0
+ mov tmp, vec ;Preset to last vector
+
+loop128:
+ mov tmp2, [arg2+tmp*PS] ;Fetch last pointer in array
+ sub tmp, 1 ;Next vect
+ XLDR xmm0, [tmp2+pos] ;Start with end of array in last vector
+ XLDR xmm1, [tmp2+pos+16] ;Keep xor parity in xmm0-7
+ XLDR xmm2, [tmp2+pos+(2*16)]
+ XLDR xmm3, [tmp2+pos+(3*16)]
+ XLDR xmm4, [tmp2+pos+(4*16)]
+ XLDR xmm5, [tmp2+pos+(5*16)]
+ XLDR xmm6, [tmp2+pos+(6*16)]
+ XLDR xmm7, [tmp2+pos+(7*16)]
+
+next_vect:
+ mov ptr, [arg2+tmp*PS]
+ sub tmp, 1
+ xorpd xmm0, [ptr+pos] ;Get next vector (source)
+ xorpd xmm1, [ptr+pos+16]
+ xorpd xmm2, [ptr+pos+(2*16)]
+ xorpd xmm3, [ptr+pos+(3*16)]
+ xorpd xmm4, [ptr+pos+(4*16)]
+ xorpd xmm5, [ptr+pos+(5*16)]
+ xorpd xmm6, [ptr+pos+(6*16)]
+ xorpd xmm7, [ptr+pos+(7*16)]
+;;; prefetch [ptr+pos+(8*16)]
+ jge next_vect ;Loop for each vect
+
+	;; End of vects, check that all parity regs = 0
+ mov tmp, vec ;Back to last vector
+ por xmm0, xmm1
+ por xmm0, xmm2
+ por xmm0, xmm3
+ por xmm0, xmm4
+ por xmm0, xmm5
+ por xmm0, xmm6
+ por xmm0, xmm7
+ ptest xmm0, xmm0
+ jnz return_fail
+
+ add pos, 128
+ cmp pos, len
+ jle loop128
+
+return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+
+
+;;; Do one byte at a time for the unaligned case
+
+xor_gen_byte:
+ mov tmp, vec ;Preset to last vector
+
+loop_1byte:
+ mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
+ mov tmp2.b, [ptr+len-1] ;Get array n
+ sub tmp, 1
+nextvect_1byte:
+ mov ptr, [arg2+tmp*PS]
+ xor tmp2.b, [ptr+len-1]
+ sub tmp, 1
+ jge nextvect_1byte
+
+ mov tmp, vec ;Back to last vector
+ cmp tmp2.b, 0
+ jne return_fail
+ sub len, 1
+ test len, (8-1)
+ jnz loop_1byte
+
+ cmp len, 0
+ je return_pass
+	test	len, (128-1)	;If not 0 and 128-byte aligned
+ jz len_aligned_128bytes ; then do aligned case. len = y * 128
+
+ ;; else we are 8-byte aligned so fall through to recheck
+
+
+ ;; Unaligned length cases
+len_not_aligned:
+ test len, (PS-1)
+ jne xor_gen_byte
+ mov tmp3, len
+ and tmp3, (128-1) ;Do the unaligned bytes 4-8 at a time
+ mov tmp, vec ;Preset to last vector
+
+ ;; Run backwards 8 bytes (4B for 32bit) at a time for (tmp3) bytes
+loopN_bytes:
+ mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
+ mov tmp2, [ptr+len-PS] ;Get array n
+ sub tmp, 1
+nextvect_Nbytes:
+ mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
+ xor tmp2, [ptr+len-PS]
+ sub tmp, 1
+ jge nextvect_Nbytes ;Loop for each source
+
+ mov tmp, vec ;Back to last vector
+ cmp tmp2, 0
+ jne return_fail
+ sub len, PS
+ sub tmp3, PS
+ jg loopN_bytes
+
+ cmp len, 128 ;Now len is aligned to 128B
+ jge len_aligned_128bytes ;We can do the rest aligned
+
+ cmp len, 0
+ je return_pass
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion xor_check_sse, 00, 03, 0031
+
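The tail handling above peels one byte at a time until len is a multiple of PS, then PS bytes at a time until only whole 128-byte blocks remain, so any length is accepted; the observable behaviour reduces to the reference below. A compact sketch of those semantics (not the asm's actual code path):

    /* Reference semantics of xor_check_sse: xor all vects vectors,
     * parity vector included, and fail if any byte of the running
     * xor is nonzero. Returns 0 on pass, 1 on fail, matching the
     * asm's return_pass/return_fail. */
    static int xor_check_ref(int vects, int len, void **array)
    {
        unsigned char **src = (unsigned char **)array;

        for (int i = 0; i < len; i++) {
            unsigned char parity = 0;
            for (int j = 0; j < vects; j++)
                parity ^= src[j][i];
            if (parity != 0)
                return 1;
        }
        return 0;
    }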
diff --git a/src/isa-l/raid/xor_check_test.c b/src/isa-l/raid/xor_check_test.c
new file mode 100644
index 000000000..c7532076f
--- /dev/null
+++ b/src/isa-l/raid/xor_check_test.c
@@ -0,0 +1,280 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include<stdio.h>
+#include<stdint.h>
+#include<string.h>
+#include<stdlib.h>
+#include "raid.h"
+#include "types.h"
+
+#define TEST_SOURCES 16
+#define TEST_LEN 1024
+#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, k, ret, fail = 0;
+ void *buffs[TEST_SOURCES + 1];
+ char c;
+ int serr, lerr;
+ char *tmp_buf[TEST_SOURCES + 1];
+
+ printf("Test xor_check_test %d sources X %d bytes\n", TEST_SOURCES, TEST_LEN);
+
+ srand(TEST_SEED);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES + 1; i++) {
+ void *buf;
+ if (posix_memalign(&buf, 16, TEST_LEN)) {
+			printf("alloc error: Fail\n");
+ return 1;
+ }
+ buffs[i] = buf;
+ }
+
+ // Test of all zeros
+ for (i = 0; i < TEST_SOURCES + 1; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+ xor_gen_base(TEST_SOURCES + 1, TEST_LEN, buffs);
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ if (ret != 0) {
+ fail++;
+ printf("\nfail zero test %d\n", ret);
+ }
+
+ ((char *)(buffs[0]))[TEST_LEN - 2] = 0x7; // corrupt buffer
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ if (ret == 0) {
+ fail++;
+ printf("\nfail corrupt buffer test %d\n", ret);
+ }
+ ((char *)(buffs[0]))[TEST_LEN - 2] = 0; // un-corrupt buffer
+
+ // Test corrupted buffer any location on all sources
+ for (j = 0; j < TEST_SOURCES + 1; j++) {
+ for (i = TEST_LEN - 1; i >= 0; i--) {
+ ((char *)buffs[j])[i] = 0x5; // corrupt buffer
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ if (ret == 0) {
+ fail++;
+ printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
+ return 1;
+ }
+ ((char *)buffs[j])[i] = 0; // un-corrupt buffer
+ }
+ putchar('.');
+ }
+
+ // Test rand1
+ for (i = 0; i < TEST_SOURCES + 1; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ xor_gen_base(TEST_SOURCES + 1, TEST_LEN, buffs);
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ if (ret != 0) {
+ fail++;
+ printf("fail first rand test %d\n", ret);
+ }
+
+ c = ((char *)(buffs[0]))[TEST_LEN - 2];
+ ((char *)(buffs[0]))[TEST_LEN - 2] = c ^ 0x1;
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ if (ret == 0) {
+ fail++;
+ printf("\nFail corrupt buffer test, passed when should have failed\n");
+ }
+ ((char *)(buffs[0]))[TEST_LEN - 2] = c; // un-corrupt buffer
+
+ // Test corrupted buffer any location on all sources w/ random data
+ for (j = 0; j < TEST_SOURCES + 1; j++) {
+ for (i = TEST_LEN - 1; i >= 0; i--) {
+ // Check it still passes
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ if (ret != 0) { // should pass
+ fail++;
+ printf
+ ("\nFail rand test with un-corrupted buffer j=%d, i=%d\n",
+ j, i);
+ return 1;
+ }
+ c = ((char *)buffs[j])[i];
+ ((char *)buffs[j])[i] = c ^ 1; // corrupt buffer
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ if (ret == 0) { // Check it now fails
+ fail++;
+ printf("\nfail corrupt buffer test j=%d, i=%d\n", j, i);
+ return 1;
+ }
+ ((char *)buffs[j])[i] = c; // un-corrupt buffer
+ }
+ putchar('.');
+ }
+
+ // Test various number of sources, full length
+ for (j = 3; j <= TEST_SOURCES + 1; j++) {
+ // New random data
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ // Generate xor parity for this number of sources
+ xor_gen_base(j, TEST_LEN, buffs);
+
+ // Set errors up in each source and len position
+ for (i = 0; i < j; i++) {
+ for (k = 0; k < TEST_LEN; k++) {
+ // See if it still passes
+ ret = xor_check(j, TEST_LEN, buffs);
+ if (ret != 0) { // Should pass
+ printf("\nfail rand test %d sources\n", j);
+ fail++;
+ return 1;
+ }
+
+ c = ((char *)buffs[i])[k];
+ ((char *)buffs[i])[k] = c ^ 1; // corrupt buffer
+
+ ret = xor_check(j, TEST_LEN, buffs);
+ if (ret == 0) { // Should fail
+ printf
+ ("\nfail rand test corrupted buffer %d sources\n",
+ j);
+ fail++;
+ return 1;
+ }
+ ((char *)buffs[i])[k] = c; // un-corrupt buffer
+ }
+ }
+ putchar('.');
+ }
+
+ fflush(0);
+
+ // Test various number of sources and len
+ k = 1;
+ while (k <= TEST_LEN) {
+ for (j = 3; j <= TEST_SOURCES + 1; j++) {
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], k);
+
+ // Generate xor parity for this number of sources
+ xor_gen_base(j, k, buffs);
+
+ // Inject errors at various source and len positions
+ for (lerr = 0; lerr < k; lerr += 10) {
+ for (serr = 0; serr < j; serr++) {
+
+ // See if it still passes
+ ret = xor_check(j, k, buffs);
+ if (ret != 0) { // Should pass
+ printf("\nfail rand test %d sources\n", j);
+ fail++;
+ return 1;
+ }
+
+ c = ((char *)buffs[serr])[lerr];
+ ((char *)buffs[serr])[lerr] = c ^ 1; // corrupt buffer
+
+ ret = xor_check(j, k, buffs);
+ if (ret == 0) { // Should fail
+ printf("\nfail rand test corrupted buffer "
+ "%d sources, len=%d, ret=%d\n", j, k,
+ ret);
+ fail++;
+ return 1;
+ }
+ ((char *)buffs[serr])[lerr] = c; // un-corrupt buffer
+ }
+ }
+ }
+ putchar('.');
+ fflush(0);
+ k += 1;
+ }
+
+ // Test at the end of buffer
+ for (i = 0; i < TEST_LEN; i += 32) {
+ for (j = 0; j < TEST_SOURCES + 1; j++) {
+ rand_buffer(buffs[j], TEST_LEN - i);
+ tmp_buf[j] = (char *)buffs[j] + i;
+ }
+
+ xor_gen_base(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
+
+ // Test good data
+ ret = xor_check(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
+ if (ret != 0) {
+ printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
+ fail++;
+ return 1;
+ }
+ // Test bad data
+ for (serr = 0; serr < TEST_SOURCES + 1; serr++) {
+ for (lerr = 0; lerr < (TEST_LEN - i); lerr++) {
+ c = tmp_buf[serr][lerr];
+ tmp_buf[serr][lerr] = c ^ 1;
+
+ ret =
+ xor_check(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
+ if (ret == 0) {
+ printf("fail end test corrupted buffer - "
+ "offset: %d, len: %d, ret: %d\n", i,
+ TEST_LEN - i, ret);
+ fail++;
+ return 1;
+ }
+
+ tmp_buf[serr][lerr] = c;
+ }
+ }
+
+ putchar('.');
+ fflush(0);
+ }
+
+ if (fail == 0)
+ printf("Pass\n");
+
+ return fail;
+
+}
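
The invariant this test exercises is simple: xor_check() passes only when the byte-wise XOR of all vects buffers (the sources plus the trailing parity buffer) is zero at every position. A minimal scalar sketch of that property follows; xor_check_ref is a hypothetical name for illustration, not the library's optimized kernel:

```c
#include <stddef.h>

/* Reference model of the xor_check contract: return 0 when the XOR of
 * all vects buffers is zero everywhere, non-zero otherwise. */
static int xor_check_ref(int vects, int len, void **array)
{
	int i, j;

	for (i = 0; i < len; i++) {
		unsigned char acc = 0;
		for (j = 0; j < vects; j++)
			acc ^= ((unsigned char *)array[j])[i];
		if (acc != 0)
			return 1;	/* parity does not hold at offset i */
	}
	return 0;			/* parity holds */
}
```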
diff --git a/src/isa-l/raid/xor_example.c b/src/isa-l/raid/xor_example.c
new file mode 100644
index 000000000..48145ac90
--- /dev/null
+++ b/src/isa-l/raid/xor_example.c
@@ -0,0 +1,70 @@
+/**********************************************************************
+ Copyright(c) 2011-2013 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include "raid.h"
+#include "types.h"
+
+#define TEST_SOURCES 16
+#define TEST_LEN 16*1024
+
+int main(int argc, char *argv[])
+{
+ int i, j, should_pass, should_fail;
+ void *buffs[TEST_SOURCES + 1];
+
+ printf("XOR example\n");
+ for (i = 0; i < TEST_SOURCES + 1; i++) {
+ void *buf;
+ if (posix_memalign(&buf, 32, TEST_LEN)) {
+ printf("alloc error: Fail");
+ return 1;
+ }
+ buffs[i] = buf;
+ }
+
+ printf("Make random data\n");
+ for (i = 0; i < TEST_SOURCES + 1; i++)
+ for (j = 0; j < TEST_LEN; j++)
+ ((char *)buffs[i])[j] = rand();
+
+ printf("Generate xor parity\n");
+ xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
+
+ printf("Check parity: ");
+ should_pass = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs);
+ printf("%s\n", should_pass == 0 ? "Pass" : "Fail");
+
+ printf("Find corruption: ");
+ ((char *)buffs[TEST_SOURCES / 2])[TEST_LEN / 2] ^= 1; // flip one bit
+ should_fail = xor_check(TEST_SOURCES + 1, TEST_LEN, buffs); //recheck
+ printf("%s\n", should_fail != 0 ? "Pass" : "Fail");
+
+ return 0;
+}
diff --git a/src/isa-l/raid/xor_gen_avx.asm b/src/isa-l/raid/xor_gen_avx.asm
new file mode 100644
index 000000000..b5527b204
--- /dev/null
+++ b/src/isa-l/raid/xor_gen_avx.asm
@@ -0,0 +1,228 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized xor of N source vectors using AVX
+;;; int xor_gen_avx(int vects, int len, void **array)
+
+;;; Generates xor parity vector from N (vects-1) sources in array of pointers
+;;; (**array). Last pointer is the dest.
+;;; Vectors must be aligned to 32 bytes. Length can be any value.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp3 arg4
+ %define func(x) x: endbranch
+ %define return rax
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmp3 r10
+ %define func(x) proc_frame x
+ %define return rax
+ %define stack_size 2*32 + 8 ;must be an odd multiple of 8
+
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqu [rsp + 0*32], ymm6
+ vmovdqu [rsp + 1*32], ymm7
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ vmovdqu ymm6, [rsp + 0*32]
+ vmovdqu ymm7, [rsp + 1*32]
+ add rsp, stack_size
+ %endmacro
+
+%endif ;output formats
+
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define tmp2 rax
+%define tmp2.b al
+%define pos tmp3
+%define PS 8
+
+;;; Use Non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+%else
+ %define XLDR vmovdqa
+ %define XSTR vmovntdq
+%endif
+
+
+default rel
+[bits 64]
+
+section .text
+
+align 16
+mk_global xor_gen_avx, function
+func(xor_gen_avx)
+
+ FUNC_SAVE
+ sub vec, 2 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (128-1) ;Check alignment of length
+ jnz len_not_aligned
+
+
+len_aligned_128bytes:
+ sub len, 128
+ mov pos, 0
+
+loop128:
+ mov tmp, vec ;Back to last vector
+ mov tmp2, [arg2+vec*PS] ;Fetch last pointer in array
+ sub tmp, 1 ;Next vect
+ XLDR ymm0, [tmp2+pos] ;Start with end of array in last vector
+ XLDR ymm1, [tmp2+pos+32] ;Keep xor parity in ymm0-3
+ XLDR ymm2, [tmp2+pos+(2*32)]
+ XLDR ymm3, [tmp2+pos+(3*32)]
+
+next_vect:
+ mov ptr, [arg2+tmp*PS]
+ sub tmp, 1
+ XLDR ymm4, [ptr+pos] ;Get next vector (source)
+ XLDR ymm5, [ptr+pos+32]
+ XLDR ymm6, [ptr+pos+(2*32)]
+ XLDR ymm7, [ptr+pos+(3*32)]
+ vxorpd ymm0, ymm0, ymm4 ;Add to xor parity
+ vxorpd ymm1, ymm1, ymm5
+ vxorpd ymm2, ymm2, ymm6
+ vxorpd ymm3, ymm3, ymm7
+ jge next_vect ;Loop for each source
+
+ mov ptr, [arg2+PS+vec*PS] ;Address of parity vector
+ XSTR [ptr+pos], ymm0 ;Write parity xor vector
+ XSTR [ptr+pos+(1*32)], ymm1
+ XSTR [ptr+pos+(2*32)], ymm2
+ XSTR [ptr+pos+(3*32)], ymm3
+ add pos, 128
+ cmp pos, len
+ jle loop128
+
+return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+
+;;; Do one byte at a time for no alignment case
+loop_1byte:
+ mov tmp, vec ;Back to last vector
+ mov ptr, [arg2+vec*PS] ;Fetch last pointer in array
+ mov tmp2.b, [ptr+len-1] ;Get array n
+ sub tmp, 1
+nextvect_1byte:
+ mov ptr, [arg2+tmp*PS]
+ xor tmp2.b, [ptr+len-1]
+ sub tmp, 1
+ jge nextvect_1byte
+
+ mov tmp, vec
+ add tmp, 1 ;Add back to point to last vec
+ mov ptr, [arg2+tmp*PS]
+ mov [ptr+len-1], tmp2.b ;Write parity
+ sub len, 1
+ test len, (PS-1)
+ jnz loop_1byte
+
+ cmp len, 0
+ je return_pass
+ test len, (128-1) ;If not 0 and 128-byte aligned
+ jz len_aligned_128bytes ; then do aligned case. len = y * 128
+
+ ;; else we are 8-byte aligned so fall through to recheck
+
+
+ ;; Unaligned length cases
+len_not_aligned:
+ test len, (PS-1)
+ jne loop_1byte
+ mov tmp3, len
+ and tmp3, (128-1) ;Do the unaligned bytes 8 at a time
+
+ ;; Run backwards 8 bytes at a time for (tmp3) bytes
+loop8_bytes:
+ mov tmp, vec ;Back to last vector
+ mov ptr, [arg2+vec*PS] ;Fetch last pointer in array
+ mov tmp2, [ptr+len-PS] ;Get array n
+ sub tmp, 1
+nextvect_8bytes:
+ mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
+ xor tmp2, [ptr+len-PS]
+ sub tmp, 1
+ jge nextvect_8bytes ;Loop for each source
+
+ mov tmp, vec
+ add tmp, 1 ;Add back to point to last vec
+ mov ptr, [arg2+tmp*PS]
+ mov [ptr+len-PS], tmp2 ;Write parity
+ sub len, PS
+ sub tmp3, PS
+ jg loop8_bytes
+
+ cmp len, 128 ;Now len is aligned to 128B
+ jge len_aligned_128bytes ;We can do the rest aligned
+
+ cmp len, 0
+ je return_pass
+
+return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion xor_gen_avx, 02, 05, 0037
+
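
The contract in the header comment above (the last pointer in **array is the destination; parity is the XOR of the first vects-1 sources) reduces to the following scalar sketch. xor_gen_ref is a hypothetical name; the real work is the unrolled SIMD loop above:

```c
/* Scalar model of xor_gen: dest[i] is the XOR of source j at offset i
 * over the first vects-1 pointers; the last pointer is the dest. */
static void xor_gen_ref(int vects, int len, void **array)
{
	unsigned char *dest = array[vects - 1];
	int i, j;

	for (i = 0; i < len; i++) {
		unsigned char p = 0;
		for (j = 0; j < vects - 1; j++)
			p ^= ((unsigned char *)array[j])[i];
		dest[i] = p;
	}
}
```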
diff --git a/src/isa-l/raid/xor_gen_avx512.asm b/src/isa-l/raid/xor_gen_avx512.asm
new file mode 100644
index 000000000..5b078682a
--- /dev/null
+++ b/src/isa-l/raid/xor_gen_avx512.asm
@@ -0,0 +1,217 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized xor of N source vectors using AVX512
+;;; int xor_gen_avx512(int vects, int len, void **array)
+
+;;; Generates xor parity vector from N (vects-1) sources in array of pointers
+;;; (**array). Last pointer is the dest.
+;;; Vectors must be aligned to 32 bytes. Length can be any value.
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp3 arg4
+ %define func(x) x: endbranch
+ %define return rax
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmp3 r10
+ %define func(x) proc_frame x
+ %define return rax
+ %define stack_size 2*16 + 8 ;must be an odd multiple of 8
+
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ vmovdqu [rsp + 0*16], xmm6
+ vmovdqu [rsp + 1*16], xmm7
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ vmovdqu xmm6, [rsp + 0*16]
+ vmovdqu xmm7, [rsp + 1*16]
+ add rsp, stack_size
+ %endmacro
+
+%endif ;output formats
+
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define tmp2 rax
+%define tmp2.b al
+%define pos tmp3
+%define PS 8
+
+%define NO_NT_LDST
+;;; Use Non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+%endif
+
+
+default rel
+[bits 64]
+
+section .text
+
+align 16
+mk_global xor_gen_avx512, function
+func(xor_gen_avx512)
+ FUNC_SAVE
+ sub vec, 2 ;Keep as offset to last source
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (128-1) ;Check alignment of length
+ jnz len_not_aligned
+
+len_aligned_128bytes:
+ sub len, 128
+ mov pos, 0
+
+loop128:
+ mov tmp, vec ;Back to last vector
+ mov tmp2, [arg2+vec*PS] ;Fetch last pointer in array
+ sub tmp, 1 ;Next vect
+ XLDR zmm0, [tmp2+pos] ;Start with end of array in last vector
+ XLDR zmm1, [tmp2+pos+64] ;Keep xor parity in zmm0-1
+
+next_vect:
+ mov ptr, [arg2+tmp*PS]
+ sub tmp, 1
+ XLDR zmm4, [ptr+pos] ;Get next vector (source)
+ XLDR zmm5, [ptr+pos+64]
+ vpxorq zmm0, zmm0, zmm4 ;Add to xor parity
+ vpxorq zmm1, zmm1, zmm5
+ jge next_vect ;Loop for each source
+
+ mov ptr, [arg2+PS+vec*PS] ;Address of parity vector
+ XSTR [ptr+pos], zmm0 ;Write parity xor vector
+ XSTR [ptr+pos+64], zmm1
+ add pos, 128
+ cmp pos, len
+ jle loop128
+
+return_pass:
+ FUNC_RESTORE
+ mov return, 0
+ ret
+
+
+;;; Do one byte at a time for no alignment case
+loop_1byte:
+ mov tmp, vec ;Back to last vector
+ mov ptr, [arg2+vec*PS] ;Fetch last pointer in array
+ mov tmp2.b, [ptr+len-1] ;Get array n
+ sub tmp, 1
+nextvect_1byte:
+ mov ptr, [arg2+tmp*PS]
+ xor tmp2.b, [ptr+len-1]
+ sub tmp, 1
+ jge nextvect_1byte
+
+ mov tmp, vec
+ add tmp, 1 ;Add back to point to last vec
+ mov ptr, [arg2+tmp*PS]
+ mov [ptr+len-1], tmp2.b ;Write parity
+ sub len, 1
+ test len, (PS-1)
+ jnz loop_1byte
+
+ cmp len, 0
+ je return_pass
+ test len, (128-1) ;If not 0 and 128-byte aligned
+ jz len_aligned_128bytes ; then do aligned case. len = y * 128
+
+ ;; else we are 8-byte aligned so fall through to recheck
+
+
+ ;; Unaligned length cases
+len_not_aligned:
+ test len, (PS-1)
+ jne loop_1byte
+ mov tmp3, len
+ and tmp3, (128-1) ;Do the unaligned bytes 8 at a time
+
+ ;; Run backwards 8 bytes at a time for (tmp3) bytes
+loop8_bytes:
+ mov tmp, vec ;Back to last vector
+ mov ptr, [arg2+vec*PS] ;Fetch last pointer in array
+ mov tmp2, [ptr+len-PS] ;Get array n
+ sub tmp, 1
+nextvect_8bytes:
+ mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
+ xor tmp2, [ptr+len-PS]
+ sub tmp, 1
+ jge nextvect_8bytes ;Loop for each source
+
+ mov tmp, vec
+ add tmp, 1 ;Add back to point to last vec
+ mov ptr, [arg2+tmp*PS]
+ mov [ptr+len-PS], tmp2 ;Write parity
+ sub len, PS
+ sub tmp3, PS
+ jg loop8_bytes
+
+ cmp len, 128 ;Now len is aligned to 128B
+ jge len_aligned_128bytes ;We can do the rest aligned
+
+ cmp len, 0
+ je return_pass
+
+return_fail:
+ FUNC_RESTORE
+ mov return, 1
+ ret
+
+endproc_frame
+
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/raid/xor_gen_perf.c b/src/isa-l/raid/xor_gen_perf.c
new file mode 100644
index 000000000..717e0ada7
--- /dev/null
+++ b/src/isa-l/raid/xor_gen_perf.c
@@ -0,0 +1,90 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include<stdio.h>
+#include<stdint.h>
+#include<string.h>
+#include<stdlib.h>
+#include<sys/time.h>
+#include "raid.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Loop many times over same
+# define TEST_SOURCES 10
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN GT_L3_CACHE / TEST_SOURCES
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
+
+int main(int argc, char *argv[])
+{
+ int i, ret, fail = 0;
+ void **buffs;
+ void *buff;
+ struct perf start;
+
+ printf("Test xor_gen_perf\n");
+
+ ret = posix_memalign((void **)&buff, 8, sizeof(int *) * (TEST_SOURCES + 6));
+ if (ret) {
+ printf("alloc error: Fail");
+ return 1;
+ }
+ buffs = buff;
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES + 1; i++) {
+ void *buf;
+ ret = posix_memalign(&buf, 64, TEST_LEN);
+ if (ret) {
+ printf("alloc error: Fail");
+ return 1;
+ }
+ buffs[i] = buf;
+ }
+
+ // Setup data
+ for (i = 0; i < TEST_SOURCES + 1; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+ BENCHMARK(&start, BENCHMARK_TIME, xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs));
+ printf("xor_gen" TEST_TYPE_STR ": ");
+ perf_print(start, (long long)TEST_MEM);
+
+ return fail;
+}
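
The BENCHMARK macro and perf_print() from test.h handle the iteration and reporting above. As a rough sketch of what such a harness measures (assuming throughput is simply bytes processed over elapsed wall time; this is not the macro's actual definition), a hand-rolled loop might look like:

```c
#include <stdio.h>
#include <time.h>
#include "raid.h"

/* Hypothetical hand-rolled timing loop for xor_gen(); illustration only. */
static void time_xor_gen(int vects, int len, void **buffs, int iters)
{
	struct timespec t0, t1;
	double sec;
	int i;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < iters; i++)
		xor_gen(vects, len, buffs);
	clock_gettime(CLOCK_MONOTONIC, &t1);

	sec = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
	printf("xor_gen: %.1f MB/s\n",
	       (double)vects * len * iters / sec / 1000000);
}
```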
diff --git a/src/isa-l/raid/xor_gen_sse.asm b/src/isa-l/raid/xor_gen_sse.asm
new file mode 100644
index 000000000..f31ae63e4
--- /dev/null
+++ b/src/isa-l/raid/xor_gen_sse.asm
@@ -0,0 +1,284 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Optimized xor of N source vectors using SSE
+;;; int xor_gen_sse(int vects, int len, void **array)
+
+;;; Generates xor parity vector from N (vects-1) sources in array of pointers
+;;; (**array). Last pointer is the dest.
+;;; Vectors must be aligned to 16 bytes. Length can be any value.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp2 rax
+ %define tmp2.b al
+ %define tmp3 arg4
+ %define return rax
+ %define PS 8
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define return rax
+ %define tmp2 rax
+ %define tmp2.b al
+ %define PS 8
+ %define tmp r11
+ %define tmp3 r10
+ %define stack_size 2*16 + 8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ add rsp, stack_size
+ %endmacro
+
+
+%elifidn __OUTPUT_FORMAT__, elf32
+ %define arg0 arg(0)
+ %define arg1 ecx
+ %define tmp2 eax
+ %define tmp2.b al
+ %define tmp3 edx
+ %define return eax
+ %define PS 4
+ %define func(x) x: endbranch
+ %define arg(x) [ebp+8+PS*x]
+ %define arg2 edi ; must save/restore
+ %define arg3 esi
+ %define tmp ebx
+
+ %macro FUNC_SAVE 0
+ push ebp
+ mov ebp, esp
+ push esi
+ push edi
+ push ebx
+ mov arg1, arg(1)
+ mov arg2, arg(2)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ pop ebx
+ pop edi
+ pop esi
+ mov esp, ebp ;if has frame pointer
+ pop ebp
+ %endmacro
+
+%endif ; output formats
+
+
+%define vec arg0
+%define len arg1
+%define ptr arg3
+%define pos tmp3
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+;;; Use Non-temporal load/store
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+%endif
+
+section .text
+
+align 16
+mk_global xor_gen_sse, function
+func(xor_gen_sse)
+ FUNC_SAVE
+%ifidn PS,8 ;64-bit code
+ sub vec, 2 ; Keep as offset to last source
+%else ;32-bit code
+ mov tmp, arg(0) ; Update vec length arg to last source
+ sub tmp, 2
+ mov arg(0), tmp
+%endif
+
+ jng return_fail ;Must have at least 2 sources
+ cmp len, 0
+ je return_pass
+ test len, (128-1) ;Check alignment of length
+ jnz len_not_aligned
+
+
+len_aligned_128bytes:
+ sub len, 128
+ mov pos, 0
+ mov tmp, vec ;Preset to last vector
+
+loop128:
+ mov tmp2, [arg2+tmp*PS] ;Fetch last pointer in array
+ sub tmp, 1 ;Next vect
+ XLDR xmm0, [tmp2+pos] ;Start with end of array in last vector
+ XLDR xmm1, [tmp2+pos+16] ;Keep xor parity in xmm0-7
+ XLDR xmm2, [tmp2+pos+(2*16)]
+ XLDR xmm3, [tmp2+pos+(3*16)]
+ XLDR xmm4, [tmp2+pos+(4*16)]
+ XLDR xmm5, [tmp2+pos+(5*16)]
+ XLDR xmm6, [tmp2+pos+(6*16)]
+ XLDR xmm7, [tmp2+pos+(7*16)]
+
+next_vect:
+ mov ptr, [arg2+tmp*PS]
+ sub tmp, 1
+ xorpd xmm0, [ptr+pos] ;Get next vector (source)
+ xorpd xmm1, [ptr+pos+16]
+ xorpd xmm2, [ptr+pos+(2*16)]
+ xorpd xmm3, [ptr+pos+(3*16)]
+ xorpd xmm4, [ptr+pos+(4*16)]
+ xorpd xmm5, [ptr+pos+(5*16)]
+ xorpd xmm6, [ptr+pos+(6*16)]
+ xorpd xmm7, [ptr+pos+(7*16)]
+;;; prefetch [ptr+pos+(8*16)]
+ jge next_vect ;Loop for each vect
+
+
+ mov tmp, vec ;Back to last vector
+ mov ptr, [arg2+PS+tmp*PS] ;Address of parity vector
+ XSTR [ptr+pos], xmm0 ;Write parity xor vector
+ XSTR [ptr+pos+(1*16)], xmm1
+ XSTR [ptr+pos+(2*16)], xmm2
+ XSTR [ptr+pos+(3*16)], xmm3
+ XSTR [ptr+pos+(4*16)], xmm4
+ XSTR [ptr+pos+(5*16)], xmm5
+ XSTR [ptr+pos+(6*16)], xmm6
+ XSTR [ptr+pos+(7*16)], xmm7
+ add pos, 128
+ cmp pos, len
+ jle loop128
+
+return_pass:
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+
+
+;;; Do one byte at a time for no alignment case
+
+xor_gen_byte:
+ mov tmp, vec ;Preset to last vector
+
+loop_1byte:
+ mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
+ mov tmp2.b, [ptr+len-1] ;Get array n
+ sub tmp, 1
+nextvect_1byte:
+ mov ptr, [arg2+tmp*PS]
+ xor tmp2.b, [ptr+len-1]
+ sub tmp, 1
+ jge nextvect_1byte
+
+ mov tmp, vec ;Back to last vector
+ mov ptr, [arg2+PS+tmp*PS] ;Get last vec
+ mov [ptr+len-1], tmp2.b ;Write parity
+ sub len, 1
+ test len, (8-1)
+ jnz loop_1byte
+
+ cmp len, 0
+ je return_pass
+ test len, (128-1) ;If not 0 and 128-byte aligned
+ jz len_aligned_128bytes ; then do aligned case. len = y * 128
+
+ ;; else we are 8-byte aligned so fall through to recheck
+
+
+ ;; Unaligned length cases
+len_not_aligned:
+ test len, (PS-1)
+ jne xor_gen_byte
+ mov tmp3, len
+ and tmp3, (128-1) ;Do the unaligned bytes 4-8 at a time
+ mov tmp, vec ;Preset to last vector
+
+ ;; Run backwards 8 bytes (4B for 32bit) at a time for (tmp3) bytes
+loopN_bytes:
+ mov ptr, [arg2+tmp*PS] ;Fetch last pointer in array
+ mov tmp2, [ptr+len-PS] ;Get array n
+ sub tmp, 1
+nextvect_Nbytes:
+ mov ptr, [arg2+tmp*PS] ;Get pointer to next vector
+ xor tmp2, [ptr+len-PS]
+ sub tmp, 1
+ jge nextvect_Nbytes ;Loop for each source
+
+ mov tmp, vec ;Back to last vector
+ mov ptr, [arg2+PS+tmp*PS] ;Get last vec
+ mov [ptr+len-PS], tmp2 ;Write parity
+ sub len, PS
+ sub tmp3, PS
+ jg loopN_bytes
+
+ cmp len, 128 ;Now len is aligned to 128B
+ jge len_aligned_128bytes ;We can do the rest aligned
+
+ cmp len, 0
+ je return_pass
+
+return_fail:
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion xor_gen_sse, 00, 0c, 0030
+
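
All three xor_gen kernels (SSE, AVX and AVX512) handle lengths that are not a multiple of 128 the same way: peel single bytes off the end of the buffers until the length is 8-byte aligned, then 8-byte words until it is a multiple of 128, then run the wide SIMD loop over the remainder. A C sketch of that control flow, with the hypothetical helper xor_byte_at():

```c
/* XOR the sources into the parity buffer at a single offset. */
static void xor_byte_at(int vects, void **array, int off)
{
	unsigned char p = 0;
	int j;

	for (j = 0; j < vects - 1; j++)
		p ^= ((unsigned char *)array[j])[off];
	((unsigned char *)array[vects - 1])[off] = p;
}

/* Tail handling shared by the SIMD kernels (sketch, not the real code). */
static void xor_gen_tail_sketch(int vects, int len, void **array)
{
	while (len & (8 - 1))		/* bytes until len is 8B aligned */
		xor_byte_at(vects, array, --len);

	while (len & (128 - 1)) {	/* 8-byte steps until 128B aligned */
		int k;
		for (k = 0; k < 8; k++)
			xor_byte_at(vects, array, --len);
	}
	/* len is now a multiple of 128; the SIMD main loop covers it. */
}
```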
diff --git a/src/isa-l/raid/xor_gen_test.c b/src/isa-l/raid/xor_gen_test.c
new file mode 100644
index 000000000..ee922bfaf
--- /dev/null
+++ b/src/isa-l/raid/xor_gen_test.c
@@ -0,0 +1,165 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include<stdio.h>
+#include<stdint.h>
+#include<string.h>
+#include<stdlib.h>
+#include "raid.h"
+#include "types.h"
+
+#define TEST_SOURCES 16
+#define TEST_LEN 1024
+#define TEST_MEM ((TEST_SOURCES + 1)*(TEST_LEN))
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, k, ret, fail = 0;
+ void *buffs[TEST_SOURCES + 1];
+ char *tmp_buf[TEST_SOURCES + 1];
+
+ printf("Test xor_gen_test ");
+
+ srand(TEST_SEED);
+
+ // Allocate the arrays
+ for (i = 0; i < TEST_SOURCES + 1; i++) {
+ void *buf;
+ ret = posix_memalign(&buf, 32, TEST_LEN);
+ if (ret) {
+ printf("alloc error: Fail");
+ return 1;
+ }
+ buffs[i] = buf;
+ }
+
+ // Test of all zeros
+ for (i = 0; i < TEST_SOURCES + 1; i++)
+ memset(buffs[i], 0, TEST_LEN);
+
+ xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
+
+ for (i = 0; i < TEST_LEN; i++) {
+ if (((char *)buffs[TEST_SOURCES])[i] != 0)
+ fail++;
+ }
+
+ if (fail > 0) {
+ printf("fail zero test");
+ return 1;
+ } else
+ putchar('.');
+
+ // Test rand1
+ for (i = 0; i < TEST_SOURCES + 1; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ xor_gen(TEST_SOURCES + 1, TEST_LEN, buffs);
+
+ fail |= xor_check_base(TEST_SOURCES + 1, TEST_LEN, buffs);
+
+ if (fail > 0) {
+ printf("fail rand test %d\n", fail);
+ return 1;
+ } else
+ putchar('.');
+
+ // Test various number of sources
+ for (j = 3; j <= TEST_SOURCES + 1; j++) {
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], TEST_LEN);
+
+ xor_gen(j, TEST_LEN, buffs);
+ fail |= xor_check_base(j, TEST_LEN, buffs);
+
+ if (fail > 0) {
+ printf("fail rand test %d sources\n", j);
+ return 1;
+ } else
+ putchar('.');
+ }
+
+ fflush(0);
+
+ // Test various number of sources and len
+ k = 0;
+ while (k <= TEST_LEN) {
+ for (j = 3; j <= TEST_SOURCES + 1; j++) {
+ for (i = 0; i < j; i++)
+ rand_buffer(buffs[i], k);
+
+ xor_gen(j, k, buffs);
+ fail |= xor_check_base(j, k, buffs);
+
+ if (fail > 0) {
+ printf("fail rand test %d sources, len=%d, ret=%d\n", j, k,
+ fail);
+ return 1;
+ }
+ }
+ putchar('.');
+ k += 1;
+ }
+
+ // Test at the end of buffer
+ for (i = 0; i < TEST_LEN; i += 32) {
+ for (j = 0; j < TEST_SOURCES + 1; j++) {
+ rand_buffer((unsigned char *)buffs[j] + i, TEST_LEN - i);
+ tmp_buf[j] = (char *)buffs[j] + i;
+ }
+
+ xor_gen(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
+ fail |= xor_check_base(TEST_SOURCES + 1, TEST_LEN - i, (void *)tmp_buf);
+
+ if (fail > 0) {
+ printf("fail end test - offset: %d, len: %d\n", i, TEST_LEN - i);
+ return 1;
+ }
+
+ putchar('.');
+ fflush(0);
+ }
+
+ if (!fail)
+ printf(" done: Pass\n");
+
+ return fail;
+}
diff --git a/src/isa-l/tests/fuzz/Makefile.am b/src/isa-l/tests/fuzz/Makefile.am
new file mode 100644
index 000000000..424a028e5
--- /dev/null
+++ b/src/isa-l/tests/fuzz/Makefile.am
@@ -0,0 +1,52 @@
+########################################################################
+# Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+src_include += -I $(srcdir)/tests/fuzz
+
+# AFL fuzz tests
+other_tests += tests/fuzz/igzip_fuzz_inflate
+igzip_fuzz_inflate: igzip_checked_inflate_fuzz_test.o
+igzip_fuzz_inflate: LDLIBS += -lz
+tests_fuzz_igzip_fuzz_inflate_LDADD = tests/fuzz/igzip_checked_inflate_fuzz_test.o libisal.la
+tests_fuzz_igzip_fuzz_inflate_LDFLAGS = -lz
+
+other_tests += tests/fuzz/igzip_dump_inflate_corpus
+tests_fuzz_igzip_dump_inflate_corpus_LDADD = libisal.la
+
+# LLVM fuzz tests
+llvm_fuzz_tests = tests/fuzz/igzip_simple_inflate_fuzz_test
+other_src += tests/fuzz/igzip_simple_inflate_fuzz_test.c
+
+llvm_fuzz_tests += tests/fuzz/igzip_checked_inflate_fuzz_test
+other_src += tests/fuzz/igzip_checked_inflate_fuzz_test.c
+
+llvm_fuzz_tests += tests/fuzz/igzip_simple_round_trip_fuzz_test
+other_src += tests/fuzz/igzip_simple_round_trip_fuzz_test.c
+
+igzip_checked_inflate_fuzz_test: LDLIBS += -lz
diff --git a/src/isa-l/tests/fuzz/Makefile.unx b/src/isa-l/tests/fuzz/Makefile.unx
new file mode 100644
index 000000000..afa20a36d
--- /dev/null
+++ b/src/isa-l/tests/fuzz/Makefile.unx
@@ -0,0 +1,12 @@
+
+default: llvm_fuzz_tests
+
+include ../../igzip/Makefile.am
+include Makefile.am
+include ../../make.inc
+
+CC = clang
+CXX = clang++
+CXXFLAGS += $(DEFINES)
+
+VPATH = . ../../igzip ../../include
diff --git a/src/isa-l/tests/fuzz/igzip_checked_inflate_fuzz_test.c b/src/isa-l/tests/fuzz/igzip_checked_inflate_fuzz_test.c
new file mode 100644
index 000000000..557433d6b
--- /dev/null
+++ b/src/isa-l/tests/fuzz/igzip_checked_inflate_fuzz_test.c
@@ -0,0 +1,72 @@
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <zlib.h>
+#include <assert.h>
+#include "igzip_lib.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
+{
+ struct inflate_state state;
+ z_stream zstate;
+ size_t out_buf_size = 2 * size;
+ int zret, iret;
+ char z_msg_invalid_code_set[] = "invalid code lengths set";
+ char z_msg_invalid_dist_set[] = "invalid distances set";
+ char z_msg_invalid_lit_len_set[] = "invalid literal/lengths set";
+
+ uint8_t *isal_out_buf = (uint8_t *) malloc(size * 2);
+ uint8_t *zlib_out_buf = (uint8_t *) malloc(size * 2);
+
+ assert(NULL != isal_out_buf && NULL != zlib_out_buf);
+
+ /* Inflate data with isal_inflate */
+ memset(&state, 0xff, sizeof(struct inflate_state));
+
+ isal_inflate_init(&state);
+ state.next_in = (uint8_t *) data;
+ state.avail_in = size;
+ state.next_out = isal_out_buf;
+ state.avail_out = out_buf_size;
+
+ iret = isal_inflate_stateless(&state);
+
+ /* Inflate data with zlib */
+ zstate.zalloc = Z_NULL;
+ zstate.zfree = Z_NULL;
+ zstate.opaque = Z_NULL;
+ zstate.avail_in = size;
+ zstate.next_in = (Bytef *) data;
+ zstate.avail_out = out_buf_size;
+ zstate.next_out = zlib_out_buf;
+ inflateInit2(&zstate, -15);
+
+ zret = inflate(&zstate, Z_FINISH);
+
+ if (zret == Z_STREAM_END) {
+ /* If zlib finished, assert isal finished with the same answer */
+ assert(state.block_state == ISAL_BLOCK_FINISH);
+ assert(zstate.total_out == state.total_out);
+ assert(memcmp(isal_out_buf, zlib_out_buf, state.total_out) == 0);
+ } else if (zret < 0) {
+ if (zret != Z_BUF_ERROR)
+ /* If zlib errors, assert isal errors, excluding a few
+ * cases where zlib is overzealous and when zlib notices
+ * an error faster than isal */
+ assert(iret < 0 || strcmp(zstate.msg, z_msg_invalid_code_set) == 0
+ || strcmp(zstate.msg, z_msg_invalid_dist_set) == 0
+ || strcmp(zstate.msg, z_msg_invalid_lit_len_set) == 0
+ || (iret == ISAL_END_INPUT && zstate.avail_in < 3));
+
+ } else
+ /* If zlib did not finish or error, assert isal did not finish
+ * or that isal found an invalid header since isal notices the
+ * error faster than zlib */
+ assert(iret > 0 || iret == ISAL_INVALID_BLOCK);
+
+ inflateEnd(&zstate);
+ free(isal_out_buf);
+ free(zlib_out_buf);
+ return 0;
+}
diff --git a/src/isa-l/tests/fuzz/igzip_dump_inflate_corpus.c b/src/isa-l/tests/fuzz/igzip_dump_inflate_corpus.c
new file mode 100644
index 000000000..36771b3df
--- /dev/null
+++ b/src/isa-l/tests/fuzz/igzip_dump_inflate_corpus.c
@@ -0,0 +1,40 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "inflate_std_vects.h"
+
+#define DNAME_MAX 256
+#define FNAME_MAX (DNAME_MAX + 81)
+
+int main(int argc, char *argv[])
+{
+ uint8_t *buf;
+ int i, len, err;
+ FILE *fout = NULL;
+ char fname[FNAME_MAX];
+ char dname[DNAME_MAX];
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <outdir>\n", argv[0]);
+ exit(1);
+ }
+ strncpy(dname, argv[1], DNAME_MAX - 1);
+
+ for (i = 0; i < sizeof(std_vect_array) / sizeof(struct vect_result); i++) {
+ buf = std_vect_array[i].vector;
+ len = std_vect_array[i].vector_length;
+ err = std_vect_array[i].expected_error;
+
+ snprintf(fname, FNAME_MAX, "%s/inflate_corp_n%04d_e%04d", dname, i, err);
+ printf(" writing %s\n", fname);
+ fout = fopen(fname, "w+");
+ if (!fout) {
+ fprintf(stderr, "Can't open %s for writing\n", fname);
+ exit(1);
+ }
+ fwrite(buf, len, 1, fout);
+ fclose(fout);
+ }
+
+ return 0;
+}
diff --git a/src/isa-l/tests/fuzz/igzip_fuzz_inflate.c b/src/isa-l/tests/fuzz/igzip_fuzz_inflate.c
new file mode 100644
index 000000000..b28739d3b
--- /dev/null
+++ b/src/isa-l/tests/fuzz/igzip_fuzz_inflate.c
@@ -0,0 +1,41 @@
+#define _FILE_OFFSET_BITS 64
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <zlib.h>
+#include "huff_codes.h"
+#include "igzip_lib.h"
+#include "test.h"
+
+extern int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size);
+
+int main(int argc, char *argv[])
+{
+ FILE *in = NULL;
+ unsigned char *in_buf = NULL;
+ uint64_t in_file_size;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: isal_fuzz_inflate <infile>\n");
+ exit(1);
+ }
+ in = fopen(argv[1], "rb");
+ if (!in) {
+ fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+ exit(1);
+ }
+ in_file_size = get_filesize(in);
+ in_buf = malloc(in_file_size);
+
+ if (in_buf == NULL) {
+ fprintf(stderr, "Failed to malloc input and outputs buffers\n");
+ exit(1);
+ }
+
+ if (fread(in_buf, 1, in_file_size, in) != in_file_size) {
+ fprintf(stderr, "Failed to read from %s\n", argv[1]);
+ exit(1);
+ }
+
+ return LLVMFuzzerTestOneInput(in_buf, in_file_size);
+}
diff --git a/src/isa-l/tests/fuzz/igzip_simple_inflate_fuzz_test.c b/src/isa-l/tests/fuzz/igzip_simple_inflate_fuzz_test.c
new file mode 100644
index 000000000..b5f22e845
--- /dev/null
+++ b/src/isa-l/tests/fuzz/igzip_simple_inflate_fuzz_test.c
@@ -0,0 +1,22 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include "igzip_lib.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
+{
+ struct inflate_state state;
+ uint8_t *isal_out_buf = (uint8_t *) (malloc(size * 2));
+ size_t out_buf_size = 2 * size;
+
+ isal_inflate_init(&state);
+ state.next_in = (uint8_t *) data;
+ state.avail_in = size;
+ state.next_out = isal_out_buf;
+ state.avail_out = out_buf_size;
+
+ isal_inflate_stateless(&state);
+
+ free(isal_out_buf);
+ return 0;
+}
diff --git a/src/isa-l/tests/fuzz/igzip_simple_round_trip_fuzz_test.c b/src/isa-l/tests/fuzz/igzip_simple_round_trip_fuzz_test.c
new file mode 100644
index 000000000..381969156
--- /dev/null
+++ b/src/isa-l/tests/fuzz/igzip_simple_round_trip_fuzz_test.c
@@ -0,0 +1,130 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <assert.h>
+#include <byteswap.h>
+#include "igzip_lib.h"
+#include "unaligned.h"
+
+#define LEVEL_BITS 2
+#define HEADER_BITS 3
+#define LVL_BUF_BITS 3
+
+#define LEVEL_BIT_MASK ((1<<LEVEL_BITS) - 1)
+#define HEADER_BIT_MASK ((1<<HEADER_BITS) - 1)
+#define TYPE0_HDR_SIZE 5
+#define TYPE0_MAX_SIZE 65535
+
+#define MIN(x,y) (((x) > (y)) ? (y) : (x))
+
+const int header_size[] = {
+ 0, //IGZIP_DEFLATE
+ 10, //IGZIP_GZIP
+ 0, //IGZIP_GZIP_NO_HDR
+ 2, //IGZIP_ZLIB
+ 0, //IGZIP_ZLIB_NO_HDR
+};
+
+const int trailer_size[] = {
+ 0, //IGZIP_DEFLATE
+ 8, //IGZIP_GZIP
+ 8, //IGZIP_GZIP_NO_HDR
+ 4, //IGZIP_ZLIB
+ 4, //IGZIP_ZLIB_NO_HDR
+};
+
+int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
+{
+ struct inflate_state istate;
+ struct isal_zstream cstate;
+ uint8_t *in_data = (uint8_t *) data;
+ int ret = 1;
+
+ // Parameter default
+ int level = 1;
+ int lev_buf_size = ISAL_DEF_LVL1_DEFAULT;
+ int wrapper_type = 0;
+ size_t cmp_buf_size = size + ISAL_DEF_MAX_HDR_SIZE;
+
+ // Parameters are set by one byte of data input
+ if (size > 1) {
+ uint8_t in_param = in_data[--size];
+ level = MIN(in_param & LEVEL_BIT_MASK, ISAL_DEF_MAX_LEVEL);
+ in_param >>= LEVEL_BITS;
+
+ wrapper_type = (in_param & HEADER_BIT_MASK) % (IGZIP_ZLIB_NO_HDR + 1);
+ in_param >>= HEADER_BITS;
+
+ switch (level) {
+ case 0:
+ lev_buf_size = ISAL_DEF_LVL0_MIN + (in_param) *
+ (ISAL_DEF_LVL0_EXTRA_LARGE / LEVEL_BIT_MASK);
+ break;
+ case 1:
+ lev_buf_size = ISAL_DEF_LVL1_MIN + (in_param) *
+ (ISAL_DEF_LVL1_EXTRA_LARGE / LEVEL_BIT_MASK);
+ break;
+#ifdef ISAL_DEF_LVL2_MIN
+ case 2:
+ lev_buf_size = ISAL_DEF_LVL2_MIN + (in_param) *
+ (ISAL_DEF_LVL2_EXTRA_LARGE / LEVEL_BIT_MASK);
+ break;
+#endif
+#ifdef ISAL_DEF_LVL3_MIN
+ case 3:
+ lev_buf_size = ISAL_DEF_LVL3_MIN + (in_param) *
+ (ISAL_DEF_LVL3_EXTRA_LARGE / LEVEL_BIT_MASK);
+ break;
+#endif
+ }
+ if (0 == level)
+ cmp_buf_size = 2 * size + ISAL_DEF_MAX_HDR_SIZE;
+ else
+ cmp_buf_size = size + 8 + (TYPE0_HDR_SIZE * (size / TYPE0_MAX_SIZE));
+
+ cmp_buf_size += header_size[wrapper_type] + trailer_size[wrapper_type];
+ }
+
+ uint8_t *isal_cmp_buf = (uint8_t *) malloc(cmp_buf_size);
+ uint8_t *isal_out_buf = (uint8_t *) malloc(size);
+ uint8_t *isal_lev_buf = (uint8_t *) malloc(lev_buf_size);
+ assert(NULL != isal_cmp_buf && NULL != isal_out_buf && NULL != isal_lev_buf);
+
+ isal_deflate_init(&cstate);
+ cstate.end_of_stream = 1;
+ cstate.flush = NO_FLUSH;
+ cstate.next_in = in_data;
+ cstate.avail_in = size;
+ cstate.next_out = isal_cmp_buf;
+ cstate.avail_out = cmp_buf_size;
+ cstate.level = level;
+ cstate.level_buf = isal_lev_buf;
+ cstate.level_buf_size = lev_buf_size;
+ cstate.gzip_flag = wrapper_type;
+ ret = isal_deflate_stateless(&cstate);
+
+ isal_inflate_init(&istate);
+ istate.next_in = isal_cmp_buf;
+ istate.avail_in = cstate.total_out;
+ istate.next_out = isal_out_buf;
+ istate.avail_out = size;
+ istate.crc_flag = wrapper_type;
+ ret |= isal_inflate_stateless(&istate);
+ ret |= memcmp(isal_out_buf, in_data, size);
+
+ // Check trailer
+ uint32_t crc = 0;
+ int trailer_idx = cstate.total_out - trailer_size[wrapper_type];
+
+ if (wrapper_type == IGZIP_GZIP || wrapper_type == IGZIP_GZIP_NO_HDR)
+ crc = load_u32(&isal_cmp_buf[trailer_idx]);
+ else if (wrapper_type == IGZIP_ZLIB || wrapper_type == IGZIP_ZLIB_NO_HDR)
+ crc = bswap_32(load_u32(&isal_cmp_buf[trailer_idx]));
+
+ assert(istate.crc == crc);
+ free(isal_cmp_buf);
+ free(isal_out_buf);
+ free(isal_lev_buf);
+ return ret;
+}
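
The compressed-buffer sizing above covers the stored-block worst case at levels above 0: incompressible input is emitted as type 0 (stored) blocks of at most TYPE0_MAX_SIZE bytes, each adding TYPE0_HDR_SIZE header bytes, plus a small constant and the wrapper header/trailer. A worked sketch of the bound, with the constants inlined (hypothetical helper for illustration):

```c
#include <stddef.h>

/* Stored-block worst-case bound before wrapper overhead: e.g. for a
 * 200000-byte input, 200000/65535 = 3 full stored blocks cost 3*5 = 15
 * header bytes, giving 200000 + 8 + 15 = 200023. */
static size_t stored_block_bound(size_t size)
{
	return size + 8 + 5 * (size / 65535);
}
```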
diff --git a/src/isa-l/tools/check_format.sh b/src/isa-l/tools/check_format.sh
new file mode 100755
index 000000000..dfc92b150
--- /dev/null
+++ b/src/isa-l/tools/check_format.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+
+set -e
+rc=0
+verbose=0
+indent_args='-linux -l95 -cp1 -lps -il6 -ncs'
+function iver { printf "%03d%03d%03d%03d" $(echo "$@" | sed 's/GNU indent//' | tr '.' ' '); }
+
+while [ -n "$*" ]; do
+ case "$1" in
+ -v )
+ verbose=1
+ shift
+ ;;
+ -h )
+ echo check_format.sh [-h -v]
+ exit 0
+ ;;
+ esac
+done
+
+echo "Checking format of files in the git index at $PWD"
+if ! git rev-parse --is-inside-work-tree >& /dev/null; then
+ echo "Not in a git repo: Fail"
+ exit 1
+fi
+
+if hash indent && [ $(iver $(indent --version)) -ge $(iver 2.2.12) ]; then
+ echo "Checking C files for coding style..."
+ for f in `git ls-files '*.c'`; do
+ [ "$verbose" -gt 0 ] && echo "checking style on $f"
+ if ! indent $indent_args -st $f | diff -q $f - >& /dev/null; then
+ echo " File found with formatting issues: $f"
+ [ "$verbose" -gt 0 ] 2> /dev/null && indent $indent_args -st $f | diff -u $f -
+ rc=1
+ fi
+ done
+ [ "$rc" -gt 0 ] && echo " Run ./tools/iindent on files"
+else
+ echo "You do not have a recent indent installed so your code style is not being checked!"
+fi
+
+if hash grep; then
+ echo "Checking for dos and whitespace violations..."
+ for f in $(git ls-files); do
+ [ "$verbose" -gt 0 ] && echo "checking whitespace on $f"
+ if grep -q '[[:space:]]$' $f ; then
+ echo " File found with trailing whitespace: $f"
+ rc=1
+ fi
+ if grep -q $'\r' $f ; then
+ echo " File found with dos formatting: $f"
+ rc=1
+ fi
+ done
+fi
+
+echo "Checking source files for permissions..."
+while read -r perm _res0 _res1 f; do
+ [ -z "$f" ] && continue
+ [ "$verbose" -gt 0 ] && echo "checking permissions on $f"
+ if [ "$perm" -ne 100644 ]; then
+ echo " File found with permissions issue ($perm): $f"
+ rc=1
+ fi
+done <<< $(git ls-files -s -- ':(exclude)*.sh' ':(exclude)*iindent')
+
+echo "Checking script files for permissions..."
+while read -r perm _res0 _res1 f; do
+ [ -z "$f" ] && continue
+ [ "$verbose" -gt 0 ] && echo "checking permissions on $f"
+ if [ "$perm" -ne 100755 ]; then
+ echo " Script found with permissions issue ($perm): $f"
+ rc=1
+ fi
+done <<< $(git ls-files -s '*.sh')
+
+
+echo "Checking for signoff in commit message..."
+if ! git log -n 1 --format=%B --no-merges | grep -q "^Signed-off-by:" ; then
+ echo " Commit not signed off. Please read src/CONTRIBUTING.md"
+ rc=1
+fi
+
+[ "$rc" -gt 0 ] && echo Format Fail || echo Format Pass
+
+exit $rc
diff --git a/src/isa-l/tools/gen_nmake.mk b/src/isa-l/tools/gen_nmake.mk
new file mode 100644
index 000000000..8e6330dc8
--- /dev/null
+++ b/src/isa-l/tools/gen_nmake.mk
@@ -0,0 +1,123 @@
+# Regenerate nmake file from makefiles or check its consistency
+
+test_nmake_file: tst.nmake
+ @diff -u Makefile.nmake tst.nmake || (echo Potential nmake consistency issue; $(RM) tst.nmake; false;)
+ @echo No nmake consistency issues
+ @$(RM) tst.nmake
+
+FORCE:
+Makefile.nmake tst.nmake: FORCE
+ @echo Regenerating $@
+ @echo '########################################################################' > $@
+ @cat LICENSE | sed -e 's/^/#/ ' >> $@
+ @echo '########################################################################' >> $@
+ @echo '' >> $@
+ @echo '# This file can be auto-regenerated with $$make -f Makefile.unx Makefile.nmake' >> $@
+ @echo '' >> $@
+ @echo -n 'objs =' >> $@
+ @$(foreach o, $(subst /,\\,$(objs:.o=.obj)), printf " %s\n\t%s" \\ $(o) >> $@; )
+ @echo '' >> $@
+ @echo '' >> $@
+ @echo 'INCLUDES = $(INCLUDE)' >> $@
+ @echo '# Modern asm feature level, consider upgrading nasm/yasm before decreasing feature_level' >> $@
+ @echo 'FEAT_FLAGS = -DHAVE_AS_KNOWS_AVX512 -DAS_FEATURE_LEVEL=10' >> $@
+ @echo 'CFLAGS_REL = -O2 -DNDEBUG /Z7 /MD /Gy' >> $@
+ @echo 'CFLAGS_DBG = -Od -DDEBUG /Z7 /MDd' >> $@
+ @echo 'LINKFLAGS = -nologo -incremental:no -debug' >> $@
+ @echo 'CFLAGS = $$(CFLAGS_REL) -nologo -D_USE_MATH_DEFINES $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@
+ @echo 'AFLAGS = -f win64 $$(FEAT_FLAGS) $$(INCLUDES) $$(D)' >> $@
+ @echo 'CC = cl' >> $@
+ @echo '# or CC = icl -Qstd=c99' >> $@
+ @echo 'AS = nasm' >> $@
+ @echo '' >> $@
+ @echo 'lib: bin static dll' >> $@
+ @echo 'static: bin isa-l_static.lib' >> $@
+ @echo 'dll: bin isa-l.dll' >> $@
+ @echo '' >> $@
+ @echo 'bin: ; -mkdir $$@' >> $@
+ @echo '' >> $@
+ @echo 'isa-l_static.lib: $$(objs)' >> $@
+ @echo ' lib -out:$$@ @<<' >> $@
+ @echo '$$?' >> $@
+ @echo '<<' >> $@
+ @echo '' >> $@
+ @echo 'isa-l.dll: $$(objs)' >> $@
+ @echo ' link -out:$$@ -dll -def:isa-l.def $$(LINKFLAGS) @<<' >> $@
+ @echo '$$?' >> $@
+ @echo '<<' >> $@
+ @echo '' >> $@
+ @$(foreach b, $(units), \
+ printf "{%s}.c.obj:\n\t\$$(CC) \$$(CFLAGS) /c -Fo\$$@ \$$?\n{%s}.asm.obj:\n\t\$$(AS) \$$(AFLAGS) -o \$$@ \$$?\n\n" $(b) $(b) >> $@; )
+ @echo '' >> $@
+ifneq (,$(examples))
+ @echo "# Examples" >> $@
+ @echo -n 'ex =' >> $@
+ @$(foreach ex, $(notdir $(examples)), printf " %s\n\t%s.exe" \\ $(ex) >> $@; )
+ @echo '' >> $@
+ @echo '' >> $@
+ @echo 'ex: lib $$(ex)' >> $@
+ @echo '' >> $@
+ @echo '$$(ex): $$(@B).obj' >> $@
+endif
+ @echo '' >> $@
+ @echo '.obj.exe:' >> $@
+ @echo ' link /out:$$@ $$(LINKFLAGS) isa-l.lib $$?' >> $@
+ @echo '' >> $@
+ @echo '# Check tests' >> $@
+ @echo -n 'checks =' >> $@
+ @$(foreach check, $(notdir $(check_tests)), printf " %s\n\t%s.exe" \\ $(check) >> $@; )
+ @echo '' >> $@
+ @echo '' >> $@
+ @echo 'checks: lib $$(checks)' >> $@
+ @echo '$$(checks): $$(@B).obj' >> $@
+ @echo 'check: $$(checks)' >> $@
+ @echo ' !$$?' >> $@
+ @echo '' >> $@
+ @echo '# Unit tests' >> $@
+ @echo -n 'tests =' >> $@
+ @$(foreach test, $(notdir $(unit_tests)), printf " %s\n\t%s.exe" \\ $(test) >> $@; )
+ @echo '' >> $@
+ @echo '' >> $@
+ @echo 'tests: lib $$(tests)' >> $@
+ @echo '$$(tests): $$(@B).obj' >> $@
+ @echo '' >> $@
+ @echo '# Performance tests' >> $@
+ @echo -n 'perfs =' >> $@
+ @$(foreach perf, $(notdir $(perf_tests)), printf " %s\n\t%s.exe" \\ $(perf) >> $@; )
+ @echo '' >> $@
+ @echo '' >> $@
+ @echo 'perfs: lib $$(perfs)' >> $@
+ @echo '$$(perfs): $$(@B).obj' >> $@
+ @echo '' >> $@
+ @echo -n 'progs =' >> $@
+ @$(foreach prog, $(notdir $(bin_PROGRAMS)), printf " %s\n\t%s.exe" \\ $(prog) >> $@; )
+ @echo '' >> $@
+ @echo '' >> $@
+ @echo 'progs: lib $$(progs)' >> $@
+ @$(foreach p, $(notdir $(bin_PROGRAMS)), \
+ printf "%s.exe: %s\n\tlink /out:\$$@ \$$(LINKFLAGS) isa-l.lib \$$?\n" $(p) $(subst /,\\,$(programs_$(p)_SOURCES:.c=.obj)) >> $@; )
+ @echo '' >> $@
+ @echo 'clean:' >> $@
+ @echo ' -if exist *.obj del *.obj' >> $@
+ @echo ' -if exist bin\*.obj del bin\*.obj' >> $@
+ @echo ' -if exist isa-l_static.lib del isa-l_static.lib' >> $@
+ @echo ' -if exist *.exe del *.exe' >> $@
+ @echo ' -if exist *.pdb del *.pdb' >> $@
+ @echo ' -if exist isa-l.lib del isa-l.lib' >> $@
+ @echo ' -if exist isa-l.dll del isa-l.dll' >> $@
+ @echo ' -if exist isa-l.exp del isa-l.exp' >> $@
+ @echo '' >> $@
+ $(if $(findstring igzip,$(units)),@echo 'zlib.lib:' >> $@ )
+ @cat $(foreach unit,$(units), $(unit)/Makefile.am) | sed \
+ -e '/: /!d' \
+ -e 's/\([^ :]*\)[ ]*/\1.exe /g' \
+ -e :c -e 's/:\(.*\).exe/:\1/;tc' \
+ -e 's/\.o[ $$]/.obj /g' \
+ -e 's/\.o\.exe[ ]:/.obj:/g' \
+ -e '/CFLAGS_.*+=/d' \
+ -e '/:.*\%.*:/d' \
+ -e 's/ :/:/' \
+ -e 's/LDLIBS *+=//' \
+ -e 's/-lz/zlib.lib/' \
+ -e 's/ $$//' \
+ >> $@
diff --git a/src/isa-l/tools/iindent b/src/isa-l/tools/iindent
new file mode 100755
index 000000000..48d26360f
--- /dev/null
+++ b/src/isa-l/tools/iindent
@@ -0,0 +1,2 @@
+#!/bin/sh
+indent -linux -l95 -cp1 -lps -il6 -ncs "$@"
diff --git a/src/isa-l/tools/nasm-cet-filter.sh b/src/isa-l/tools/nasm-cet-filter.sh
new file mode 100755
index 000000000..19e03856c
--- /dev/null
+++ b/src/isa-l/tools/nasm-cet-filter.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+
+# Filter out unnecessary options added by automake
+
+while [ -n "$*" ]; do
+ case "$1" in
+ -o )
+ # Supported options with arg
+ options="$options $1 $2"
+ shift
+ object="$1"
+ shift
+ ;;
+ -f | -D )
+ # Supported options with arg
+ options="$options $1 $2"
+ shift
+ shift
+ ;;
+ -I | -i )
+ options="$options $1 $2/"
+ shift
+ shift
+ ;;
+ --prefix* )
+ # Supported options without arg
+ options="$options $1"
+ shift
+ ;;
+ -I* | -i* )
+ options="$options $1/"
+ shift
+ ;;
+ -D* ) # For defines we need to remove spaces
+ case "$1" in
+ *' '* ) ;;
+ *) options="$options $1" ;;
+ esac
+ shift
+ ;;
+ #-blah )
+ # Unsupported options with args - none known
+ -* )
+ # Unsupported options with no args
+ shift
+ ;;
+ * )
+ args="$args $1"
+ shift
+ ;;
+ esac
+done
+
+nasm $options $args
+$CET_LD -r -z ibt -z shstk -o $object.tmp $object
+mv $object.tmp $object
diff --git a/src/isa-l/tools/nasm-filter.sh b/src/isa-l/tools/nasm-filter.sh
new file mode 100755
index 000000000..5ec9ba3f3
--- /dev/null
+++ b/src/isa-l/tools/nasm-filter.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+
+# Filter out unnecessary options added by automake
+
+while [ -n "$*" ]; do
+ case "$1" in
+ -f | -o | -D )
+ # Supported options with arg
+ options="$options $1 $2"
+ shift
+ shift
+ ;;
+ -I | -i )
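+ # nasm treats include paths as plain prefixes, so make sure they end with a slash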
+ options="$options $1 $2/"
+ shift
+ shift
+ ;;
+ --prefix* )
+ # Supported options without arg
+ options="$options $1"
+ shift
+ ;;
+ -I* | -i* )
+ options="$options $1/"
+ shift
+ ;;
+ -D* ) # For defines we need to remove spaces
+ case "$1" in
+ *' '* ) ;;
+ *) options="$options $1" ;;
+ esac
+ shift
+ ;;
+ #-blah )
+ # Unsupported options with args - none known
+ -* )
+ # Unsupported options with no args
+ shift
+ ;;
+ * )
+ args="$args $1"
+ shift
+ ;;
+ esac
+done
+
+nasm $options $args
diff --git a/src/isa-l/tools/remove_trailing_whitespace.sh b/src/isa-l/tools/remove_trailing_whitespace.sh
new file mode 100755
index 000000000..bb82b9fa5
--- /dev/null
+++ b/src/isa-l/tools/remove_trailing_whitespace.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
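+# Strip trailing blanks in place, keeping a .bak backup of each file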
+sed -i.bak 's/[[:blank:]]*$//' "$@"
diff --git a/src/isa-l/tools/test_autorun.sh b/src/isa-l/tools/test_autorun.sh
new file mode 100755
index 000000000..58d1a0a69
--- /dev/null
+++ b/src/isa-l/tools/test_autorun.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+
+set -e #exit on fail
+
+# Override defaults if exist
+READLINK=readlink
+command -V greadlink >/dev/null 2>&1 && READLINK=greadlink
+
+# Run in build directory
+out="$PWD"
+src=$($READLINK -f $(dirname $0))/..
+cd "$src"
+
+# Echo environment info
+if test -d .git; then
+ branch=$(git describe --always)
+ commitid=$(git rev-parse HEAD)
+ brief=$(git log -1 --format='%s')
+ branch_changes=$(git diff --shortstat)
+fi
+if command -V uname >/dev/null 2>&1; then
+ node=$(uname -n)
+ os_name=$(uname -s)
+ os_all=$(uname -a)
+fi
+
+echo "Test report v1"
+echo "branch: $branch"
+echo "brief: $brief"
+echo "commitid: $commitid"
+echo "node: $node"
+echo "os_name: $os_name"
+echo "os_all: $os_all"
+echo "test_args: $@"
+echo "changes: $branch_changes"
+command -V lscpu > /dev/null 2>&1 && lscpu
+
+# Start tests
+
+[ -z "$1" ] && ./tools/test_checks.sh
+
+while [ -n "$1" ]; do
+ case "$1" in
+ check )
+ ./tools/test_checks.sh
+ shift ;;
+ ext )
+ ./tools/test_extended.sh
+ shift ;;
+ format )
+ shift ;;
+ all )
+ ./tools/test_checks.sh
+ ./tools/test_extended.sh
+ shift ;;
+ * )
+ echo "$0: undefined option: $1"
+ shift ;;
+ esac
+done
+
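+# The format check always runs, whatever options were given above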
+./tools/check_format.sh
+
diff --git a/src/isa-l/tools/test_checks.sh b/src/isa-l/tools/test_checks.sh
new file mode 100755
index 000000000..a5a0f45de
--- /dev/null
+++ b/src/isa-l/tools/test_checks.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+set -xe #exit on fail
+
+# Defaults
+cpus=1
+S=$RANDOM
+MAKE=make
+READLINK=readlink
+
+# Override defaults if exist
+command -V gmake >/dev/null 2>&1 && MAKE=gmake
+command -V greadlink >/dev/null 2>&1 && READLINK=greadlink
+
+out="$PWD"
+src=$($READLINK -f $(dirname $0))/..
+source $src/tools/test_tools.sh
+cd "$src"
+tmp_install_dir=$out/tmp_install
+
+# Run on multiple cpus
+if command -V lscpu >/dev/null 2>&1; then
+ cpus=`lscpu -p | tail -1 | cut -d, -f 2`
+ cpus=$(($cpus + 1))
+elif command -V sysctl >/dev/null 2>&1; then
+ if sysctl -n hw.ncpu >/dev/null 2>&1; then
+ cpus=$(sysctl -n hw.ncpu)
+ cpus=$(($cpus + 1))
+ fi
+fi
+echo "Using $cpus cpu threads"
+
+# Fall back to a /dev/urandom-based seed if none was set above
+if [ -z "$S" ]; then
+ S=`tr -cd 0-9 </dev/urandom | head -c 4 | sed -e 's/^0*/1/g'`
+ [ "$S" -gt 0 ] 2> /dev/null || S="123"
+fi
+echo "Running with TEST_SEED=$S"
+
+# Fix Darwin issues
+if uname | grep -q 'Darwin'; then
+ export SED=`which sed`
+fi
+
+# Build and run check tests
+if [ -z "$CFLAGS" ]; then
+ CFLAGS='-g -O2 -fsanitize=undefined -fno-sanitize=nonnull-attribute -fsanitize-undefined-trap-on-error'
+
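+ # Only enable the sanitizer flags if the compiler actually accepts them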
+ if [ $CC ]; then
+ echo int main\(\)\{\}\; | $CC $CFLAGS -xc -o /dev/null - >& /dev/null && sanitize=1
+ elif ( command -V gcc > /dev/null ); then
+ echo int main\(\)\{\}\; | gcc $CFLAGS -xc -o /dev/null - >& /dev/null && sanitize=1
+ elif ( command -V clang > /dev/null ); then
+ echo int main\(\)\{\}\; | clang $CFLAGS -xc -o /dev/null - >& /dev/null && sanitize=1
+ fi
+
+ if [ $sanitize ]; then
+ echo "Sanitizing undefined behaviour"
+ export CFLAGS
+ fi
+fi
+
+time ./autogen.sh
+time ./configure --prefix=$tmp_install_dir $opt_config_target
+time $MAKE -j $cpus
+test_start "check_tests"
+time $MAKE check -j $cpus D="-D TEST_SEED=$S"
+test_end "check_tests" $?
+
+# Build other tests if deps found
+if command -V ldconfig >/dev/null 2>&1; then
+ if ldconfig -p | grep -q libz.so; then
+ test_start "other_check_tests"
+ time $MAKE other -j $cpus
+ test_end "other_check_tests" $?
+ test_start "example_tests"
+ time $MAKE ex -j $cpus
+ test_end "example_tests" $?
+ test_start "unit_tests"
+ time $MAKE tests -j $cpus
+ test_end "unit_tests" $?
+ fi
+fi
+test_start "installation_test"
+time $MAKE install
+test_end "installation_test" $?
+
+# Check for gnu executable stack set
+if command -V readelf >/dev/null 2>&1; then
+ if readelf -W -l $tmp_install_dir/lib/libisal.so | grep 'GNU_STACK' | grep -q 'RWE'; then
+ echo Stack NX check $tmp_install_dir/lib/libisal.so Fail
+ exit 1
+ else
+ echo Stack NX check $tmp_install_dir/lib/libisal.so Pass
+ fi
+else
+ echo Stack NX check not supported
+fi
+
+$MAKE clean
+
+# Check that make clean did not leave any junk behind
+if git status > /dev/null 2>&1; then
+ if git status --porcelain --ignored | grep -x '.*\.o\|.*\.lo\|.*\.a\|.*\.la\|.*\.s'; then
+ echo Clean directory check Fail
+ exit 1
+ else
+ echo Clean directory check Pass
+ fi
+else
+ echo Clean directory check not supported
+fi
+
+
+echo $0: Pass
diff --git a/src/isa-l/tools/test_extended.sh b/src/isa-l/tools/test_extended.sh
new file mode 100755
index 000000000..a5f1e9674
--- /dev/null
+++ b/src/isa-l/tools/test_extended.sh
@@ -0,0 +1,211 @@
+#!/usr/bin/env bash
+
+# Extended tests: Run a few more options other than make check
+
+set -xe #exit on fail
+
+# Defaults
+cpus=1
+S=$RANDOM
+MAKE=make
+READLINK=readlink
+test_level=check
+build_opt=''
+msg=''
+
+# Override defaults if exist
+command -V gmake >/dev/null 2>&1 && MAKE=gmake
+command -V greadlink >/dev/null 2>&1 && READLINK=greadlink
+[ -n "$CC" ] && build_opt+="CC=$CC "
+[ -n "$AS" ] && build_opt+="AS=$AS "
+
+out="$PWD"
+src=$($READLINK -f $(dirname $0))/..
+source $src/tools/test_tools.sh
+cd "$src"
+
+# Run on multiple cpus
+if command -V lscpu >/dev/null 2>&1; then
+ cpus=`lscpu -p | tail -1 | cut -d, -f 2`
+ cpus=$(($cpus + 1))
+elif command -V sysctl >/dev/null 2>&1; then
+ if sysctl -n hw.ncpu >/dev/null 2>&1; then
+ cpus=$(sysctl -n hw.ncpu)
+ cpus=$(($cpus + 1))
+ fi
+fi
+echo "Using $cpus cpu threads"
+
+if [ -z "$S" ]; then
+ S=`tr -cd 0-9 </dev/urandom | head -c 4 | sed -e 's/^0*/1/g'`
+ [ "$S" -gt 0 ] 2> /dev/null || S="123"
+fi
+msg+="Running with TEST_SEED=$S".$'\n'
+
+# Fix Darwin issues
+if uname | grep -q 'Darwin'; then
+ export SED=`which sed`
+fi
+
+# Check for test libs to add
+if command -V ldconfig >/dev/null 2>&1; then
+ if ldconfig -p | grep -q libz.so; then
+ test_level=test
+ msg+=$'With extra tests\n'
+ fi
+ if ldconfig -p | grep -q libefence.so; then
+ build_opt+="LDFLAGS+='-lefence' "
+ msg+=$'With efence\n'
+ fi
+fi
+
+# Std makefile build test
+$MAKE -f Makefile.unx clean
+test_start "extended_build_test"
+time $MAKE -f Makefile.unx -j $cpus $build_opt
+test_end "extended_build_test" $?
+msg+=$'Std makefile build: Pass\n'
+
+# Check for gnu executable stack set
+if command -V readelf >/dev/null 2>&1; then
+ test_start "stack_nx_check"
+ if readelf -W -l bin/libisal.so | grep 'GNU_STACK' | grep -q 'RWE'; then
+ echo $0: Stack NX check bin/libisal.so: Fail
+ test_end "stack_nx_check" 1
+ exit 1
+ else
+ test_end "stack_nx_check" 0
+ msg+=$'Stack NX check bin/libisal.so: Pass\n'
+ fi
+else
+ msg+=$'Stack NX check not supported: Skip\n'
+fi
+
+# Std makefile build perf tests
+test_start "extended_perf_test"
+time $MAKE -f Makefile.unx -j $cpus perfs
+test_end "extended_perf_test" $?
+msg+=$'Std makefile build perf: Pass\n'
+
+# Std makefile run tests
+test_start "extended_makefile_tests"
+time $MAKE -f Makefile.unx -j $cpus $build_opt D="TEST_SEED=$S" $test_level
+test_end "extended_makefile_tests" $?
+msg+=$'Std makefile tests: Pass\n'
+
+# Std makefile build other
+test_start "extended_other_tests"
+time $MAKE -f Makefile.unx -j $cpus $build_opt D="TEST_SEED=$S" other
+test_end "extended_other_tests" $?
+msg+=$'Other tests build: Pass\n'
+
+# Try to pick a random src file
+if command -V shuf >/dev/null 2>&1; then
+ in_file=$(find $src -type f -size +0 \( -name \*.c -o -name \*.asm \) -print 2>/dev/null | shuf | head -1 );
+else
+ in_file=configure.ac
+fi
+
+echo Other tests using $in_file
+test_start "igzip_file_perf"
+./igzip_file_perf $in_file
+test_end "igzip_file_perf" $?
+test_start "igzip_hist_perf"
+./igzip_hist_perf $in_file
+test_end "igzip_hist_perf" $?
+test_start "igzip_semi_dyn_file_perf"
+./igzip_semi_dyn_file_perf $in_file
+test_end "igzip_semi_dyn_file_perf" $?
+test_start "igzip_fuzz_inflate"
+./igzip_fuzz_inflate $in_file
+test_end "igzip_fuzz_inflate" $?
+msg+=$'Other tests run: Pass\n'
+
+if command -V shuf >/dev/null 2>&1; then
+ in_files=$(find $src -type f -size +0 -print 2>/dev/null | shuf | head -10 );
+ test_start "igzip_rand_test"
+ ./igzip_rand_test $in_files
+ test_end "igzip_rand_test" $?
+ test_start "igzip_inflate_test"
+ ./igzip_inflate_test $in_files
+ test_end "igzip_inflate_test" $?
+ msg+=$'Compression file tests: Pass\n'
+else
+ msg+=$'Compression file test: Skip\n'
+fi
+
+time $MAKE -f Makefile.unx -j $cpus $build_opt ex
+msg+=$'Examples build: Pass\n'
+
+test_start "ec_simple_example"
+./ec_simple_example -r $S
+test_end "ec_simple_example" $?
+test_start "crc_simple_test"
+./crc_simple_test
+test_end "crc_simple_test" $?
+test_start "crc64_example"
+./crc64_example
+test_end "crc64_example" $?
+test_start "xor_example"
+./xor_example
+test_end "xor_example" $?
+test_start "igzip_example"
+./igzip_example ${in_file} ${in_file}.cmp
+test_end "igzip_example" $?
+rm -rf ${in_file}.cmp
+msg+=$'Examples run: Pass\n'
+
+# Test custom hufftables
+test_start "generate_custom_hufftables"
+./generate_custom_hufftables $in_file
+$MAKE -f Makefile.unx clean
+$MAKE -f Makefile.unx -j $cpus D="NO_STATIC_INFLATE_H" checks
+./igzip_rand_test $in_file
+rm -rf hufftables_c.c
+test_end "generate_custom_hufftables" $?
+
+msg+=$'Custom hufftable build: Pass\n'
+
+$MAKE -f Makefile.unx clean
+
+test_start "nmake_file_consistency"
+$MAKE -f Makefile.unx host_cpu="x86_64" test_nmake_file
+test_end "nmake_file_consistency" $?
+msg+=$'Nmake file consistency: Pass\n'
+
+# noarch build
+test_start "noarch_build"
+time $MAKE -f Makefile.unx -j $cpus arch=noarch $build_opt
+test_end "noarch_build" $?
+test_start "noarch_build_random"
+time $MAKE -f Makefile.unx -j $cpus arch=noarch $build_opt D="TEST_SEED=$S" check
+test_end "noarch_build_random" $?
+$MAKE -f Makefile.unx arch=noarch clean
+msg+=$'Noarch build: Pass\n'
+
+# Try mingw build
+if [ $(uname -m) == "x86_64" ] && command -V x86_64-w64-mingw32-gcc >/dev/null 2>&1; then
+ test_start "mingw_build"
+ time $MAKE -f Makefile.unx -j $cpus arch=mingw
+ test_end "mingw_build" $?
+ msg+=$'Mingw build: Pass\n'
+
+ if command -V wine >/dev/null 2>&1; then
+ test_start "mingw_check_tests"
+ time $MAKE -f Makefile.unx -j $cpus arch=mingw D="TEST_SEED=$S" check
+ test_end "mingw_check_tests" $?
+ msg+=$'Mingw check tests: Pass\n'
+ else
+ msg+=$'No wine, mingw check: Skip\n'
+ fi
+ $MAKE -f Makefile.unx arch=mingw clean
+else
+ msg+=$'No mingw build: Skip\n'
+fi
+
+set +x
+echo
+echo "Summary test $0:"
+echo "Build opt: $build_opt"
+echo "$msg"
+echo "$0: Final: Pass"
diff --git a/src/isa-l/tools/test_fuzz.sh b/src/isa-l/tools/test_fuzz.sh
new file mode 100755
index 000000000..bc9797e57
--- /dev/null
+++ b/src/isa-l/tools/test_fuzz.sh
@@ -0,0 +1,172 @@
+#!/usr/bin/env bash
+
+usage ()
+{
+test_ids=$(echo "${llvm_all_ids[*]}" | sed 's/ /, /g')
+cat << EOF
+usage: $0 options
+options:
+ -h Help
+ -l, --llvm <n> Use llvm fuzz tests and run n times 0=just build, -1=skip (default $use_llvm).
+ -a, --afl <n> Use AFL fuzz tests and run n times 0=just build, -1=skip (default $use_afl).
+ -t, --time <n> Run each group for at most <n>[s,h,m,d] - seconds, hours, minutes or days.
+ -e <exec|rand|all> Run a specific llvm test or [$test_ids, rand, all].
+ -f <file> Use this file as initial raw input. Can be repeated.
+ -d <0,1> Use dump of internal inflate test corpus (default $use_internal_corp).
+ -i <dir> Fuzz input dir (default $fuzzin_dir).
+ -o <dir> Fuzz output dir (default $fuzzout_dir).
+EOF
+exit 0
+}
+
+# Defaults
+use_afl=-1
+use_llvm=1
+samp_files=
+use_internal_corp=0
+fuzzin_dir=fuzzin
+fuzzout_dir=fuzzout
+llvm_opts=" -print_final_stats=1"
+afl_timeout_cmd=""
+run_secs=0
+llvm_tests=("igzip_simple_inflate_fuzz_test")
+llvm_all_ids=("simple" "checked" "round_trip")
+llvm_all_tests=("igzip_simple_inflate_fuzz_test" "igzip_checked_inflate_fuzz_test" "igzip_simple_round_trip_fuzz_test")
+
+# Options
+while [ "$1" != "${1##-}" ]; do
+ case $1 in
+ -h | --help)
+ usage
+ ;;
+ -t | --time)
+ run_secs=$(echo $2 | sed -e 's/d$/*24h/' -e 's/h$/*60m/' -e 's/m$/*60/' -e 's/s$//'| bc)
+ llvm_opts+=" -max_total_time=$run_secs"
+ afl_timeout_cmd="timeout --preserve-status $run_secs"
+ echo Run each for $run_secs seconds
+ shift 2
+ ;;
+ -a | --afl)
+ use_afl=$2
+ shift 2
+ ;;
+ -l | --llvm)
+ use_llvm=$2
+ shift 2
+ ;;
+ -f)
+ samp_files+="$2 "
+ use_internal_corp=0
+ shift 2
+ ;;
+ -d)
+ use_internal_corp=$2
+ shift 2
+ ;;
+ -e)
+ case $2 in
+ all)
+ llvm_tests=${llvm_all_tests[@]}
+ ;;
+ rand)
+ llvm_tests=${llvm_all_tests[$RANDOM % ${#llvm_all_tests[@]} ]}
+ ;;
+ *)
+ flag=0
+ for id_index in "${!llvm_all_ids[@]}"; do
+ if [[ "${llvm_all_ids[$id_index]}" = "$2" ]]; then
+ flag=1
+ llvm_tests[0]="${llvm_all_tests[$id_index]}"
+ break;
+ fi
+ done
+
+ if [ $flag -eq 0 ]; then
+ test_ids=$(echo "${llvm_all_ids[*]}" | sed 's/ /, /g')
+ echo "Invalid test, valid options: $test_ids, rand, or all"
+ exit 0
+ fi
+ ;;
+ esac
+ shift 2
+ ;;
+ -i)
+ fuzzin_dir=$2
+ shift 2
+ ;;
+ -o)
+ fuzzout_dir=$2
+ shift 2
+ ;;
+ esac
+done
+
+set -xe #exit on fail
+
+# Optionally build afl fuzz tests
+if [ $use_afl -ge 0 ]; then
+ echo Build afl fuzz tests
+ if ! command -V afl-gcc > /dev/null; then
+ echo $0 option --afl requires package afl installed
+ exit 0
+ fi
+ make -f Makefile.unx clean
+ make -f Makefile.unx units=igzip CC=afl-gcc other
+fi
+
+# Optionally build llvm fuzz tests
+if [ $use_llvm -ge 0 ]; then
+ echo Build llvm fuzz tests
+ if ( command -V clang++ > /dev/null ); then
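+ # Prefer clang's built-in libFuzzer (-fsanitize=fuzzer); fall back to an external -lFuzzer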
+ if (echo int LLVMFuzzerTestOneInput\(\)\{return 0\;\} | clang++ -x c - -fsanitize=fuzzer,address -lpthread -o /dev/null >& /dev/null); then
+ echo have modern clang
+ llvm_link_args='FUZZLINK=-fsanitize=fuzzer,address'
+ elif (echo int LLVMFuzzerTestOneInput\(\)\{return 0\;\} | clang++ -x c - -lFuzzer -lpthread -o /dev/null >& /dev/null); then
+ echo have libFuzzer
+ llvm_link_args='FUZZLINK=-lFuzzer'
+ else
+ echo $0 option --llvm requires clang++ and libFuzzer
+ exit 0
+ fi
+ fi
+ rm -rf bin
+ make -f Makefile.unx units=igzip llvm_fuzz_tests igzip_dump_inflate_corpus CC=clang CXX=clang++ ${llvm_link_args}
+fi
+
+#Create fuzz input/output directories
+mkdir -p $fuzzin_dir
+if [ $use_afl -ge 0 ]; then
+ mkdir -p $fuzzout_dir
+fi
+
+# Optionally fill fuzz input with internal tests corpus
+[ $use_internal_corp -gt 0 ] && ./igzip_dump_inflate_corpus $fuzzin_dir
+
+# Optionally compress input samples as input into fuzz dir
+for f in $samp_files; do
+ echo Using sample file $f
+ f_base=`basename $f`
+ ./igzip_file_perf $f -o $fuzzin_dir/samp_${f_base}_cmp
+done
+
+# Optionally run tests alternately one after the other
+while [ $use_llvm -gt 0 -o $use_afl -gt 0 ]; do
+ if [ $use_afl -gt 0 ]; then
+ echo afl run $use_afl
+ let use_afl--
+ $afl_timeout_cmd afl-fuzz -T "Run inflate $run_secs s" -i $fuzzin_dir -o $fuzzout_dir -M fuzzer1 -- ./igzip_fuzz_inflate @@
+ afl-whatsup $fuzzout_dir
+ fi
+
+ if [ $use_llvm -gt 0 ]; then
+ echo llvm run $use_llvm
+ let use_llvm--
+ for test in $llvm_tests; do
+ echo "Run llvm test $test"
+ ./$test $fuzzin_dir $llvm_opts
+ done
+ fi
+done
+
+make -f Makefile.unx clean
diff --git a/src/isa-l/tools/test_tools.sh b/src/isa-l/tools/test_tools.sh
new file mode 100755
index 000000000..448b1f92b
--- /dev/null
+++ b/src/isa-l/tools/test_tools.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
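+# Minimal helpers that bracket each test with enter/leave markers in the log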
+function test_start()
+{
+ echo "entering test: $1"
+}
+
+function test_end()
+{
+ echo "leaving test: $1 status: $2"
+}
diff --git a/src/isa-l/tools/yasm-cet-filter.sh b/src/isa-l/tools/yasm-cet-filter.sh
new file mode 100755
index 000000000..d7b3e973d
--- /dev/null
+++ b/src/isa-l/tools/yasm-cet-filter.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+# Filter out unnecessary options added by automake
+
+while [ -n "$*" ]; do
+ case "$1" in
+ -o )
+ # Supported options with arg
+ options="$options $1 $2"
+ shift
+ object="$1"
+ shift
+ ;;
+ -f | -I | -i | -D )
+ # Supported options with arg
+ options="$options $1 $2"
+ shift
+ shift
+ ;;
+ -I* | -i* | --prefix* )
+ # Supported options without arg
+ options="$options $1"
+ shift
+ ;;
+ -D* ) # For defines we need to remove spaces
+ case "$1" in
+ *' '* ) ;;
+ *) options="$options $1" ;;
+ esac
+ shift
+ ;;
+ #-blah )
+ # Unsupported options with args - none known
+ -* )
+ # Unsupported options with no args
+ shift
+ ;;
+ * )
+ args="$args $1"
+ shift
+ ;;
+ esac
+done
+
+yasm $options $args
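+# Re-link the object with -z ibt -z shstk so it carries the CET property notes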
+$CET_LD -r -z ibt -z shstk -o $object.tmp $object
+mv $object.tmp $object
diff --git a/src/isa-l/tools/yasm-filter.sh b/src/isa-l/tools/yasm-filter.sh
new file mode 100755
index 000000000..c33952a40
--- /dev/null
+++ b/src/isa-l/tools/yasm-filter.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# Filter out unnecessary options added by automake
+
+while [ -n "$*" ]; do
+ case "$1" in
+ -f | -o | -I | -i | -D )
+ # Supported options with arg
+ options="$options $1 $2"
+ shift
+ shift
+ ;;
+ -I* | -i* | --prefix* )
+ # Supported options without arg
+ options="$options $1"
+ shift
+ ;;
+ -D* ) # For defines we need to remove spaces
+ case "$1" in
+ *' '* ) ;;
+ *) options="$options $1" ;;
+ esac
+ shift
+ ;;
+ #-blah )
+ # Unsupported options with args - none known
+ -* )
+ # Unsupported options with no args
+ shift
+ ;;
+ * )
+ args="$args $1"
+ shift
+ ;;
+ esac
+done
+
+yasm $options $args