From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:45:59 +0200 Subject: Adding upstream version 16.2.11+ds. Signed-off-by: Daniel Baumann --- src/spdk/isa-l/.drone.yml | 133 + src/spdk/isa-l/.gitignore | 28 + src/spdk/isa-l/.travis.yml | 87 + src/spdk/isa-l/CONTRIBUTING.md | 39 + src/spdk/isa-l/Doxyfile | 29 + src/spdk/isa-l/LICENSE | 26 + src/spdk/isa-l/Makefile.am | 163 + src/spdk/isa-l/Makefile.nmake | 266 + src/spdk/isa-l/Makefile.unx | 55 + src/spdk/isa-l/README.md | 62 + src/spdk/isa-l/Release_notes.txt | 278 + src/spdk/isa-l/autogen.sh | 17 + src/spdk/isa-l/configure.ac | 296 + src/spdk/isa-l/crc/Makefile.am | 80 + src/spdk/isa-l/crc/aarch64/Makefile.am | 47 + .../isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S | 423 ++ src/spdk/isa-l/crc/aarch64/crc16_t10dif_pmull.S | 404 ++ .../isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S | 176 + src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S | 33 + src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h | 87 + src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S | 33 + src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h | 87 + .../isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S | 172 + .../isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S | 53 + .../isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h | 87 + .../isa-l/crc/aarch64/crc32_norm_common_pmull.h | 316 + .../isa-l/crc/aarch64/crc32_refl_common_pmull.h | 280 + src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S | 33 + src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h | 200 + src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S | 33 + src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h | 196 + src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.S | 33 + src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.h | 201 + src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.S | 33 + src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.h | 197 + .../isa-l/crc/aarch64/crc64_jones_norm_pmull.S | 33 + .../isa-l/crc/aarch64/crc64_jones_norm_pmull.h | 200 + 
.../isa-l/crc/aarch64/crc64_jones_refl_pmull.S | 33 + .../isa-l/crc/aarch64/crc64_jones_refl_pmull.h | 196 + .../isa-l/crc/aarch64/crc64_norm_common_pmull.h | 310 + .../isa-l/crc/aarch64/crc64_refl_common_pmull.h | 302 + .../isa-l/crc/aarch64/crc_aarch64_dispatcher.c | 145 + src/spdk/isa-l/crc/aarch64/crc_multibinary_arm.S | 42 + src/spdk/isa-l/crc/crc16_t10dif_01.asm | 665 ++ src/spdk/isa-l/crc/crc16_t10dif_by4.asm | 562 ++ src/spdk/isa-l/crc/crc16_t10dif_copy_by4.asm | 598 ++ src/spdk/isa-l/crc/crc16_t10dif_copy_perf.c | 84 + src/spdk/isa-l/crc/crc16_t10dif_copy_test.c | 175 + src/spdk/isa-l/crc/crc16_t10dif_op_perf.c | 116 + src/spdk/isa-l/crc/crc16_t10dif_perf.c | 79 + src/spdk/isa-l/crc/crc16_t10dif_test.c | 179 + src/spdk/isa-l/crc/crc32_funcs_test.c | 324 + src/spdk/isa-l/crc/crc32_gzip_refl_by8.asm | 624 ++ src/spdk/isa-l/crc/crc32_gzip_refl_perf.c | 91 + src/spdk/isa-l/crc/crc32_ieee_01.asm | 655 ++ src/spdk/isa-l/crc/crc32_ieee_by4.asm | 565 ++ src/spdk/isa-l/crc/crc32_ieee_perf.c | 79 + src/spdk/isa-l/crc/crc32_iscsi_00.asm | 671 ++ src/spdk/isa-l/crc/crc32_iscsi_01.asm | 590 ++ src/spdk/isa-l/crc/crc32_iscsi_perf.c | 79 + src/spdk/isa-l/crc/crc64_base.c | 912 +++ src/spdk/isa-l/crc/crc64_ecma_norm_by16_10.asm | 61 + src/spdk/isa-l/crc/crc64_ecma_norm_by8.asm | 583 ++ src/spdk/isa-l/crc/crc64_ecma_refl_by16_10.asm | 61 + src/spdk/isa-l/crc/crc64_ecma_refl_by8.asm | 548 ++ src/spdk/isa-l/crc/crc64_example.c | 68 + src/spdk/isa-l/crc/crc64_funcs_perf.c | 103 + src/spdk/isa-l/crc/crc64_funcs_test.c | 315 + src/spdk/isa-l/crc/crc64_iso_norm_by16_10.asm | 524 ++ src/spdk/isa-l/crc/crc64_iso_norm_by8.asm | 581 ++ src/spdk/isa-l/crc/crc64_iso_refl_by16_10.asm | 494 ++ src/spdk/isa-l/crc/crc64_iso_refl_by8.asm | 544 ++ src/spdk/isa-l/crc/crc64_jones_norm_by16_10.asm | 61 + src/spdk/isa-l/crc/crc64_jones_norm_by8.asm | 581 ++ src/spdk/isa-l/crc/crc64_jones_refl_by16_10.asm | 61 + src/spdk/isa-l/crc/crc64_jones_refl_by8.asm | 544 ++ 
src/spdk/isa-l/crc/crc64_multibinary.asm | 92 + src/spdk/isa-l/crc/crc64_ref.h | 148 + src/spdk/isa-l/crc/crc_base.c | 351 + src/spdk/isa-l/crc/crc_base_aliases.c | 87 + src/spdk/isa-l/crc/crc_multibinary.asm | 189 + src/spdk/isa-l/crc/crc_ref.h | 140 + src/spdk/isa-l/crc/crc_simple_test.c | 64 + src/spdk/isa-l/erasure_code/Makefile.am | 161 + src/spdk/isa-l/erasure_code/ec_base.c | 371 ++ src/spdk/isa-l/erasure_code/ec_base.h | 6680 +++++++++++++++++++ src/spdk/isa-l/erasure_code/ec_base_aliases.c | 61 + src/spdk/isa-l/erasure_code/ec_highlevel_func.c | 336 + src/spdk/isa-l/erasure_code/ec_multibinary.asm | 95 + .../isa-l/erasure_code/erasure_code_base_perf.c | 176 + .../isa-l/erasure_code/erasure_code_base_test.c | 764 +++ src/spdk/isa-l/erasure_code/erasure_code_perf.c | 176 + src/spdk/isa-l/erasure_code/erasure_code_test.c | 764 +++ .../isa-l/erasure_code/erasure_code_update_perf.c | 281 + .../isa-l/erasure_code/erasure_code_update_test.c | 959 +++ src/spdk/isa-l/erasure_code/gen_rs_matrix_limits.c | 115 + .../isa-l/erasure_code/gf_2vect_dot_prod_avx.asm | 337 + .../isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm | 356 ++ .../erasure_code/gf_2vect_dot_prod_avx512.asm | 245 + .../isa-l/erasure_code/gf_2vect_dot_prod_sse.asm | 339 + .../erasure_code/gf_2vect_dot_prod_sse_test.c | 480 ++ src/spdk/isa-l/erasure_code/gf_2vect_mad_avx.asm | 236 + src/spdk/isa-l/erasure_code/gf_2vect_mad_avx2.asm | 247 + .../isa-l/erasure_code/gf_2vect_mad_avx512.asm | 230 + src/spdk/isa-l/erasure_code/gf_2vect_mad_sse.asm | 239 + .../isa-l/erasure_code/gf_3vect_dot_prod_avx.asm | 377 ++ .../isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm | 397 ++ .../erasure_code/gf_3vect_dot_prod_avx512.asm | 270 + .../isa-l/erasure_code/gf_3vect_dot_prod_sse.asm | 378 ++ .../erasure_code/gf_3vect_dot_prod_sse_test.c | 586 ++ src/spdk/isa-l/erasure_code/gf_3vect_mad_avx.asm | 288 + src/spdk/isa-l/erasure_code/gf_3vect_mad_avx2.asm | 317 + .../isa-l/erasure_code/gf_3vect_mad_avx512.asm | 247 + 
src/spdk/isa-l/erasure_code/gf_3vect_mad_sse.asm | 298 + .../isa-l/erasure_code/gf_4vect_dot_prod_avx.asm | 441 ++ .../isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm | 460 ++ .../erasure_code/gf_4vect_dot_prod_avx512.asm | 301 + .../isa-l/erasure_code/gf_4vect_dot_prod_sse.asm | 443 ++ .../erasure_code/gf_4vect_dot_prod_sse_test.c | 695 ++ src/spdk/isa-l/erasure_code/gf_4vect_mad_avx.asm | 336 + src/spdk/isa-l/erasure_code/gf_4vect_mad_avx2.asm | 342 + .../isa-l/erasure_code/gf_4vect_mad_avx512.asm | 267 + src/spdk/isa-l/erasure_code/gf_4vect_mad_sse.asm | 342 + .../isa-l/erasure_code/gf_5vect_dot_prod_avx.asm | 303 + .../isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm | 315 + .../isa-l/erasure_code/gf_5vect_dot_prod_sse.asm | 304 + .../erasure_code/gf_5vect_dot_prod_sse_test.c | 805 +++ src/spdk/isa-l/erasure_code/gf_5vect_mad_avx.asm | 365 ++ src/spdk/isa-l/erasure_code/gf_5vect_mad_avx2.asm | 363 ++ src/spdk/isa-l/erasure_code/gf_5vect_mad_sse.asm | 373 ++ .../isa-l/erasure_code/gf_6vect_dot_prod_avx.asm | 315 + .../isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm | 326 + .../isa-l/erasure_code/gf_6vect_dot_prod_sse.asm | 315 + .../erasure_code/gf_6vect_dot_prod_sse_test.c | 911 +++ src/spdk/isa-l/erasure_code/gf_6vect_mad_avx.asm | 394 ++ src/spdk/isa-l/erasure_code/gf_6vect_mad_avx2.asm | 400 ++ src/spdk/isa-l/erasure_code/gf_6vect_mad_sse.asm | 406 ++ src/spdk/isa-l/erasure_code/gf_inverse_test.c | 225 + .../isa-l/erasure_code/gf_vect_dot_prod_1tbl.c | 152 + .../isa-l/erasure_code/gf_vect_dot_prod_avx.asm | 271 + .../isa-l/erasure_code/gf_vect_dot_prod_avx2.asm | 280 + .../isa-l/erasure_code/gf_vect_dot_prod_avx512.asm | 240 + .../erasure_code/gf_vect_dot_prod_base_test.c | 290 + .../isa-l/erasure_code/gf_vect_dot_prod_perf.c | 174 + .../isa-l/erasure_code/gf_vect_dot_prod_sse.asm | 271 + .../isa-l/erasure_code/gf_vect_dot_prod_test.c | 525 ++ src/spdk/isa-l/erasure_code/gf_vect_mad_avx.asm | 196 + src/spdk/isa-l/erasure_code/gf_vect_mad_avx2.asm | 203 + 
src/spdk/isa-l/erasure_code/gf_vect_mad_avx512.asm | 193 + src/spdk/isa-l/erasure_code/gf_vect_mad_sse.asm | 197 + src/spdk/isa-l/erasure_code/gf_vect_mad_test.c | 519 ++ src/spdk/isa-l/erasure_code/gf_vect_mul_avx.asm | 164 + .../isa-l/erasure_code/gf_vect_mul_base_test.c | 129 + src/spdk/isa-l/erasure_code/gf_vect_mul_perf.c | 90 + src/spdk/isa-l/erasure_code/gf_vect_mul_sse.asm | 170 + src/spdk/isa-l/erasure_code/gf_vect_mul_test.c | 158 + src/spdk/isa-l/examples/ec/Makefile.am | 33 + src/spdk/isa-l/examples/ec/Makefile.unx | 8 + src/spdk/isa-l/examples/ec/ec_piggyback_example.c | 506 ++ src/spdk/isa-l/examples/ec/ec_simple_example.c | 277 + src/spdk/isa-l/igzip/Makefile.am | 138 + src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h | 57 + src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h | 226 + src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h | 154 + .../igzip/aarch64/igzip_deflate_body_aarch64.S | 261 + .../igzip/aarch64/igzip_deflate_finish_aarch64.S | 264 + .../aarch64/igzip_inflate_multibinary_arm64.S | 32 + .../isa-l/igzip/aarch64/igzip_isal_adler32_neon.S | 178 + .../aarch64/igzip_multibinary_aarch64_dispatcher.c | 123 + .../isa-l/igzip/aarch64/igzip_multibinary_arm64.S | 50 + .../aarch64/isal_deflate_icf_body_hash_hist.S | 364 ++ .../aarch64/isal_deflate_icf_finish_hash_hist.S | 397 ++ .../isa-l/igzip/aarch64/isal_update_histogram.S | 311 + src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h | 72 + src/spdk/isa-l/igzip/aarch64/options_aarch64.h | 71 + src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h | 57 + src/spdk/isa-l/igzip/adler32_avx2_4.asm | 292 + src/spdk/isa-l/igzip/adler32_base.c | 63 + src/spdk/isa-l/igzip/adler32_perf.c | 72 + src/spdk/isa-l/igzip/adler32_sse.asm | 249 + src/spdk/isa-l/igzip/bitbuf2.asm | 64 + src/spdk/isa-l/igzip/bitbuf2.h | 130 + src/spdk/isa-l/igzip/checksum32_funcs_test.c | 308 + src/spdk/isa-l/igzip/checksum_test_ref.h | 102 + src/spdk/isa-l/igzip/data_struct2.asm | 275 + src/spdk/isa-l/igzip/encode_df.c | 38 + 
src/spdk/isa-l/igzip/encode_df.h | 30 + src/spdk/isa-l/igzip/encode_df_04.asm | 576 ++ src/spdk/isa-l/igzip/encode_df_06.asm | 620 ++ src/spdk/isa-l/igzip/flatten_ll.c | 41 + src/spdk/isa-l/igzip/flatten_ll.h | 3 + src/spdk/isa-l/igzip/generate_custom_hufftables.c | 308 + src/spdk/isa-l/igzip/generate_static_inflate.c | 163 + src/spdk/isa-l/igzip/heap_macros.asm | 98 + src/spdk/isa-l/igzip/huff_codes.c | 1694 +++++ src/spdk/isa-l/igzip/huff_codes.h | 170 + src/spdk/isa-l/igzip/huffman.asm | 249 + src/spdk/isa-l/igzip/huffman.h | 359 ++ src/spdk/isa-l/igzip/hufftables_c.c | 6742 ++++++++++++++++++++ src/spdk/isa-l/igzip/igzip.c | 1931 ++++++ src/spdk/isa-l/igzip/igzip_base.c | 236 + src/spdk/isa-l/igzip/igzip_base_aliases.c | 153 + src/spdk/isa-l/igzip/igzip_body.asm | 786 +++ src/spdk/isa-l/igzip/igzip_build_hash_table_perf.c | 39 + src/spdk/isa-l/igzip/igzip_checksums.h | 12 + src/spdk/isa-l/igzip/igzip_compare_types.asm | 452 ++ .../isa-l/igzip/igzip_decode_block_stateless.asm | 795 +++ .../igzip/igzip_decode_block_stateless_01.asm | 3 + .../igzip/igzip_decode_block_stateless_04.asm | 4 + src/spdk/isa-l/igzip/igzip_deflate_hash.asm | 165 + src/spdk/isa-l/igzip/igzip_example.c | 101 + src/spdk/isa-l/igzip/igzip_file_perf.c | 334 + src/spdk/isa-l/igzip/igzip_finish.asm | 324 + src/spdk/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm | 741 +++ src/spdk/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm | 576 ++ src/spdk/isa-l/igzip/igzip_hist_perf.c | 133 + src/spdk/isa-l/igzip/igzip_icf_base.c | 370 ++ src/spdk/isa-l/igzip/igzip_icf_body.c | 326 + src/spdk/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm | 901 +++ src/spdk/isa-l/igzip/igzip_icf_finish.asm | 322 + src/spdk/isa-l/igzip/igzip_inflate.c | 2437 +++++++ src/spdk/isa-l/igzip/igzip_inflate_multibinary.asm | 45 + src/spdk/isa-l/igzip/igzip_inflate_test.c | 311 + src/spdk/isa-l/igzip/igzip_level_buf_structs.h | 48 + src/spdk/isa-l/igzip/igzip_multibinary.asm | 134 + src/spdk/isa-l/igzip/igzip_perf.c | 832 +++ 
src/spdk/isa-l/igzip/igzip_rand_test.c | 3087 +++++++++ src/spdk/isa-l/igzip/igzip_semi_dyn_file_perf.c | 334 + src/spdk/isa-l/igzip/igzip_set_long_icf_fg_04.asm | 295 + src/spdk/isa-l/igzip/igzip_set_long_icf_fg_06.asm | 367 ++ src/spdk/isa-l/igzip/igzip_sync_flush_example.c | 86 + src/spdk/isa-l/igzip/igzip_update_histogram.asm | 574 ++ src/spdk/isa-l/igzip/igzip_update_histogram_01.asm | 7 + src/spdk/isa-l/igzip/igzip_update_histogram_04.asm | 8 + src/spdk/isa-l/igzip/igzip_wrapper.h | 52 + src/spdk/isa-l/igzip/igzip_wrapper_hdr_test.c | 890 +++ src/spdk/isa-l/igzip/inflate_data_structs.asm | 146 + src/spdk/isa-l/igzip/inflate_std_vects.h | 1554 +++++ src/spdk/isa-l/igzip/lz0a_const.asm | 65 + src/spdk/isa-l/igzip/options.asm | 77 + src/spdk/isa-l/igzip/proc_heap.asm | 126 + src/spdk/isa-l/igzip/proc_heap_base.c | 85 + src/spdk/isa-l/igzip/repeated_char_result.h | 68 + src/spdk/isa-l/igzip/rfc1951_lookup.asm | 118 + src/spdk/isa-l/igzip/static_inflate.h | 1346 ++++ src/spdk/isa-l/igzip/stdmac.asm | 469 ++ src/spdk/isa-l/include/aarch64_multibinary.h | 221 + src/spdk/isa-l/include/crc.h | 212 + src/spdk/isa-l/include/crc64.h | 277 + src/spdk/isa-l/include/erasure_code.h | 944 +++ src/spdk/isa-l/include/gf_vect_mul.h | 152 + src/spdk/isa-l/include/igzip_lib.h | 937 +++ src/spdk/isa-l/include/mem_routines.h | 64 + src/spdk/isa-l/include/multibinary.asm | 399 ++ src/spdk/isa-l/include/raid.h | 305 + src/spdk/isa-l/include/reg_sizes.asm | 248 + src/spdk/isa-l/include/test.h | 285 + src/spdk/isa-l/include/types.h | 77 + src/spdk/isa-l/include/unaligned.h | 76 + src/spdk/isa-l/isa-l.def | 115 + src/spdk/isa-l/libisal.pc.in | 11 + src/spdk/isa-l/make.inc | 372 ++ src/spdk/isa-l/mem/Makefile.am | 47 + src/spdk/isa-l/mem/aarch64/Makefile.am | 33 + .../isa-l/mem/aarch64/mem_aarch64_dispatcher.c | 39 + src/spdk/isa-l/mem/aarch64/mem_multibinary_arm.S | 33 + src/spdk/isa-l/mem/aarch64/mem_zero_detect_neon.S | 243 + src/spdk/isa-l/mem/mem_multibinary.asm | 42 + 
src/spdk/isa-l/mem/mem_zero_detect_avx.asm | 189 + src/spdk/isa-l/mem/mem_zero_detect_base.c | 69 + src/spdk/isa-l/mem/mem_zero_detect_base_aliases.c | 38 + src/spdk/isa-l/mem/mem_zero_detect_perf.c | 60 + src/spdk/isa-l/mem/mem_zero_detect_sse.asm | 176 + src/spdk/isa-l/mem/mem_zero_detect_test.c | 226 + src/spdk/isa-l/programs/Makefile.am | 38 + src/spdk/isa-l/programs/igzip.1 | 87 + src/spdk/isa-l/programs/igzip.1.h2m | 31 + src/spdk/isa-l/programs/igzip_cli.c | 1155 ++++ src/spdk/isa-l/programs/igzip_cli_check.sh | 245 + src/spdk/isa-l/raid/Makefile.am | 66 + src/spdk/isa-l/raid/aarch64/Makefile.am | 36 + src/spdk/isa-l/raid/aarch64/pq_check_neon.S | 341 + src/spdk/isa-l/raid/aarch64/pq_gen_neon.S | 282 + .../isa-l/raid/aarch64/raid_aarch64_dispatcher.c | 61 + src/spdk/isa-l/raid/aarch64/raid_multibinary_arm.S | 36 + src/spdk/isa-l/raid/aarch64/xor_check_neon.S | 271 + src/spdk/isa-l/raid/aarch64/xor_gen_neon.S | 264 + src/spdk/isa-l/raid/pq_check_sse.asm | 277 + src/spdk/isa-l/raid/pq_check_sse_i32.asm | 282 + src/spdk/isa-l/raid/pq_check_test.c | 304 + src/spdk/isa-l/raid/pq_gen_avx.asm | 254 + src/spdk/isa-l/raid/pq_gen_avx2.asm | 256 + src/spdk/isa-l/raid/pq_gen_avx512.asm | 235 + src/spdk/isa-l/raid/pq_gen_perf.c | 88 + src/spdk/isa-l/raid/pq_gen_sse.asm | 258 + src/spdk/isa-l/raid/pq_gen_sse_i32.asm | 264 + src/spdk/isa-l/raid/pq_gen_test.c | 194 + src/spdk/isa-l/raid/raid_base.c | 147 + src/spdk/isa-l/raid/raid_base_aliases.c | 50 + src/spdk/isa-l/raid/raid_multibinary.asm | 139 + src/spdk/isa-l/raid/raid_multibinary_i32.asm | 52 + src/spdk/isa-l/raid/xor_check_sse.asm | 285 + src/spdk/isa-l/raid/xor_check_test.c | 280 + src/spdk/isa-l/raid/xor_example.c | 70 + src/spdk/isa-l/raid/xor_gen_avx.asm | 228 + src/spdk/isa-l/raid/xor_gen_avx512.asm | 217 + src/spdk/isa-l/raid/xor_gen_perf.c | 90 + src/spdk/isa-l/raid/xor_gen_sse.asm | 284 + src/spdk/isa-l/raid/xor_gen_test.c | 165 + src/spdk/isa-l/tests/fuzz/Makefile.am | 52 + 
src/spdk/isa-l/tests/fuzz/Makefile.unx | 12 + .../tests/fuzz/igzip_checked_inflate_fuzz_test.c | 72 + .../isa-l/tests/fuzz/igzip_dump_inflate_corpus.c | 40 + src/spdk/isa-l/tests/fuzz/igzip_fuzz_inflate.c | 41 + .../tests/fuzz/igzip_simple_inflate_fuzz_test.c | 22 + .../tests/fuzz/igzip_simple_round_trip_fuzz_test.c | 130 + src/spdk/isa-l/tools/check_format.sh | 87 + src/spdk/isa-l/tools/iindent | 2 + src/spdk/isa-l/tools/nasm-filter.sh | 47 + src/spdk/isa-l/tools/remove_trailing_whitespace.sh | 2 + src/spdk/isa-l/tools/test_autorun.sh | 63 + src/spdk/isa-l/tools/test_checks.sh | 115 + src/spdk/isa-l/tools/test_extended.sh | 208 + src/spdk/isa-l/tools/test_fuzz.sh | 171 + src/spdk/isa-l/tools/test_tools.sh | 11 + src/spdk/isa-l/tools/yasm-filter.sh | 38 + 326 files changed, 102792 insertions(+) create mode 100644 src/spdk/isa-l/.drone.yml create mode 100644 src/spdk/isa-l/.gitignore create mode 100644 src/spdk/isa-l/.travis.yml create mode 100644 src/spdk/isa-l/CONTRIBUTING.md create mode 100644 src/spdk/isa-l/Doxyfile create mode 100644 src/spdk/isa-l/LICENSE create mode 100644 src/spdk/isa-l/Makefile.am create mode 100644 src/spdk/isa-l/Makefile.nmake create mode 100644 src/spdk/isa-l/Makefile.unx create mode 100644 src/spdk/isa-l/README.md create mode 100644 src/spdk/isa-l/Release_notes.txt create mode 100755 src/spdk/isa-l/autogen.sh create mode 100644 src/spdk/isa-l/configure.ac create mode 100644 src/spdk/isa-l/crc/Makefile.am create mode 100644 src/spdk/isa-l/crc/aarch64/Makefile.am create mode 100644 src/spdk/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc16_t10dif_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S create mode 100644 
src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_norm_common_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc32_refl_common_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.S create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_norm_common_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc64_refl_common_pmull.h create mode 100644 src/spdk/isa-l/crc/aarch64/crc_aarch64_dispatcher.c create mode 100644 src/spdk/isa-l/crc/aarch64/crc_multibinary_arm.S create mode 100644 src/spdk/isa-l/crc/crc16_t10dif_01.asm create mode 100644 src/spdk/isa-l/crc/crc16_t10dif_by4.asm create mode 100644 src/spdk/isa-l/crc/crc16_t10dif_copy_by4.asm create mode 100644 src/spdk/isa-l/crc/crc16_t10dif_copy_perf.c create mode 100644 src/spdk/isa-l/crc/crc16_t10dif_copy_test.c create mode 100644 src/spdk/isa-l/crc/crc16_t10dif_op_perf.c create mode 100644 src/spdk/isa-l/crc/crc16_t10dif_perf.c create mode 100644 
src/spdk/isa-l/crc/crc16_t10dif_test.c create mode 100644 src/spdk/isa-l/crc/crc32_funcs_test.c create mode 100644 src/spdk/isa-l/crc/crc32_gzip_refl_by8.asm create mode 100644 src/spdk/isa-l/crc/crc32_gzip_refl_perf.c create mode 100644 src/spdk/isa-l/crc/crc32_ieee_01.asm create mode 100644 src/spdk/isa-l/crc/crc32_ieee_by4.asm create mode 100644 src/spdk/isa-l/crc/crc32_ieee_perf.c create mode 100644 src/spdk/isa-l/crc/crc32_iscsi_00.asm create mode 100644 src/spdk/isa-l/crc/crc32_iscsi_01.asm create mode 100644 src/spdk/isa-l/crc/crc32_iscsi_perf.c create mode 100644 src/spdk/isa-l/crc/crc64_base.c create mode 100644 src/spdk/isa-l/crc/crc64_ecma_norm_by16_10.asm create mode 100644 src/spdk/isa-l/crc/crc64_ecma_norm_by8.asm create mode 100644 src/spdk/isa-l/crc/crc64_ecma_refl_by16_10.asm create mode 100644 src/spdk/isa-l/crc/crc64_ecma_refl_by8.asm create mode 100644 src/spdk/isa-l/crc/crc64_example.c create mode 100644 src/spdk/isa-l/crc/crc64_funcs_perf.c create mode 100644 src/spdk/isa-l/crc/crc64_funcs_test.c create mode 100644 src/spdk/isa-l/crc/crc64_iso_norm_by16_10.asm create mode 100644 src/spdk/isa-l/crc/crc64_iso_norm_by8.asm create mode 100644 src/spdk/isa-l/crc/crc64_iso_refl_by16_10.asm create mode 100644 src/spdk/isa-l/crc/crc64_iso_refl_by8.asm create mode 100644 src/spdk/isa-l/crc/crc64_jones_norm_by16_10.asm create mode 100644 src/spdk/isa-l/crc/crc64_jones_norm_by8.asm create mode 100644 src/spdk/isa-l/crc/crc64_jones_refl_by16_10.asm create mode 100644 src/spdk/isa-l/crc/crc64_jones_refl_by8.asm create mode 100644 src/spdk/isa-l/crc/crc64_multibinary.asm create mode 100644 src/spdk/isa-l/crc/crc64_ref.h create mode 100644 src/spdk/isa-l/crc/crc_base.c create mode 100644 src/spdk/isa-l/crc/crc_base_aliases.c create mode 100644 src/spdk/isa-l/crc/crc_multibinary.asm create mode 100644 src/spdk/isa-l/crc/crc_ref.h create mode 100644 src/spdk/isa-l/crc/crc_simple_test.c create mode 100644 src/spdk/isa-l/erasure_code/Makefile.am create mode 
100644 src/spdk/isa-l/erasure_code/ec_base.c create mode 100644 src/spdk/isa-l/erasure_code/ec_base.h create mode 100644 src/spdk/isa-l/erasure_code/ec_base_aliases.c create mode 100644 src/spdk/isa-l/erasure_code/ec_highlevel_func.c create mode 100644 src/spdk/isa-l/erasure_code/ec_multibinary.asm create mode 100644 src/spdk/isa-l/erasure_code/erasure_code_base_perf.c create mode 100644 src/spdk/isa-l/erasure_code/erasure_code_base_test.c create mode 100644 src/spdk/isa-l/erasure_code/erasure_code_perf.c create mode 100644 src/spdk/isa-l/erasure_code/erasure_code_test.c create mode 100644 src/spdk/isa-l/erasure_code/erasure_code_update_perf.c create mode 100644 src/spdk/isa-l/erasure_code/erasure_code_update_test.c create mode 100644 src/spdk/isa-l/erasure_code/gen_rs_matrix_limits.c create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_mad_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_mad_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_mad_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_2vect_mad_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_mad_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_mad_avx2.asm create mode 100644 
src/spdk/isa-l/erasure_code/gf_3vect_mad_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_3vect_mad_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_mad_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_mad_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_mad_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_4vect_mad_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_5vect_mad_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_5vect_mad_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_5vect_mad_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_6vect_mad_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_6vect_mad_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_6vect_mad_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_inverse_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx.asm create mode 100644 
src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_dot_prod_base_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_dot_prod_perf.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_dot_prod_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_dot_prod_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mad_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mad_avx2.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mad_avx512.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mad_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mad_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mul_avx.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mul_base_test.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mul_perf.c create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mul_sse.asm create mode 100644 src/spdk/isa-l/erasure_code/gf_vect_mul_test.c create mode 100644 src/spdk/isa-l/examples/ec/Makefile.am create mode 100644 src/spdk/isa-l/examples/ec/Makefile.unx create mode 100644 src/spdk/isa-l/examples/ec/ec_piggyback_example.c create mode 100644 src/spdk/isa-l/examples/ec/ec_simple_example.c create mode 100644 src/spdk/isa-l/igzip/Makefile.am create mode 100644 src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h create mode 100644 src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h create mode 100644 src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h create mode 100644 src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S create mode 100644 src/spdk/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S create mode 100644 src/spdk/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S create mode 100644 src/spdk/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S create mode 100644 src/spdk/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c 
create mode 100644 src/spdk/isa-l/igzip/aarch64/igzip_multibinary_arm64.S create mode 100644 src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S create mode 100644 src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S create mode 100644 src/spdk/isa-l/igzip/aarch64/isal_update_histogram.S create mode 100644 src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h create mode 100644 src/spdk/isa-l/igzip/aarch64/options_aarch64.h create mode 100644 src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h create mode 100644 src/spdk/isa-l/igzip/adler32_avx2_4.asm create mode 100644 src/spdk/isa-l/igzip/adler32_base.c create mode 100644 src/spdk/isa-l/igzip/adler32_perf.c create mode 100644 src/spdk/isa-l/igzip/adler32_sse.asm create mode 100644 src/spdk/isa-l/igzip/bitbuf2.asm create mode 100644 src/spdk/isa-l/igzip/bitbuf2.h create mode 100644 src/spdk/isa-l/igzip/checksum32_funcs_test.c create mode 100644 src/spdk/isa-l/igzip/checksum_test_ref.h create mode 100644 src/spdk/isa-l/igzip/data_struct2.asm create mode 100644 src/spdk/isa-l/igzip/encode_df.c create mode 100644 src/spdk/isa-l/igzip/encode_df.h create mode 100644 src/spdk/isa-l/igzip/encode_df_04.asm create mode 100644 src/spdk/isa-l/igzip/encode_df_06.asm create mode 100644 src/spdk/isa-l/igzip/flatten_ll.c create mode 100644 src/spdk/isa-l/igzip/flatten_ll.h create mode 100644 src/spdk/isa-l/igzip/generate_custom_hufftables.c create mode 100644 src/spdk/isa-l/igzip/generate_static_inflate.c create mode 100644 src/spdk/isa-l/igzip/heap_macros.asm create mode 100644 src/spdk/isa-l/igzip/huff_codes.c create mode 100644 src/spdk/isa-l/igzip/huff_codes.h create mode 100644 src/spdk/isa-l/igzip/huffman.asm create mode 100644 src/spdk/isa-l/igzip/huffman.h create mode 100644 src/spdk/isa-l/igzip/hufftables_c.c create mode 100644 src/spdk/isa-l/igzip/igzip.c create mode 100644 src/spdk/isa-l/igzip/igzip_base.c create mode 100644 src/spdk/isa-l/igzip/igzip_base_aliases.c create mode 100644 
src/spdk/isa-l/igzip/igzip_body.asm create mode 100644 src/spdk/isa-l/igzip/igzip_build_hash_table_perf.c create mode 100644 src/spdk/isa-l/igzip/igzip_checksums.h create mode 100644 src/spdk/isa-l/igzip/igzip_compare_types.asm create mode 100644 src/spdk/isa-l/igzip/igzip_decode_block_stateless.asm create mode 100644 src/spdk/isa-l/igzip/igzip_decode_block_stateless_01.asm create mode 100644 src/spdk/isa-l/igzip/igzip_decode_block_stateless_04.asm create mode 100644 src/spdk/isa-l/igzip/igzip_deflate_hash.asm create mode 100644 src/spdk/isa-l/igzip/igzip_example.c create mode 100644 src/spdk/isa-l/igzip/igzip_file_perf.c create mode 100644 src/spdk/isa-l/igzip/igzip_finish.asm create mode 100644 src/spdk/isa-l/igzip/igzip_gen_icf_map_lh1_04.asm create mode 100644 src/spdk/isa-l/igzip/igzip_gen_icf_map_lh1_06.asm create mode 100644 src/spdk/isa-l/igzip/igzip_hist_perf.c create mode 100644 src/spdk/isa-l/igzip/igzip_icf_base.c create mode 100644 src/spdk/isa-l/igzip/igzip_icf_body.c create mode 100644 src/spdk/isa-l/igzip/igzip_icf_body_h1_gr_bt.asm create mode 100644 src/spdk/isa-l/igzip/igzip_icf_finish.asm create mode 100644 src/spdk/isa-l/igzip/igzip_inflate.c create mode 100644 src/spdk/isa-l/igzip/igzip_inflate_multibinary.asm create mode 100644 src/spdk/isa-l/igzip/igzip_inflate_test.c create mode 100644 src/spdk/isa-l/igzip/igzip_level_buf_structs.h create mode 100644 src/spdk/isa-l/igzip/igzip_multibinary.asm create mode 100644 src/spdk/isa-l/igzip/igzip_perf.c create mode 100644 src/spdk/isa-l/igzip/igzip_rand_test.c create mode 100644 src/spdk/isa-l/igzip/igzip_semi_dyn_file_perf.c create mode 100644 src/spdk/isa-l/igzip/igzip_set_long_icf_fg_04.asm create mode 100644 src/spdk/isa-l/igzip/igzip_set_long_icf_fg_06.asm create mode 100644 src/spdk/isa-l/igzip/igzip_sync_flush_example.c create mode 100644 src/spdk/isa-l/igzip/igzip_update_histogram.asm create mode 100644 src/spdk/isa-l/igzip/igzip_update_histogram_01.asm create mode 100644 
src/spdk/isa-l/igzip/igzip_update_histogram_04.asm create mode 100644 src/spdk/isa-l/igzip/igzip_wrapper.h create mode 100644 src/spdk/isa-l/igzip/igzip_wrapper_hdr_test.c create mode 100644 src/spdk/isa-l/igzip/inflate_data_structs.asm create mode 100644 src/spdk/isa-l/igzip/inflate_std_vects.h create mode 100644 src/spdk/isa-l/igzip/lz0a_const.asm create mode 100644 src/spdk/isa-l/igzip/options.asm create mode 100644 src/spdk/isa-l/igzip/proc_heap.asm create mode 100644 src/spdk/isa-l/igzip/proc_heap_base.c create mode 100644 src/spdk/isa-l/igzip/repeated_char_result.h create mode 100644 src/spdk/isa-l/igzip/rfc1951_lookup.asm create mode 100644 src/spdk/isa-l/igzip/static_inflate.h create mode 100644 src/spdk/isa-l/igzip/stdmac.asm create mode 100644 src/spdk/isa-l/include/aarch64_multibinary.h create mode 100644 src/spdk/isa-l/include/crc.h create mode 100644 src/spdk/isa-l/include/crc64.h create mode 100644 src/spdk/isa-l/include/erasure_code.h create mode 100644 src/spdk/isa-l/include/gf_vect_mul.h create mode 100644 src/spdk/isa-l/include/igzip_lib.h create mode 100644 src/spdk/isa-l/include/mem_routines.h create mode 100644 src/spdk/isa-l/include/multibinary.asm create mode 100644 src/spdk/isa-l/include/raid.h create mode 100644 src/spdk/isa-l/include/reg_sizes.asm create mode 100644 src/spdk/isa-l/include/test.h create mode 100644 src/spdk/isa-l/include/types.h create mode 100644 src/spdk/isa-l/include/unaligned.h create mode 100644 src/spdk/isa-l/isa-l.def create mode 100644 src/spdk/isa-l/libisal.pc.in create mode 100644 src/spdk/isa-l/make.inc create mode 100644 src/spdk/isa-l/mem/Makefile.am create mode 100644 src/spdk/isa-l/mem/aarch64/Makefile.am create mode 100644 src/spdk/isa-l/mem/aarch64/mem_aarch64_dispatcher.c create mode 100644 src/spdk/isa-l/mem/aarch64/mem_multibinary_arm.S create mode 100644 src/spdk/isa-l/mem/aarch64/mem_zero_detect_neon.S create mode 100644 src/spdk/isa-l/mem/mem_multibinary.asm create mode 100644 
src/spdk/isa-l/mem/mem_zero_detect_avx.asm create mode 100644 src/spdk/isa-l/mem/mem_zero_detect_base.c create mode 100644 src/spdk/isa-l/mem/mem_zero_detect_base_aliases.c create mode 100644 src/spdk/isa-l/mem/mem_zero_detect_perf.c create mode 100644 src/spdk/isa-l/mem/mem_zero_detect_sse.asm create mode 100644 src/spdk/isa-l/mem/mem_zero_detect_test.c create mode 100644 src/spdk/isa-l/programs/Makefile.am create mode 100644 src/spdk/isa-l/programs/igzip.1 create mode 100644 src/spdk/isa-l/programs/igzip.1.h2m create mode 100644 src/spdk/isa-l/programs/igzip_cli.c create mode 100755 src/spdk/isa-l/programs/igzip_cli_check.sh create mode 100644 src/spdk/isa-l/raid/Makefile.am create mode 100644 src/spdk/isa-l/raid/aarch64/Makefile.am create mode 100644 src/spdk/isa-l/raid/aarch64/pq_check_neon.S create mode 100644 src/spdk/isa-l/raid/aarch64/pq_gen_neon.S create mode 100644 src/spdk/isa-l/raid/aarch64/raid_aarch64_dispatcher.c create mode 100644 src/spdk/isa-l/raid/aarch64/raid_multibinary_arm.S create mode 100644 src/spdk/isa-l/raid/aarch64/xor_check_neon.S create mode 100644 src/spdk/isa-l/raid/aarch64/xor_gen_neon.S create mode 100644 src/spdk/isa-l/raid/pq_check_sse.asm create mode 100644 src/spdk/isa-l/raid/pq_check_sse_i32.asm create mode 100644 src/spdk/isa-l/raid/pq_check_test.c create mode 100644 src/spdk/isa-l/raid/pq_gen_avx.asm create mode 100644 src/spdk/isa-l/raid/pq_gen_avx2.asm create mode 100644 src/spdk/isa-l/raid/pq_gen_avx512.asm create mode 100644 src/spdk/isa-l/raid/pq_gen_perf.c create mode 100644 src/spdk/isa-l/raid/pq_gen_sse.asm create mode 100644 src/spdk/isa-l/raid/pq_gen_sse_i32.asm create mode 100644 src/spdk/isa-l/raid/pq_gen_test.c create mode 100644 src/spdk/isa-l/raid/raid_base.c create mode 100644 src/spdk/isa-l/raid/raid_base_aliases.c create mode 100644 src/spdk/isa-l/raid/raid_multibinary.asm create mode 100644 src/spdk/isa-l/raid/raid_multibinary_i32.asm create mode 100644 src/spdk/isa-l/raid/xor_check_sse.asm create mode 
100644 src/spdk/isa-l/raid/xor_check_test.c create mode 100644 src/spdk/isa-l/raid/xor_example.c create mode 100644 src/spdk/isa-l/raid/xor_gen_avx.asm create mode 100644 src/spdk/isa-l/raid/xor_gen_avx512.asm create mode 100644 src/spdk/isa-l/raid/xor_gen_perf.c create mode 100644 src/spdk/isa-l/raid/xor_gen_sse.asm create mode 100644 src/spdk/isa-l/raid/xor_gen_test.c create mode 100644 src/spdk/isa-l/tests/fuzz/Makefile.am create mode 100644 src/spdk/isa-l/tests/fuzz/Makefile.unx create mode 100644 src/spdk/isa-l/tests/fuzz/igzip_checked_inflate_fuzz_test.c create mode 100644 src/spdk/isa-l/tests/fuzz/igzip_dump_inflate_corpus.c create mode 100644 src/spdk/isa-l/tests/fuzz/igzip_fuzz_inflate.c create mode 100644 src/spdk/isa-l/tests/fuzz/igzip_simple_inflate_fuzz_test.c create mode 100644 src/spdk/isa-l/tests/fuzz/igzip_simple_round_trip_fuzz_test.c create mode 100755 src/spdk/isa-l/tools/check_format.sh create mode 100755 src/spdk/isa-l/tools/iindent create mode 100755 src/spdk/isa-l/tools/nasm-filter.sh create mode 100755 src/spdk/isa-l/tools/remove_trailing_whitespace.sh create mode 100755 src/spdk/isa-l/tools/test_autorun.sh create mode 100755 src/spdk/isa-l/tools/test_checks.sh create mode 100755 src/spdk/isa-l/tools/test_extended.sh create mode 100755 src/spdk/isa-l/tools/test_fuzz.sh create mode 100755 src/spdk/isa-l/tools/test_tools.sh create mode 100755 src/spdk/isa-l/tools/yasm-filter.sh (limited to 'src/spdk/isa-l') diff --git a/src/spdk/isa-l/.drone.yml b/src/spdk/isa-l/.drone.yml new file mode 100644 index 000000000..db734ff73 --- /dev/null +++ b/src/spdk/isa-l/.drone.yml @@ -0,0 +1,133 @@ +kind: pipeline +name: arm64-linux-gcc-5.4 + +platform: + os: linux + arch: arm64 + +steps: +- name: arm64-linux-gcc-5.4 + image: ubuntu:xenial + environment: + C_COMPILER: gcc + commands: + - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi + - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi + - apt-get -qq update + - apt-get install -qq -y 
build-essential git indent libtool libz-dev yasm autoconf + - if [ -n "$CC" ]; then $CC --version; fi + - if [ -n "$AS" ]; then $AS --version; fi + - ./tools/test_autorun.sh "$TEST_TYPE" + +--- +kind: pipeline +name: arm64-linux-clang-3.8 + +platform: + os: linux + arch: arm64 + +steps: +- name: arm64-linux-clang-3.8 + image: ubuntu:xenial + environment: + C_COMPILER: clang + commands: + - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi + - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi + - apt-get -qq update + - apt-get install -qq -y build-essential clang git indent libtool libz-dev yasm autoconf + - if [ -n "$CC" ]; then $CC --version; fi + - if [ -n "$AS" ]; then $AS --version; fi + - ./tools/test_autorun.sh "$TEST_TYPE" + +--- +kind: pipeline +name: arm64-linux-clang-4.0 + +platform: + os: linux + arch: arm64 + +steps: +- name: arm64-linux-clang-4.0 + image: ubuntu:xenial + environment: + C_COMPILER: clang-4.0 + commands: + - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi + - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi + - apt-get -qq update + - apt-get install -qq -y build-essential clang-4.0 git indent libtool libz-dev yasm autoconf + - if [ -n "$CC" ]; then $CC --version; fi + - if [ -n "$AS" ]; then $AS --version; fi + - ./tools/test_autorun.sh "$TEST_TYPE" + +--- +kind: pipeline +name: arm64-linux-gcc-4.7 + +platform: + os: linux + arch: arm64 + +steps: +- name: arm64-linux-gcc-4.7 + image: ubuntu:xenial + environment: + C_COMPILER: gcc-4.7 + commands: + - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi + - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi + - apt-get -qq update + - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf + - add-apt-repository -y ppa:ubuntu-toolchain-r/test + - apt-get -qq update + - apt-get install -qq -y g++-4.7 + - if [ -n "$CC" ]; then $CC --version; fi + - if [ -n "$AS" ]; then $AS --version; fi + - 
./tools/test_autorun.sh "$TEST_TYPE" + +--- +kind: pipeline +name: arm64-linux-gcc-6 + +platform: + os: linux + arch: arm64 + +steps: +- name: arm64-linux-gcc-6 + image: debian:9 + environment: + C_COMPILER: gcc + commands: + - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi + - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi + - apt-get -q update + - apt-get install -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf + - if [ -n "$CC" ]; then $CC --version; fi + - if [ -n "$AS" ]; then $AS --version; fi + - ./tools/test_autorun.sh "$TEST_TYPE" + +--- +kind: pipeline +name: arm64-linux-extended-tests + +platform: + os: linux + arch: arm64 + +steps: +- name: arm64-linux-extended-tests + image: ubuntu:xenial + environment: + TEST_TYPE: ext + commands: + - if [ -n "$C_COMPILER" ]; then export CC="$C_COMPILER"; fi + - if [ -n "$AS_ASSEMBL" ]; then export AS="$AS_ASSEMBL"; fi + - apt-get -qq update + - apt-get install -qq -y build-essential git indent libtool libz-dev software-properties-common yasm autoconf + - if [ -n "$CC" ]; then $CC --version; fi + - if [ -n "$AS" ]; then $AS --version; fi + - ./tools/test_autorun.sh "$TEST_TYPE" diff --git a/src/spdk/isa-l/.gitignore b/src/spdk/isa-l/.gitignore new file mode 100644 index 000000000..e85c4176b --- /dev/null +++ b/src/spdk/isa-l/.gitignore @@ -0,0 +1,28 @@ +# Objects +*~ +*.o +*.lo +*.so +*.dll +*.exp +*.lib +bin + +# Autobuild +Makefile +Makefile.in +aclocal.m4 +autom4te.cache +build-aux +config.* +configure +.deps +.dirstamp +.libs +libtool + +# Generated files +isa-l.h +/libisal.la +libisal.pc +programs/igzip diff --git a/src/spdk/isa-l/.travis.yml b/src/spdk/isa-l/.travis.yml new file mode 100644 index 000000000..96ce78cb9 --- /dev/null +++ b/src/spdk/isa-l/.travis.yml @@ -0,0 +1,87 @@ +language: c +sudo: required +matrix: + include: + ### OS X + - os: osx + env: C_COMPILER=clang + + ### linux gcc and format check + - dist: xenial + addons: + apt: + 
packages: + - nasm + install: + # Install newer indent to check formatting + - sudo apt-get install texinfo + - wget http://archive.ubuntu.com/ubuntu/pool/main/i/indent/indent_2.2.12.orig.tar.xz -O /tmp/indent.tar.xz + - tar -xJf /tmp/indent.tar.xz -C /tmp/ + - pushd /tmp/indent-2.2.12 && ./configure --prefix=/usr && make && sudo make install && popd + env: C_COMPILER=gcc + + ### linux clang + - dist: xenial + addons: + apt: + packages: + - nasm + env: C_COMPILER=clang + + ### linux newer clang + - dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - llvm-toolchain-trusty-4.0 + packages: + - clang-4.0 + env: C_COMPILER=clang-4.0 + + ### linux older gcc + - dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-4.7 + env: C_COMPILER=gcc-4.7 + + ### linux newer gcc + - dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-6 + env: C_COMPILER=gcc-6 + + ### linux extended tests + - dist: xenial + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - binutils-mingw-w64-x86-64 + - gcc-mingw-w64-x86-64 + - wine + - nasm + env: TEST_TYPE=ext + +before_install: + - if [ -n "${C_COMPILER}" ]; then export CC="${C_COMPILER}"; fi + - if [ -n "${AS_ASSEMBL}" ]; then export AS="${AS_ASSEMBL}"; fi + +before_script: + - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get -q update; fi + - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get install yasm; fi + - if [ $TRAVIS_OS_NAME = osx ]; then brew install yasm; fi + +script: + - if [ -n "${CC}" ]; then $CC --version; fi + - if [ -n "${AS}" ]; then $AS --version || echo No version; fi + - ./tools/test_autorun.sh "${TEST_TYPE}" diff --git a/src/spdk/isa-l/CONTRIBUTING.md b/src/spdk/isa-l/CONTRIBUTING.md new file mode 100644 index 000000000..a42ae814b --- /dev/null +++ b/src/spdk/isa-l/CONTRIBUTING.md @@ -0,0 +1,39 @@ +# Contributing to ISA-L + +Everyone is welcome to contribute. 
Patches may be submitted using GitHub pull +requests (PRs). All commits must be signed off by the developer (--signoff) +which indicates that you agree to the Developer Certificate of Origin. Patch +discussion will happen directly on the GitHub PR. Design pre-work and general +discussion occur on the [mailing list]. Anyone can provide feedback in either +location and all discussion is welcome. Decisions on whether to merge patches +will be handled by the maintainer. + +## License + +ISA-L is licensed using a BSD 3-clause [license]. All code submitted to +the project is required to carry that license. + +## Certificate of Origin + +In order to get a clear contribution chain of trust we use the +[signed-off-by language] used by the Linux kernel project. + +## Mailing List + +Contributors and users are welcome to submit new requests for our roadmap, submit +patches, file issues, and ask questions on our [mailing list]. + +## Coding Style + +The coding style for ISA-L C code roughly follows Linux kernel guidelines. Use +the included indent script to format C code. + + ./tools/iindent your_files.c + +And use the check format script before submitting.
+ + ./tools/check_format.sh + +[mailing list]:https://lists.01.org/mailman/listinfo/isal +[license]:LICENSE +[signed-off-by language]:https://01.org/community/signed-process diff --git a/src/spdk/isa-l/Doxyfile b/src/spdk/isa-l/Doxyfile new file mode 100644 index 000000000..56b0c3047 --- /dev/null +++ b/src/spdk/isa-l/Doxyfile @@ -0,0 +1,29 @@ +PROJECT_NAME = "Intel Intelligent Storage Acceleration Library" +PROJECT_BRIEF = "ISA-L API reference doc" + +OUTPUT_DIRECTORY = generated_doc +FULL_PATH_NAMES = NO +TAB_SIZE = 8 +ALIASES = "requires=\xrefitem requires \"Requires\" \"Instruction Set Requirements for arch-specific functions (non-multibinary)\"" +OPTIMIZE_OUTPUT_FOR_C = YES +HIDE_UNDOC_MEMBERS = YES +USE_MDFILE_AS_MAINPAGE = README.md + +INPUT = isa-l.h \ + include \ + README.md \ + CONTRIBUTING.md \ + Release_notes.txt + +EXCLUDE = include/test.h include/types.h +EXAMPLE_PATH = . crc raid erasure_code igzip +PAPER_TYPE = letter +LATEX_SOURCE_CODE = YES +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = YES +PREDEFINED = "DECLARE_ALIGNED(n, a)=ALIGN n" \ + __declspec(x)='x' \ + align(x)='ALIGN \ + x' +EXPAND_AS_DEFINED = DECLARE_ALIGNED +EXTENSION_MAPPING = "txt=md" diff --git a/src/spdk/isa-l/LICENSE b/src/spdk/isa-l/LICENSE new file mode 100644 index 000000000..ecebef110 --- /dev/null +++ b/src/spdk/isa-l/LICENSE @@ -0,0 +1,26 @@ + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/spdk/isa-l/Makefile.am b/src/spdk/isa-l/Makefile.am new file mode 100644 index 000000000..d9361eb05 --- /dev/null +++ b/src/spdk/isa-l/Makefile.am @@ -0,0 +1,163 @@ +EXTRA_DIST = autogen.sh Makefile.unx make.inc Makefile.nmake isa-l.def LICENSE README.md Doxyfile CONTRIBUTING.md +CLEANFILES = +LDADD = +AM_MAKEFLAGS = --no-print-directory +noinst_HEADERS = +pkginclude_HEADERS = include/test.h include/types.h +noinst_LTLIBRARIES = +bin_PROGRAMS = +INCLUDE = -I $(srcdir)/include/ + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libisal.pc +EXTRA_DIST += libisal.pc.in +CLEANFILES += libisal.pc + +lsrc= +src_include= +extern_hdrs= +other_src= +check_tests= +unit_tests= +perf_tests= +unit_tests_extra= +perf_tests_extra= +examples= +other_tests= +other_tests_x86_64= +other_tests_x86_32= +other_tests_aarch64= +lsrc_x86_64= +lsrc_x86_32= +lsrc_aarch64= +lsrc_base_aliases= +lsrc32= +unit_tests32= +perf_tests32= +progs= + +# Include units + +include erasure_code/Makefile.am +include raid/Makefile.am 
+include crc/Makefile.am +include igzip/Makefile.am +include tests/fuzz/Makefile.am +include examples/ec/Makefile.am +include programs/Makefile.am +include mem/Makefile.am + +# LIB version info not necessarily the same as package version +LIBISAL_CURRENT=2 +LIBISAL_REVISION=28 +LIBISAL_AGE=0 + +lib_LTLIBRARIES = libisal.la +pkginclude_HEADERS += $(sort ${extern_hdrs}) +libisal_la_SOURCES = ${lsrc} + +if CPU_X86_64 +libisal_la_SOURCES += ${lsrc_x86_64} +other_tests += ${other_tests_x86_64} +endif + +if CPU_X86_32 +libisal_la_SOURCES += ${lsrc_x86_32} +other_tests += ${other_tests_x86_32} +endif + +if CPU_AARCH64 +libisal_la_SOURCES += ${lsrc_aarch64} +other_tests += ${other_tests_aarch64} +endif + +if CPU_UNDEFINED +libisal_la_SOURCES += ${lsrc_base_aliases} +endif + +nobase_include_HEADERS = isa-l.h +libisal_la_LDFLAGS = $(AM_LDFLAGS) \ + -version-info $(LIBISAL_CURRENT):$(LIBISAL_REVISION):$(LIBISAL_AGE) +libisal_la_LIBADD = ${noinst_LTLIBRARIES} + +EXTRA_DIST += ${other_src} +EXTRA_DIST += Release_notes.txt + +# For tests +LDADD += libisal.la +check_PROGRAMS = ${check_tests} +TESTS = ${check_tests} + +# For additional tests +EXTRA_PROGRAMS = ${unit_tests} +EXTRA_PROGRAMS += ${perf_tests} +EXTRA_PROGRAMS += ${other_tests} +EXTRA_PROGRAMS += ${examples} +CLEANFILES += ${EXTRA_PROGRAMS} + +programs:${progs} +perfs: ${perf_tests} +tests: ${unit_tests} +checks: ${check_tests} +other: ${other_tests} +perf: $(addsuffix .run,$(perf_tests)) +ex: ${examples} +test: $(addsuffix .run,$(unit_tests)) + +# Build rule to run tests +%.run: % + $< + @echo Completed run: $< + +# Support for yasm/nasm/gas +if USE_YASM + as_filter = ${srcdir}/tools/yasm-filter.sh +endif +if USE_NASM + as_filter = ${srcdir}/tools/nasm-filter.sh +endif +if CPU_AARCH64 + as_filter = gcc -D__ASSEMBLY__ +endif + +CCAS = $(as_filter) +EXTRA_DIST += tools/yasm-filter.sh tools/nasm-filter.sh + +AM_CFLAGS = ${my_CFLAGS} ${INCLUDE} $(src_include) ${D} +if CPU_AARCH64 +AM_CCASFLAGS = ${AM_CFLAGS} +else 
+AM_CCASFLAGS = ${yasm_args} ${INCLUDE} ${src_include} ${DEFS} ${D} +endif + +.asm.s: + @echo " MKTMP " $@; + @cp $< $@ + +# Generate isa-l.h +BUILT_SOURCES = isa-l.h +CLEANFILES += isa-l.h +isa-l.h: + @echo 'Building $@' + @echo '' >> $@ + @echo '/**' >> $@ + @echo ' * @file isa-l.h' >> $@ + @echo ' * @brief Include for ISA-L library' >> $@ + @echo ' */' >> $@ + @echo '' >> $@ + @echo '#ifndef _ISAL_H_' >> $@ + @echo '#define _ISAL_H_' >> $@ + @echo '' >> $@ + @echo '#define.ISAL_MAJOR_VERSION.${VERSION}' | ${AWK} -F . '{print $$1, $$2, $$3}' >> $@ + @echo '#define.ISAL_MINOR_VERSION.${VERSION}' | ${AWK} -F . '{print $$1, $$2, $$4}' >> $@ + @echo '#define.ISAL_PATCH_VERSION.${VERSION}' | ${AWK} -F . '{print $$1, $$2, $$5}' >> $@ + @echo '#define ISAL_MAKE_VERSION(maj, min, patch) ((maj) * 0x10000 + (min) * 0x100 + (patch))' >> $@ + @echo '#define ISAL_VERSION ISAL_MAKE_VERSION(ISAL_MAJOR_VERSION, ISAL_MINOR_VERSION, ISAL_PATCH_VERSION)' >> $@ + @echo '' >> $@ + @for unit in $(sort $(extern_hdrs)); do echo "#include <isa-l/$$unit>" | sed -e 's;include/;;' >> $@; done + @echo '#endif //_ISAL_H_' >> $@ + +doc: isa-l.h + (cat Doxyfile; echo 'PROJECT_NUMBER=${VERSION}') | doxygen - + $(MAKE) -C generated_doc/latex &> generated_doc/latex_build_api.log + cp generated_doc/latex/refman.pdf isa-l_api_${VERSION}.pdf diff --git a/src/spdk/isa-l/Makefile.nmake b/src/spdk/isa-l/Makefile.nmake new file mode 100644 index 000000000..fc72aa126 --- /dev/null +++ b/src/spdk/isa-l/Makefile.nmake @@ -0,0 +1,266 @@ +######################################################################## +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +objs = \ + bin\ec_base.obj \ + bin\ec_highlevel_func.obj \ + bin\ec_multibinary.obj \ + bin\gf_2vect_dot_prod_avx.obj \ + bin\gf_2vect_dot_prod_avx2.obj \ + bin\gf_2vect_dot_prod_avx512.obj \ + bin\gf_2vect_dot_prod_sse.obj \ + bin\gf_2vect_mad_avx.obj \ + bin\gf_2vect_mad_avx2.obj \ + bin\gf_2vect_mad_avx512.obj \ + bin\gf_2vect_mad_sse.obj \ + bin\gf_3vect_dot_prod_avx.obj \ + bin\gf_3vect_dot_prod_avx2.obj \ + bin\gf_3vect_dot_prod_avx512.obj \ + bin\gf_3vect_dot_prod_sse.obj \ + bin\gf_3vect_mad_avx.obj \ + bin\gf_3vect_mad_avx2.obj \ + bin\gf_3vect_mad_avx512.obj \ + bin\gf_3vect_mad_sse.obj \ + bin\gf_4vect_dot_prod_avx.obj \ + bin\gf_4vect_dot_prod_avx2.obj \ + bin\gf_4vect_dot_prod_avx512.obj \ + bin\gf_4vect_dot_prod_sse.obj \ + bin\gf_4vect_mad_avx.obj \ + bin\gf_4vect_mad_avx2.obj \ + bin\gf_4vect_mad_avx512.obj \ + bin\gf_4vect_mad_sse.obj \ + bin\gf_5vect_dot_prod_avx.obj \ + bin\gf_5vect_dot_prod_avx2.obj \ + bin\gf_5vect_dot_prod_sse.obj \ + bin\gf_5vect_mad_avx.obj \ + bin\gf_5vect_mad_avx2.obj \ + bin\gf_5vect_mad_sse.obj \ + bin\gf_6vect_dot_prod_avx.obj \ + bin\gf_6vect_dot_prod_avx2.obj \ + bin\gf_6vect_dot_prod_sse.obj \ + bin\gf_6vect_mad_avx.obj \ + bin\gf_6vect_mad_avx2.obj \ + bin\gf_6vect_mad_sse.obj \ + bin\gf_vect_dot_prod_avx.obj \ + bin\gf_vect_dot_prod_avx2.obj \ + bin\gf_vect_dot_prod_avx512.obj \ + bin\gf_vect_dot_prod_sse.obj \ + bin\gf_vect_mad_avx.obj \ + bin\gf_vect_mad_avx2.obj \ + bin\gf_vect_mad_avx512.obj \ + bin\gf_vect_mad_sse.obj \ + bin\gf_vect_mul_avx.obj \ + bin\gf_vect_mul_sse.obj \ + bin\pq_check_sse.obj \ + bin\pq_gen_avx.obj \ + bin\pq_gen_avx2.obj \ + bin\pq_gen_avx512.obj \ + bin\pq_gen_sse.obj \ + bin\raid_base.obj \ + bin\raid_multibinary.obj \ + bin\xor_check_sse.obj \ + bin\xor_gen_avx.obj \ + bin\xor_gen_avx512.obj \ + bin\xor_gen_sse.obj \ + bin\crc16_t10dif_01.obj \ + bin\crc16_t10dif_by4.obj \ + bin\crc16_t10dif_copy_by4.obj \ + 
bin\crc32_ieee_01.obj \ + bin\crc32_ieee_by4.obj \ + bin\crc32_iscsi_00.obj \ + bin\crc32_iscsi_01.obj \ + bin\crc64_base.obj \ + bin\crc64_ecma_norm_by8.obj \ + bin\crc64_ecma_refl_by8.obj \ + bin\crc64_iso_norm_by8.obj \ + bin\crc64_iso_refl_by8.obj \ + bin\crc64_jones_norm_by8.obj \ + bin\crc64_jones_refl_by8.obj \ + bin\crc64_multibinary.obj \ + bin\crc_base.obj \ + bin\adler32_base.obj \ + bin\crc_multibinary.obj \ + bin\huff_codes.obj \ + bin\hufftables_c.obj \ + bin\igzip.obj \ + bin\igzip_base.obj \ + bin\igzip_body.obj \ + bin\igzip_decode_block_stateless_01.obj \ + bin\igzip_decode_block_stateless_04.obj \ + bin\igzip_finish.obj \ + bin\flatten_ll.obj \ + bin\encode_df.obj \ + bin\encode_df_04.obj \ + bin\proc_heap.obj \ + bin\igzip_icf_body_h1_gr_bt.obj \ + bin\igzip_icf_finish.obj \ + bin\igzip_icf_base.obj \ + bin\igzip_inflate.obj \ + bin\igzip_inflate_multibinary.obj \ + bin\igzip_multibinary.obj \ + bin\igzip_update_histogram_01.obj \ + bin\igzip_update_histogram_04.obj \ + bin\rfc1951_lookup.obj \ + bin\crc32_gzip_refl_by8.obj \ + bin\adler32_sse.obj \ + bin\adler32_avx2_4.obj \ + bin\igzip_deflate_hash.obj \ + bin\igzip_gen_icf_map_lh1_04.obj \ + bin\igzip_gen_icf_map_lh1_06.obj \ + bin\igzip_set_long_icf_fg_04.obj \ + bin\igzip_set_long_icf_fg_06.obj \ + bin\igzip_icf_body.obj \ + bin\mem_zero_detect_avx.obj \ + bin\mem_zero_detect_base.obj \ + bin\mem_multibinary.obj \ + bin\mem_zero_detect_sse.obj + +INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ -Imem/ +LINKFLAGS = /nologo +CFLAGS = -O2 -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D) +AFLAGS = -f win64 $(INCLUDES) $(D) +CC = icl +AS = yasm + +lib: bin static dll +static: bin isa-l_static.lib +dll: bin isa-l.dll + +bin: ; -mkdir $@ + +isa-l_static.lib: $(objs) + lib -out:$@ @<< +$? +<< + +isa-l.dll: $(objs) + link -out:$@ -dll -def:isa-l.def @<< +$? +<< + +{erasure_code}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? +{erasure_code}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? 
+ +{raid}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? +{raid}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? + +{crc}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? +{crc}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? + +{igzip}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? +{igzip}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? + +{mem}.c.obj: + $(CC) $(CFLAGS) /c -Fo$@ $? +{mem}.asm.obj: + $(AS) $(AFLAGS) -o $@ $? + +# Examples +ex = xor_example.exe crc_simple_test.exe crc64_example.exe igzip_example.exe igzip_sync_flush_example.exe +ex: lib $(ex) + +$(ex): $(@B).obj + +.obj.exe: + link /out:$@ $(LINKFLAGS) isa-l.lib $? + +# Check tests +checks = \ + gf_vect_mul_test.exe \ + erasure_code_test.exe \ + gf_inverse_test.exe \ + erasure_code_update_test.exe \ + xor_gen_test.exe \ + pq_gen_test.exe \ + xor_check_test.exe \ + pq_check_test.exe \ + crc16_t10dif_test.exe \ + crc16_t10dif_copy_test.exe \ + crc32_funcs_test.exe \ + crc64_funcs_test.exe \ + igzip_wrapper_hdr_test.exe \ + igzip_rand_test.exe \ + mem_zero_detect_test.exe + +checks: lib $(checks) +$(checks): $(@B).obj +check: $(checks) + !$? 
+ +# Unit tests +tests = \ + gf_vect_mul_base_test.exe \ + gf_vect_dot_prod_base_test.exe \ + gf_vect_dot_prod_test.exe \ + gf_vect_mad_test.exe \ + erasure_code_base_test.exe + +tests: lib $(tests) +$(tests): $(@B).obj + +# Performance tests +perfs = \ + gf_vect_mul_perf.exe \ + gf_vect_dot_prod_perf.exe \ + gf_vect_dot_prod_1tbl.exe \ + erasure_code_perf.exe \ + erasure_code_base_perf.exe \ + erasure_code_sse_perf.exe \ + erasure_code_update_perf.exe \ + xor_gen_perf.exe \ + pq_gen_perf.exe \ + crc16_t10dif_perf.exe \ + crc32_ieee_perf.exe \ + crc32_iscsi_perf.exe \ + igzip_perf.exe \ + igzip_sync_flush_perf.exe \ + crc32_gzip_refl_perf.exe \ + mem_zero_detect_perf.exe + +perfs: lib $(perfs) +$(perfs): $(@B).obj + +clean: + -if exist *.obj del *.obj + -if exist bin\*.obj del bin\*.obj + -if exist isa-l_static.lib del isa-l_static.lib + -if exist *.exe del *.exe + -if exist isa-l.lib del isa-l.lib + -if exist isa-l.dll del isa-l.dll + +zlib.lib: +igzip_inflate_test.exe: zlib.lib diff --git a/src/spdk/isa-l/Makefile.unx b/src/spdk/isa-l/Makefile.unx new file mode 100644 index 000000000..16ad1ff47 --- /dev/null +++ b/src/spdk/isa-l/Makefile.unx @@ -0,0 +1,55 @@ +######################################################################## +# Copyright(c) 2011-2015 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +units = erasure_code raid crc igzip programs mem + +default: lib + +ifeq (,$(findstring crc,$(units))) + ifneq (,$(findstring igzip,$(units))) + override units += crc + endif +endif + +include $(foreach unit,$(units), $(unit)/Makefile.am) + +ifneq (,$(findstring igzip,$(units))) + include tests/fuzz/Makefile.am +endif + +ifneq (,$(findstring erasure_code,$(units))) + include examples/ec/Makefile.am +endif + +# Override individual lib names to make one inclusive library. +lib_name := bin/isa-l.a + +include make.inc + +VPATH = . 
$(units) include tests/fuzz examples/ec diff --git a/src/spdk/isa-l/README.md b/src/spdk/isa-l/README.md new file mode 100644 index 000000000..4aa54d924 --- /dev/null +++ b/src/spdk/isa-l/README.md @@ -0,0 +1,62 @@ +Intel(R) Intelligent Storage Acceleration Library +================================================= + +[![Build Status](https://travis-ci.org/intel/isa-l.svg?branch=master)](https://travis-ci.org/intel/isa-l) + +ISA-L is a collection of optimized low-level functions targeting storage +applications. ISA-L includes: +* Erasure codes - Fast block Reed-Solomon type erasure codes for any + encode/decode matrix in GF(2^8). +* CRC - Fast implementations of cyclic redundancy check. Six different + polynomials supported. + - iscsi32, ieee32, t10dif, ecma64, iso64, jones64. +* Raid - calculate and operate on XOR and P+Q parity found in common RAID + implementations. +* Compression - Fast deflate-compatible data compression. +* De-compression - Fast inflate-compatible data decompression. + +Also see: +* [ISA-L for updates](https://github.com/intel/isa-l). +* For crypto functions see [isa-l_crypto on github](https://github.com/intel/isa-l_crypto). +* The [github wiki](https://github.com/intel/isa-l/wiki) including a list of + [distros/ports](https://github.com/intel/isa-l/wiki/Ports--Repos) offering binary packages. +* ISA-L [mailing list](https://lists.01.org/mailman/listinfo/isal). +* [Contributing](CONTRIBUTING.md). + +Building ISA-L +-------------- + +### Prerequisites + +* Assembler: nasm v2.11.01 or later (nasm v2.13 or better suggested for building in AVX512 support) + or yasm version 1.2.0 or later. +* Compiler: gcc, clang, icc or VC compiler. +* Make: GNU 'make' or 'nmake' (Windows). +* Optional: Building with autotools requires autoconf/automake packages.
+ +### Autotools +To build and install the library with autotools it is usually sufficient to run: + + ./autogen.sh + ./configure + make + sudo make install + +### Makefile +To use a standard makefile run: + + make -f Makefile.unx + +### Windows +On Windows use nmake to build dll and static lib: + + nmake -f Makefile.nmake + +### Other make targets +Other targets include: +* `make check` : create and run tests +* `make tests` : create additional unit tests +* `make perfs` : create included performance tests +* `make ex` : build examples +* `make other` : build other utilities such as compression file tests +* `make doc` : build API manual diff --git a/src/spdk/isa-l/Release_notes.txt b/src/spdk/isa-l/Release_notes.txt new file mode 100644 index 000000000..e8a72dedc --- /dev/null +++ b/src/spdk/isa-l/Release_notes.txt @@ -0,0 +1,278 @@ +v2.28 Intel Intelligent Storage Acceleration Library Release Notes +================================================================== + +RELEASE NOTE CONTENTS +1. KNOWN ISSUES +2. FIXED ISSUES +3. CHANGE LOG & FEATURES ADDED + +1. KNOWN ISSUES +---------------- + +* Perf tests do not run in Windows environment. + +* 32-bit lib is not supported in Windows. + +2. FIXED ISSUES +--------------- +v2.28 + +* Fix documentation on gf_vect_mad(). Min length listed as 32 instead of + required min 64 bytes. + +v2.27 + +* Fix lack of install for pkg-config files + +v2.26 + +* Fixes for sanitizer warnings. + +v2.25 + +* Fix for nasm on Mac OS X/darwin. + +v2.24 + +* Fix for crc32_iscsi(). Potential read-over for small buffer. For an input + buffer length of less than 8 bytes and aligned to an 8 byte boundary, function + could read past length. Previously had the possibility to cause a seg fault + only for length 0 and invalid buffer passed. Calculated CRC is unchanged. + +* Fix for compression/decompression of > 4GB files. 
For streaming compression + of extremely large files, the total_out parameter would wrap and could + potentially flag an otherwise valid lookback distance as being invalid. + Total_out is still 32bit for zlib compatibility. No inconsistent compressed + buffers were generated by the issue. + +v2.23 + +* Fix for histogram generation base function. +* Fix library build warnings on macOS. +* Fix igzip to use bsf instruction when tzcnt is not available. + +v2.22 + +* Fix ISA-L builds for other architectures. Base function and examples + sanitized for non-IA builds. + +* Fix fuzz test script to work with llvm 6.0 builtin libFuzz. + +v2.20 + +* Inflate total_out behavior corrected for in-progress decompression. + Previously total_out represented the total bytes decompressed into the output + buffer or temp internal buffer. This is changed to be only the bytes put into + the output buffer. + +* Fixed issue with isal_create_hufftables_subset. Affects semi-dynamic + compression use case when explicitly creating hufftables from histogram. The + _hufftables_subset function could fail to generate length symbols for any + length that were never seen. + +v2.19 + +* Fix erasure code test that violates rs matrix bounds. + +* Fix 0 length file and looping errors in igzip_inflate_test. + +v2.18 + +* Mac OS X/darwin systems no longer require the --target=darwin config option. + The autoconf canonical build should detect. + +v2.17 + +* Fix igzip using 32K window and a shared object + +* Fix igzip undefined instruction error on Nehalem. + +* Fixed issue in crc performance tests where OS optimizations turned cold cache + tests into warm tests. + +v2.15 + +* Fix for windows register save in gf_6vect_mad_avx2.asm. Only affects windows + versions of ec_encode_data_update() running with AVX2. A GP register was not + properly restored resulting in corruption on return. 
+ +v2.14 + +* Building in unit directories is no longer supported removing the issue of + leftover object files causing the top-level make build to fail. + +v2.10 + +* Fix for windows register save overlap in gf_{3-6}vect_dot_prod_sse.asm. Only + affects windows versions of erasure code. GP register saves/restore were + pushed to same stack area as XMM. + +3. CHANGE LOG & FEATURES ADDED +------------------------------ +v2.28 + +* New next-arch versions of 64-bit CRC. All norm and reflected 64-bit + polynomials are expanded to utilize vpclmulqdq. + +v2.27 + +* New multi-threaded compression option for igzip cli tool + +v2.26 + +* Adler32 added to external API. +* Multi-arch improvements. +* Performance test improvements. + +v2.25 + +* Igzip performance improvements and features. + - Performance improvements for incompressible files. Random or incompressible + files can be up to 3x faster in level 1 or 2 compression. + - Additional small file performance improvements. + - New options in igzip cli: use name from header or not, test compressed file. + +* Multi-arch autoconf script. + - Autoconf should detect architecture and run base functions at minimum. + +v2.24 + +* Igzip small file performance improvements and new features. + - Better performance on small files. + - New gzip/zlib header and trailer handling. + - New gzip/zlib header parsing helper functions. + - New user-space compression/decompression tool igzip. + +* New mem unit added with first function isal_zero_detect(). + +v2.23 + +* Igzip inflate (decompression) performance improvements. + - Implemented multi-byte decode for inflate. Decode can pack up to three + symbols into the decode table making some compressed streams decompress much + faster depending on the prevalence of short codes. + +v2.22 + +* Igzip: AVX2 version of level 3 compression added. + +* Erasure code examples + - New examples for standard EC encode and decode. + - Example of piggyback EC encode and decode.
+ +v2.21 + +* Igzip improvements + - New compression levels added. ISA-L fast deflate now has more levels to + balance speed vs. target compression level. Level 0, 1 are as in previous + generations. New levels 2 & 3 target higher compression roughly comparable + to zlib levels 2-3. Level 3 is currently only optimized for processors with + AVX512 instructions. + +* New T10dif & copy function - crc16_t10dif_copy() + - CRC and copy was added to emulate T10dif operations such as DIF insert and + strip. This function stitches together CRC and memcpy operations + eliminating an extra data read. + +* CRC32 iscsi performance improvements + - Fixes issue under some distributions where warm cache performance was + reduced. + +v2.20 + +* Igzip improvements + - Optimized deflate_hash in compression functions. + Improves performance of using preset dictionary. + - Removed alignment restrictions on input structure. + +v2.19 + +* Igzip improvements + + - Add optimized Adler-32 checksum. + + - Implement zlib compression format. + + - Add stateful dictionary support. + + - Add struct reset functions for both deflate and inflate. + +* Reflected IEEE format CRC32 is released out. Function interface is named + crc32_gzip_refl. + +* Exact work condition of Erasure Code Reed-Solomon Matrix is determined by new + added program gen_rs_matrix_limits. + +v2.18 + +* New 2-pass fully-dynamic deflate compression (level -1). ISA-L fast deflate + now has two levels. Level 0 (default) is the same as previous generations. + Setting to level 1 will switch to the fully-dynamic compression that will + typically reach higher compression ratios. + +* RAID AVX512 functions. + +v2.17 + +* New fast decompression (inflate) + +* Compression improvements (deflate) + - Speed and compression ratio improvements. + - Fast custom Huffman code generation. + - New features: + * Run-time option of gzip crc calculation and headers/trailer. + * Choice of static header (BTYPE 01) blocks. 
+ * LARGE_WINDOW, 32K history, now default. + * Stateless full flush mode. + +* CRC64 + - Six new 64-bit polynomials supported. Normal and reflected versions of ECMA, + ISO and Jones polynomials. + +v2.16 + +* Units added: crc, raid, igzip (deflate compression). + +v2.15 + +* Erasure code updates. New AVX512 versions. + +* Nasm support. ISA-L ported to build with nasm or yasm assembler. + +* Windows DLL support. Windows builds DLL by default. + +v2.14 + +* Autoconf and autotools build allows easier porting to additional systems. + Previous make system still available to embedded users with Makefile.unx. + +* Includes update for building on Mac OS X/darwin systems. Add --target=darwin + to ./configure step. + +v2.13 + +* Erasure code improvements + - 32-bit port of optimized gf_vect_dot_prod() functions. This makes + ec_encode_data() functions much faster on 32-bit processors. + - Avoton performance improvements. Performance on Avoton for + gf_vect_dot_prod() and ec_encode_data() can improve by as much as 20%. + +v2.11 + +* Incremental erasure code. New functions added to erasure code to handle + single source update of code blocks. The function ec_encode_data_update() + works with parameters similar to ec_encode_data() but is called incrementally + with each source block. These versions are useful when source blocks are not + all available at once. + +v2.10 + +* Erasure code updates + - New AVX and AVX2 support functions. + - Changes min len requirement on gf_vect_dot_prod() to 32 from 16. + - Tests include both source and parity recovery with ec_encode_data(). + - New encoding examples with Vandermonde or Cauchy matrix. + +v2.8 + +* First open release of erasure code unit that is part of ISA-L.
diff --git a/src/spdk/isa-l/autogen.sh b/src/spdk/isa-l/autogen.sh new file mode 100755 index 000000000..0a3189383 --- /dev/null +++ b/src/spdk/isa-l/autogen.sh @@ -0,0 +1,17 @@ +#!/bin/sh -e + +autoreconf --install --symlink -f + +libdir() { + echo $(cd $1/$(gcc -print-multi-os-directory); pwd) +} + +args="--prefix=/usr --libdir=$(libdir /usr/lib)" + +echo +echo "----------------------------------------------------------------" +echo "Initialized build system. For a common configuration please run:" +echo "----------------------------------------------------------------" +echo +echo "./configure $args" +echo diff --git a/src/spdk/isa-l/configure.ac b/src/spdk/isa-l/configure.ac new file mode 100644 index 000000000..1c1d4d980 --- /dev/null +++ b/src/spdk/isa-l/configure.ac @@ -0,0 +1,296 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ(2.69) +AC_INIT([libisal], + [2.28.0], + [sg.support.isal@intel.com], + [isa-l], + [http://01.org/storage-acceleration-library]) +AC_CONFIG_SRCDIR([]) +AC_CONFIG_AUX_DIR([build-aux]) +AM_INIT_AUTOMAKE([ + foreign + 1.11 + -Wall + -Wno-portability + silent-rules + tar-pax + no-dist-gzip + dist-xz + subdir-objects +]) +AM_PROG_AS + +AC_CANONICAL_HOST +CPU="" +AS_CASE([$host_cpu], + [x86_64], [CPU="x86_64"], + [amd64], [CPU="x86_64"], + [i?86], [CPU="x86_32"], + [aarch64], [CPU="aarch64"], + [arm64], [CPU="aarch64"], +) +AM_CONDITIONAL([CPU_X86_64], [test "$CPU" = "x86_64"]) +AM_CONDITIONAL([CPU_X86_32], [test "$CPU" = "x86_32"]) +AM_CONDITIONAL([CPU_AARCH64], [test "$CPU" = "aarch64"]) +AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"]) + +if test "$CPU" = "x86_64"; then + is_x86=yes +else + if test "$CPU" = "x86_32"; then + is_x86=yes + else + is_x86=no + fi +fi + +# Check for programs +AC_PROG_CC_STDC +AC_USE_SYSTEM_EXTENSIONS +AM_SILENT_RULES([yes]) +LT_INIT +AC_PREFIX_DEFAULT([/usr]) +AC_PROG_SED +AC_PROG_MKDIR_P + +# Options +AC_ARG_ENABLE([debug], + 
AS_HELP_STRING([--enable-debug], [enable debug messages @<:@default=disabled@:>@]), + [], [enable_debug=no]) +AS_IF([test "x$enable_debug" = "xyes"], [ + AC_DEFINE(ENABLE_DEBUG, [1], [Debug messages.]) +]) + +# If this build is for x86, look for yasm and nasm +if test x"$is_x86" = x"yes"; then + # Pick an assembler yasm or nasm + if test x"$AS" = x""; then + # Check for yasm and yasm features + yasm_feature_level=0 + AC_CHECK_PROG(HAVE_YASM, yasm, yes, no) + if test "$HAVE_YASM" = "yes"; then + yasm_feature_level=1 + else + AC_MSG_RESULT([no yasm]) + fi + if test x"$yasm_feature_level" = x"1"; then + AC_MSG_CHECKING([for modern yasm]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vmovdqa %xmm0, %xmm1;]])]) + if yasm -f elf64 -p gas conftest.c ; then + AC_MSG_RESULT([yes]) + yasm_feature_level=4 + else + AC_MSG_RESULT([no]) + fi + fi + if test x"$yasm_feature_level" = x"4"; then + AC_MSG_CHECKING([for optional yasm AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpshufb %zmm0, %zmm1, %zmm2;]])]) + if yasm -f elf64 -p gas conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + yasm_feature_level=6 + else + AC_MSG_RESULT([no]) + fi + fi + if test x"$yasm_feature_level" = x"6"; then + AC_MSG_CHECKING([for additional yasm AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpcompressb zmm0, k1, zmm1;]])]) + sed -i -e '/vpcompressb/!d' conftest.c + if yasm -f elf64 conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + yasm_feature_level=10 + else + AC_MSG_RESULT([no]) + fi + fi + + # Check for nasm and nasm features + nasm_feature_level=0 + AC_CHECK_PROG(HAVE_NASM, nasm, yes, no) + if test "$HAVE_NASM" = "yes"; then + nasm_feature_level=1 + else + AC_MSG_RESULT([no nasm]) + fi + + if test x"$nasm_feature_level" = x"1"; then + AC_MSG_CHECKING([for modern nasm]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])]) + sed -i -e '/pblendvb/!d' conftest.c + if nasm -f elf64 conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + nasm_feature_level=4 + else + 
AC_MSG_RESULT([no]) + fi + fi + if test x"$nasm_feature_level" = x"4"; then + AC_MSG_CHECKING([for optional nasm AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])]) + sed -i -e '/vinsert/!d' conftest.c + if nasm -f elf64 conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + nasm_feature_level=6 + else + AC_MSG_RESULT([no]) + fi + fi + if test x"$nasm_feature_level" = x"6"; then + AC_MSG_CHECKING([for additional nasm AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpcompressb zmm0 {k1}, zmm1;]])]) + sed -i -e '/vpcompressb/!d' conftest.c + if nasm -f elf64 conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + nasm_feature_level=10 + else + AC_MSG_RESULT([no]) + fi + fi + + if test x"$arch" = x"mingw"; then + AS=yasm + as_feature_level=$yasm_feature_level + elif test $nasm_feature_level -ge $yasm_feature_level ; then + AS=nasm + as_feature_level=$nasm_feature_level + else + AS=yasm + as_feature_level=$yasm_feature_level + fi + + else + # Check for $AS supported features + as_feature_level=0 + AC_CHECK_PROG(HAVE_AS, $AS, yes, no) + if test "$HAVE_AS" = "yes"; then + as_feature_level=1 + else + AC_MSG_ERROR([no $AS]) + fi + + if test x"$as_feature_level" = x"1"; then + AC_MSG_CHECKING([for modern $AS]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[pblendvb xmm2, xmm1;]])]) + sed -i -e '/pblendvb/!d' conftest.c + if $AS -f elf64 conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + as_feature_level=4 + else + AC_MSG_RESULT([no]) + fi + fi + if test x"$as_feature_level" = x"4"; then + AC_MSG_CHECKING([for optional as AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])]) + sed -i -e '/vinsert/!d' conftest.c + if $AS -f elf64 conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + as_feature_level=6 + else + AC_MSG_RESULT([no]) + fi + fi + if test x"$as_feature_level" = x"6"; then + AC_MSG_CHECKING([for additional as AVX512 support]) + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpcompressb zmm0, k1, zmm1;]])]) + 
sed -i -e '/vpcompressb/!d' conftest.c + if $AS -f elf64 conftest.c 2> /dev/null; then + AC_MSG_RESULT([yes]) + as_feature_level=10 + else + AC_MSG_RESULT([no]) + fi + fi + fi + + if test $as_feature_level -lt 2 ; then + AC_MSG_ERROR([No modern nasm or yasm found as required. Nasm should be v2.11.01 or later (v2.13 for AVX512) and yasm should be 1.2.0 or later.]) + fi + + case $host_os in + *linux*) arch=linux yasm_args="-f elf64";; + *darwin*) arch=darwin yasm_args="-f macho64 --prefix=_ ";; + *netbsd*) arch=netbsd yasm_args="-f elf64";; + *mingw*) arch=mingw yasm_args="-f win64";; + *) arch=unknown yasm_args="-f elf64";; + esac + + # Fix for nasm missing windows features + if test x"$arch" = x"mingw" -a x"$AS" != x"yasm"; then + AC_MSG_ERROR([Mingw build requires Yasm 1.2.0 or later.]) + fi + + AC_DEFINE_UNQUOTED(AS_FEATURE_LEVEL, [$as_feature_level], [Assembler feature level.]) + if test $as_feature_level -ge 6 ; then + AC_DEFINE(HAVE_AS_KNOWS_AVX512, [1], [Assembler can do AVX512.]) + have_as_knows_avx512=yes + else + AC_MSG_RESULT([Assembler does not understand AVX512 opcodes. Consider upgrading for best performance.]) + fi + + AM_CONDITIONAL(USE_YASM, test x"$AS" = x"yasm") + AM_CONDITIONAL(USE_NASM, test x"$AS" = x"nasm") + AM_CONDITIONAL(WITH_AVX512, test x"$have_as_knows_avx512" = x"yes") + AC_SUBST([yasm_args]) + AM_CONDITIONAL(DARWIN, test x"$arch" = x"darwin") + AC_MSG_RESULT([Using $AS args target "$arch" "$yasm_args"]) +else + # Disable below conditionals if not x86 + AM_CONDITIONAL(USE_YASM, test "x" = "y") + AM_CONDITIONAL(USE_NASM, test "x" = "y") + AM_CONDITIONAL(WITH_AVX512, test "x" = "y") + AM_CONDITIONAL(DARWIN, test "x" = "y") +fi + + +# Check for header files +AC_CHECK_HEADERS([limits.h stdint.h stdlib.h string.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_C_INLINE +AC_TYPE_SIZE_T +AC_TYPE_UINT16_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_TYPE_UINT8_T + +# Checks for library functions. 
+AC_FUNC_MALLOC # Used only in tests +AC_CHECK_FUNCS([memmove memset getopt]) + +my_CFLAGS="\ +-Wall \ +-Wchar-subscripts \ +-Wformat-security \ +-Wnested-externs \ +-Wpointer-arith \ +-Wshadow \ +-Wstrict-prototypes \ +-Wtype-limits \ +" +AC_SUBST([my_CFLAGS]) + +AC_CONFIG_FILES([\ + Makefile\ + libisal.pc +]) + +AC_OUTPUT +AC_MSG_RESULT([ + $PACKAGE $VERSION + ===== + + prefix: ${prefix} + sysconfdir: ${sysconfdir} + libdir: ${libdir} + includedir: ${includedir} + + compiler: ${CC} + cflags: ${CFLAGS} + ldflags: ${LDFLAGS} + + debug: ${enable_debug} +]) diff --git a/src/spdk/isa-l/crc/Makefile.am b/src/spdk/isa-l/crc/Makefile.am new file mode 100644 index 000000000..38dee97c8 --- /dev/null +++ b/src/spdk/isa-l/crc/Makefile.am @@ -0,0 +1,80 @@ +######################################################################## +# Copyright(c) 2011-2017 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +include crc/aarch64/Makefile.am + +lsrc += \ + crc/crc_base.c \ + crc/crc64_base.c + +lsrc_base_aliases += crc/crc_base_aliases.c +lsrc_x86_32 += crc/crc_base_aliases.c + +lsrc_x86_64 += \ + crc/crc16_t10dif_01.asm \ + crc/crc16_t10dif_by4.asm \ + crc/crc16_t10dif_copy_by4.asm \ + crc/crc32_ieee_01.asm \ + crc/crc32_ieee_by4.asm \ + crc/crc32_iscsi_01.asm \ + crc/crc32_iscsi_00.asm \ + crc/crc_multibinary.asm \ + crc/crc64_multibinary.asm \ + crc/crc64_ecma_refl_by8.asm \ + crc/crc64_ecma_refl_by16_10.asm \ + crc/crc64_ecma_norm_by8.asm \ + crc/crc64_ecma_norm_by16_10.asm \ + crc/crc64_iso_refl_by8.asm \ + crc/crc64_iso_refl_by16_10.asm \ + crc/crc64_iso_norm_by8.asm \ + crc/crc64_iso_norm_by16_10.asm \ + crc/crc64_jones_refl_by8.asm \ + crc/crc64_jones_refl_by16_10.asm \ + crc/crc64_jones_norm_by8.asm \ + crc/crc64_jones_norm_by16_10.asm \ + crc/crc32_gzip_refl_by8.asm + +src_include += -I $(srcdir)/crc +extern_hdrs += include/crc.h include/crc64.h + +other_src += include/reg_sizes.asm include/types.h include/test.h \ + crc/crc_ref.h crc/crc64_ref.h + +check_tests += crc/crc16_t10dif_test \ + crc/crc16_t10dif_copy_test \ + crc/crc64_funcs_test \ + crc/crc32_funcs_test + +perf_tests += crc/crc16_t10dif_perf crc/crc16_t10dif_copy_perf \ + crc/crc16_t10dif_op_perf \ + crc/crc32_ieee_perf crc/crc32_iscsi_perf \ + crc/crc64_funcs_perf crc/crc32_gzip_refl_perf + 
+examples += crc/crc_simple_test crc/crc64_example + diff --git a/src/spdk/isa-l/crc/aarch64/Makefile.am b/src/spdk/isa-l/crc/aarch64/Makefile.am new file mode 100644 index 000000000..57061f0f1 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/Makefile.am @@ -0,0 +1,47 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################### + +lsrc_aarch64 += \ + crc/aarch64/crc_multibinary_arm.S \ + crc/aarch64/crc_aarch64_dispatcher.c + +lsrc_aarch64 += \ + crc/aarch64/crc16_t10dif_pmull.S \ + crc/aarch64/crc16_t10dif_copy_pmull.S \ + crc/aarch64/crc32_iscsi_refl_pmull.S \ + crc/aarch64/crc32_iscsi_refl_hw_fold.S \ + crc/aarch64/crc32_ieee_norm_pmull.S \ + crc/aarch64/crc32_gzip_refl_pmull.S \ + crc/aarch64/crc32_gzip_refl_hw_fold.S \ + crc/aarch64/crc64_ecma_refl_pmull.S \ + crc/aarch64/crc64_ecma_norm_pmull.S \ + crc/aarch64/crc64_iso_refl_pmull.S \ + crc/aarch64/crc64_iso_norm_pmull.S \ + crc/aarch64/crc64_jones_refl_pmull.S \ + crc/aarch64/crc64_jones_norm_pmull.S diff --git a/src/spdk/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S b/src/spdk/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S new file mode 100644 index 000000000..10bf157c2 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc16_t10dif_copy_pmull.S @@ -0,0 +1,423 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + + .arch armv8-a+crc+crypto + .text + .align 3 + .global crc16_t10dif_copy_pmull + .type crc16_t10dif_copy_pmull, %function + +/* uint16_t crc16_t10dif_copy_pmull(uint16_t seed, uint8_t *dst, uint8_t *src, uint64_t len) */ + +/* arguments */ +w_seed .req w0 +x_dst .req x1 +x_src .req x2 +x_len .req x3 +w_len .req w3 + +/* returns */ +w_ret .req w0 + +/* these as global temporary registers */ +w_tmp .req w6 +x_tmp .req x6 +x_tmp1 .req x7 +x_tmp2 .req x11 + +d_tmp1 .req d0 +d_tmp2 .req d1 +q_tmp1 .req q0 +q_tmp2 .req q1 +v_tmp1 .req v0 +v_tmp2 .req v1 + +/* local variables */ +w_counter .req w4 +w_crc .req w0 +x_crc .req x0 +x_counter .req x4 +x_crc16tab .req x5 +x_src_saved .req x0 +x_dst_saved .req x12 + +crc16_t10dif_copy_pmull: + cmp x_len, 1023 + sub sp, sp, #16 + uxth w_seed, w_seed + bhi .crc_fold + + mov x_tmp, 0 + mov w_counter, 0 + +.crc_table_loop_pre: + cmp x_len, x_tmp + bls .end + + sxtw x_counter, w_counter + adrp x_crc16tab, .LANCHOR0 + sub x_src, x_src, x_counter + sub x_dst, x_dst, x_counter + add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0 + + .align 2 +.crc_table_loop: + ldrb w_tmp, [x_src, x_counter] + strb w_tmp,
[x_dst, x_counter] + add x_counter, x_counter, 1 + cmp x_len, x_counter + eor w_tmp, w_tmp, w_crc, lsr 8 + ldrh w_tmp, [x_crc16tab, w_tmp, sxtw 1] + eor w_crc, w_tmp, w_crc, lsl 8 + uxth w_crc, w_crc + bhi .crc_table_loop + +.end: + add sp, sp, 16 + ret + +/* carry less multiplication, part1 - before loop */ +q_x0 .req q2 +q_x1 .req q3 +q_x2 .req q4 +q_x3 .req q5 + +v_x0 .req v2 +v_x1 .req v3 +v_x2 .req v4 +v_x3 .req v5 + +d_x0 .req d2 +d_x1 .req d3 +d_x2 .req d4 +d_x3 .req d5 + +// the following registers only used this part1 +d_tmp3 .req d16 +v_tmp3 .req v16 + + .align 3 +.crc_fold: + fmov d_tmp1, x_crc + fmov d_tmp2, xzr + dup d_tmp3, v_tmp2.d[0] + shl d_tmp1, d_tmp1, 48 + ins v_tmp3.d[1], v_tmp1.d[0] + + and x_counter, x_len, -64 + sub x_counter, x_counter, #64 + cmp x_counter, 63 + add x_src_saved, x_src, 64 + add x_dst_saved, x_dst, 64 + + ldr q_x0, [x_src] + ldr q_x1, [x_src, 16] + ldr q_x2, [x_src, 32] + ldr q_x3, [x_src, 48] + + str q_x0, [x_dst] + str q_x1, [x_dst, 16] + str q_x2, [x_dst, 32] + str q_x3, [x_dst, 48] + + adrp x_tmp, .shuffle_mask_lanchor + ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor] + + tbl v_tmp1.16b, {v_x0.16b}, v7.16b + eor v_x0.16b, v_tmp3.16b, v_tmp1.16b + + tbl v_x1.16b, {v_x1.16b}, v7.16b + tbl v_x2.16b, {v_x2.16b}, v7.16b + tbl v_x3.16b, {v_x3.16b}, v7.16b + bls .crc_fold_loop_end + +/* carry less multiplication, part2 - loop */ +q_y0 .req q28 +q_y1 .req q29 +q_y2 .req q30 +q_y3 .req q31 + +v_y0 .req v28 +v_y1 .req v29 +v_y2 .req v30 +v_y3 .req v31 + +d_x0_h .req d24 +d_x0_l .req d2 +d_x1_h .req d25 +d_x1_l .req d3 +d_x2_h .req d26 +d_x2_l .req d4 +d_x3_h .req d27 +d_x3_l .req d5 + +v_x0_h .req v24 +v_x0_l .req v2 +v_x1_h .req v25 +v_x1_l .req v3 +v_x2_h .req v26 +v_x2_l .req v4 +v_x3_h .req v27 +v_x3_l .req v5 + +v_tmp1_x0 .req v24 +v_tmp1_x1 .req v25 +v_tmp1_x2 .req v26 +v_tmp1_x3 .req v27 + +d_p4_h .req d19 +v_p4_h .req v19 +d_p4_l .req d17 +v_p4_l .req v17 + + mov x_tmp, 0x371d0000 /* p4 [1] */ + fmov d_p4_h, x_tmp + mov 
x_tmp, 0x87e70000 /* p4 [0] */ + fmov d_p4_l, x_tmp + + .align 2 +.crc_fold_loop: + add x_src_saved, x_src_saved, 64 + add x_dst_saved, x_dst_saved, 64 + + sub x_counter, x_counter, #64 + cmp x_counter, 63 + + dup d_x0_h, v_x0.d[1] + dup d_x1_h, v_x1.d[1] + dup d_x2_h, v_x2.d[1] + dup d_x3_h, v_x3.d[1] + + dup d_x0_l, v_x0.d[0] + dup d_x1_l, v_x1.d[0] + dup d_x2_l, v_x2.d[0] + dup d_x3_l, v_x3.d[0] + + ldr q_y0, [x_src_saved, -64] + ldr q_y1, [x_src_saved, -48] + ldr q_y2, [x_src_saved, -32] + ldr q_y3, [x_src_saved, -16] + + str q_y0, [x_dst_saved, -64] + str q_y1, [x_dst_saved, -48] + str q_y2, [x_dst_saved, -32] + str q_y3, [x_dst_saved, -16] + + pmull v_x0_h.1q, v_x0_h.1d, v_p4_h.1d + pmull v_x0_l.1q, v_x0_l.1d, v_p4_l.1d + pmull v_x1_h.1q, v_x1_h.1d, v_p4_h.1d + pmull v_x1_l.1q, v_x1_l.1d, v_p4_l.1d + pmull v_x2_h.1q, v_x2_h.1d, v_p4_h.1d + pmull v_x2_l.1q, v_x2_l.1d, v_p4_l.1d + pmull v_x3_h.1q, v_x3_h.1d, v_p4_h.1d + pmull v_x3_l.1q, v_x3_l.1d, v_p4_l.1d + + tbl v_y0.16b, {v_y0.16b}, v7.16b + tbl v_y1.16b, {v_y1.16b}, v7.16b + tbl v_y2.16b, {v_y2.16b}, v7.16b + tbl v_y3.16b, {v_y3.16b}, v7.16b + + eor v_tmp1_x0.16b, v_x0_h.16b, v_x0_l.16b + eor v_tmp1_x1.16b, v_x1_h.16b, v_x1_l.16b + eor v_tmp1_x2.16b, v_x2_h.16b, v_x2_l.16b + eor v_tmp1_x3.16b, v_x3_h.16b, v_x3_l.16b + + eor v_x0.16b, v_tmp1_x0.16b, v_y0.16b + eor v_x1.16b, v_tmp1_x1.16b, v_y1.16b + eor v_x2.16b, v_tmp1_x2.16b, v_y2.16b + eor v_x3.16b, v_tmp1_x3.16b, v_y3.16b + + bhi .crc_fold_loop + +/* carry less multiplication, part3 - after loop */ +/* folding 512bit ---> 128bit */ + +// input parameters: +// v_x0 => v2 +// v_x1 => v3 +// v_x2 => v4 +// v_x3 => v5 + +// v0, v1, v6, v30, are tmp registers + +.crc_fold_loop_end: + mov x_tmp, 0x4c1a0000 /* p1 [1] */ + fmov d0, x_tmp + mov x_tmp, 0xfb0b0000 /* p1 [0] */ + fmov d1, x_tmp + + and w_counter, w_len, -64 + sxtw x_tmp, w_counter + + add x_src, x_src, x_tmp + add x_dst, x_dst, x_tmp + + dup d6, v_x0.d[1] + dup d30, v_x0.d[0] + pmull v6.1q, v6.1d, 
v0.1d + pmull v30.1q, v30.1d, v1.1d + eor v6.16b, v6.16b, v30.16b + eor v_x1.16b, v6.16b, v_x1.16b + + dup d6, v_x1.d[1] + dup d30, v_x1.d[0] + pmull v6.1q, v6.1d, v0.1d + pmull v16.1q, v30.1d, v1.1d + eor v6.16b, v6.16b, v16.16b + eor v_x2.16b, v6.16b, v_x2.16b + + dup d_x0, v_x2.d[1] + dup d30, v_x2.d[0] + pmull v0.1q, v_x0.1d, v0.1d + pmull v_x0.1q, v30.1d, v1.1d + eor v1.16b, v0.16b, v_x0.16b + eor v_x0.16b, v1.16b, v_x3.16b + +/* carry less multiplication, part3 - after loop */ +/* crc16 fold function */ +d_16fold_p0_h .req d18 +v_16fold_p0_h .req v18 + +d_16fold_p0_l .req d4 +v_16fold_p0_l .req v4 + +v_16fold_from .req v_x0 +d_16fold_from_h .req d3 +v_16fold_from_h .req v3 + +v_16fold_zero .req v7 + +v_16fold_from1 .req v16 + +v_16fold_from2 .req v0 +d_16fold_from2_h .req d6 +v_16fold_from2_h .req v6 + +v_16fold_tmp .req v0 + + movi v_16fold_zero.4s, 0 + mov x_tmp1, 0x2d560000 /* p0 [1] */ + mov x_tmp2, 0x13680000 /* p0 [0] */ + + ext v_16fold_tmp.16b, v_16fold_zero.16b, v_16fold_from.16b, #8 + ext v_16fold_tmp.16b, v0.16b, v_16fold_zero.16b, #4 + + dup d_16fold_from_h, v_16fold_from.d[1] + fmov d_16fold_p0_h, x_tmp1 + pmull v_16fold_from1.1q, v_16fold_from_h.1d, v_16fold_p0_h.1d + eor v_16fold_from2.16b, v_16fold_tmp.16b, v_16fold_from1.16b + + dup d_16fold_from2_h, v_16fold_from2.d[1] + fmov d_16fold_p0_l, x_tmp2 + pmull v6.1q, v_16fold_from2_h.1d, v_16fold_p0_l.1d + eor v_x0.16b, v0.16b, v6.16b + +/* carry less multiplication, part3 - after loop */ +/* crc16 barrett reduction function */ + +// input parameters: +// v_x0: v2 +// barrett reduction constant: br[0], br[1] + +d_br0 .req d3 +v_br0 .req v3 +d_br1 .req d5 +v_br1 .req v5 + + mov x_tmp1, 0x57f9 /* br[0] low */ + movk x_tmp1, 0xf65a, lsl 16 /* br[0] high */ + movk x_tmp1, 0x1, lsl 32 + fmov d_br0, x_tmp1 + + dup d1, v_x0.d[0] + dup d1, v1.d[0] + ext v1.16b, v1.16b, v7.16b, #4 + pmull v4.1q, v1.1d, v_br0.1d + + ext v1.16b, v4.16b, v7.16b, #4 + mov x_tmp1, 0x8bb70000 /* br[1] low */ + movk x_tmp1, 0x1, 
lsl 32 /* br[1] high */ + + fmov d_br1, x_tmp1 + pmull v_br1.1q, v1.1d, v_br1.1d + eor v_x0.16b, v_x0.16b, v_br1.16b + + umov x0, v_x0.d[0] + ubfx x0, x0, 16, 16 + b .crc_table_loop_pre + + .size crc16_t10dif_copy_pmull, .-crc16_t10dif_copy_pmull + + .section .rodata + + .align 4 +.shuffle_mask_lanchor = . + 0 + .type shuffle_mask, %object + .size shuffle_mask, 16 +shuffle_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8 + .byte 7, 6, 5, 4, 3, 2, 1, 0 + + .align 4 +.LANCHOR0 = . + 0 + .type crc16tab, %object + .size crc16tab, 512 +crc16tab: + .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b + .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6 + .hword 0x54CD, 0xdf7a, 0xc814, 0x43a3, 0xe6c8, 0x6d7f, 0x7a11, 0xf1a6 + .hword 0xBB70, 0x30c7, 0x27a9, 0xac1e, 0x0975, 0x82c2, 0x95ac, 0x1e1b + .hword 0xA99A, 0x222d, 0x3543, 0xbef4, 0x1b9f, 0x9028, 0x8746, 0x0cf1 + .hword 0x4627, 0xcd90, 0xdafe, 0x5149, 0xf422, 0x7f95, 0x68fb, 0xe34c + .hword 0xFD57, 0x76e0, 0x618e, 0xea39, 0x4f52, 0xc4e5, 0xd38b, 0x583c + .hword 0x12EA, 0x995d, 0x8e33, 0x0584, 0xa0ef, 0x2b58, 0x3c36, 0xb781 + .hword 0xD883, 0x5334, 0x445a, 0xcfed, 0x6a86, 0xe131, 0xf65f, 0x7de8 + .hword 0x373E, 0xbc89, 0xabe7, 0x2050, 0x853b, 0x0e8c, 0x19e2, 0x9255 + .hword 0x8C4E, 0x07f9, 0x1097, 0x9b20, 0x3e4b, 0xb5fc, 0xa292, 0x2925 + .hword 0x63F3, 0xe844, 0xff2a, 0x749d, 0xd1f6, 0x5a41, 0x4d2f, 0xc698 + .hword 0x7119, 0xfaae, 0xedc0, 0x6677, 0xc31c, 0x48ab, 0x5fc5, 0xd472 + .hword 0x9EA4, 0x1513, 0x027d, 0x89ca, 0x2ca1, 0xa716, 0xb078, 0x3bcf + .hword 0x25D4, 0xae63, 0xb90d, 0x32ba, 0x97d1, 0x1c66, 0x0b08, 0x80bf + .hword 0xCA69, 0x41de, 0x56b0, 0xdd07, 0x786c, 0xf3db, 0xe4b5, 0x6f02 + .hword 0x3AB1, 0xb106, 0xa668, 0x2ddf, 0x88b4, 0x0303, 0x146d, 0x9fda + .hword 0xD50C, 0x5ebb, 0x49d5, 0xc262, 0x6709, 0xecbe, 0xfbd0, 0x7067 + .hword 0x6E7C, 0xe5cb, 0xf2a5, 0x7912, 0xdc79, 0x57ce, 0x40a0, 0xcb17 + .hword 0x81C1, 0x0a76, 0x1d18, 0x96af, 0x33c4, 0xb873, 0xaf1d, 0x24aa + .hword 0x932B, 
0x189c, 0x0ff2, 0x8445, 0x212e, 0xaa99, 0xbdf7, 0x3640 + .hword 0x7C96, 0xf721, 0xe04f, 0x6bf8, 0xce93, 0x4524, 0x524a, 0xd9fd + .hword 0xC7E6, 0x4c51, 0x5b3f, 0xd088, 0x75e3, 0xfe54, 0xe93a, 0x628d + .hword 0x285B, 0xa3ec, 0xb482, 0x3f35, 0x9a5e, 0x11e9, 0x0687, 0x8d30 + .hword 0xE232, 0x6985, 0x7eeb, 0xf55c, 0x5037, 0xdb80, 0xccee, 0x4759 + .hword 0x0D8F, 0x8638, 0x9156, 0x1ae1, 0xbf8a, 0x343d, 0x2353, 0xa8e4 + .hword 0xB6FF, 0x3d48, 0x2a26, 0xa191, 0x04fa, 0x8f4d, 0x9823, 0x1394 + .hword 0x5942, 0xd2f5, 0xc59b, 0x4e2c, 0xeb47, 0x60f0, 0x779e, 0xfc29 + .hword 0x4BA8, 0xc01f, 0xd771, 0x5cc6, 0xf9ad, 0x721a, 0x6574, 0xeec3 + .hword 0xA415, 0x2fa2, 0x38cc, 0xb37b, 0x1610, 0x9da7, 0x8ac9, 0x017e + .hword 0x1F65, 0x94d2, 0x83bc, 0x080b, 0xad60, 0x26d7, 0x31b9, 0xba0e + .hword 0xF0D8, 0x7b6f, 0x6c01, 0xe7b6, 0x42dd, 0xc96a, 0xde04, 0x55b3 diff --git a/src/spdk/isa-l/crc/aarch64/crc16_t10dif_pmull.S b/src/spdk/isa-l/crc/aarch64/crc16_t10dif_pmull.S new file mode 100644 index 000000000..08f1a35ad --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc16_t10dif_pmull.S @@ -0,0 +1,404 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + + .arch armv8-a+crc+crypto + .text + .align 3 + .global crc16_t10dif_pmull + .type crc16_t10dif_pmull, %function + +/* uint16_t crc16_t10dif_pmull(uint16_t seed, uint8_t *buf, uint64_t len) */ + +/* arguments */ +w_seed .req w0 +x_buf .req x1 +x_len .req x2 +w_len .req w2 + +/* returns */ +w_ret .req w0 + +/* these as global temporary registers */ +w_tmp .req w5 +x_tmp .req x5 +x_tmp1 .req x6 +x_tmp2 .req x7 + +d_tmp1 .req d0 +d_tmp2 .req d1 +q_tmp1 .req q0 +q_tmp2 .req q1 +v_tmp1 .req v0 +v_tmp2 .req v1 + +/* local variables */ +w_counter .req w3 +w_crc .req w0 +x_crc .req x0 +x_counter .req x3 +x_crc16tab .req x4 +x_buf_saved .req x0 + +crc16_t10dif_pmull: + cmp x_len, 1023 + sub sp, sp, #16 + uxth w_seed, w_seed + bhi .crc_fold + + mov x_tmp, 0 + mov w_counter, 0 + +.crc_table_loop_pre: + cmp x_len, x_tmp + bls .end + + sxtw x_counter, w_counter + adrp x_crc16tab, .LANCHOR0 + sub x_buf, x_buf, x_counter + add x_crc16tab, x_crc16tab, :lo12:.LANCHOR0 + + .align 2 +.crc_table_loop: + ldrb w_tmp, [x_buf, x_counter] + add x_counter, x_counter, 1 + cmp x_len, x_counter + eor w_tmp, w_tmp, w_crc, lsr 8 + ldrh w_tmp, 
[x_crc16tab, w_tmp, sxtw 1] + eor w_crc, w_tmp, w_crc, lsl 8 + uxth w_crc, w_crc + bhi .crc_table_loop + +.end: + add sp, sp, 16 + ret + +/* carry less multiplication, part1 - before loop */ +q_x0 .req q2 +q_x1 .req q3 +q_x2 .req q4 +q_x3 .req q5 + +v_x0 .req v2 +v_x1 .req v3 +v_x2 .req v4 +v_x3 .req v5 + +d_x0 .req d2 +d_x1 .req d3 +d_x2 .req d4 +d_x3 .req d5 + +// the following registers only used this part1 +d_tmp3 .req d16 +v_tmp3 .req v16 + + .align 3 +.crc_fold: + fmov d_tmp1, x_crc + fmov d_tmp2, xzr + dup d_tmp3, v_tmp2.d[0] + shl d_tmp1, d_tmp1, 48 + ins v_tmp3.d[1], v_tmp1.d[0] + + and x_counter, x_len, -64 + sub x_counter, x_counter, #64 + cmp x_counter, 63 + add x_buf_saved, x_buf, 64 + + ldr q_x0, [x_buf] + ldr q_x1, [x_buf, 16] + ldr q_x2, [x_buf, 32] + ldr q_x3, [x_buf, 48] + + adrp x_tmp, .shuffle_mask_lanchor + ldr q7, [x_tmp, :lo12:.shuffle_mask_lanchor] + + tbl v_tmp1.16b, {v_x0.16b}, v7.16b + eor v_x0.16b, v_tmp3.16b, v_tmp1.16b + + tbl v_x1.16b, {v_x1.16b}, v7.16b + tbl v_x2.16b, {v_x2.16b}, v7.16b + tbl v_x3.16b, {v_x3.16b}, v7.16b + bls .crc_fold_loop_end + +/* carry less multiplication, part2 - loop */ +q_y0 .req q28 +q_y1 .req q29 +q_y2 .req q30 +q_y3 .req q31 + +v_y0 .req v28 +v_y1 .req v29 +v_y2 .req v30 +v_y3 .req v31 + +d_x0_h .req d24 +d_x0_l .req d2 +d_x1_h .req d25 +d_x1_l .req d3 +d_x2_h .req d26 +d_x2_l .req d4 +d_x3_h .req d27 +d_x3_l .req d5 + +v_x0_h .req v24 +v_x0_l .req v2 +v_x1_h .req v25 +v_x1_l .req v3 +v_x2_h .req v26 +v_x2_l .req v4 +v_x3_h .req v27 +v_x3_l .req v5 + +v_tmp1_x0 .req v24 +v_tmp1_x1 .req v25 +v_tmp1_x2 .req v26 +v_tmp1_x3 .req v27 + +d_p4_h .req d19 +v_p4_h .req v19 +d_p4_l .req d17 +v_p4_l .req v17 + + mov x_tmp, 0x371d0000 /* p4 [1] */ + fmov d_p4_h, x_tmp + mov x_tmp, 0x87e70000 /* p4 [0] */ + fmov d_p4_l, x_tmp + + .align 2 +.crc_fold_loop: + add x_buf_saved, x_buf_saved, 64 + sub x_counter, x_counter, #64 + cmp x_counter, 63 + + dup d_x0_h, v_x0.d[1] + dup d_x1_h, v_x1.d[1] + dup d_x2_h, v_x2.d[1] + 
dup d_x3_h, v_x3.d[1] + + dup d_x0_l, v_x0.d[0] + dup d_x1_l, v_x1.d[0] + dup d_x2_l, v_x2.d[0] + dup d_x3_l, v_x3.d[0] + + ldr q_y0, [x_buf_saved, -64] + ldr q_y1, [x_buf_saved, -48] + ldr q_y2, [x_buf_saved, -32] + ldr q_y3, [x_buf_saved, -16] + + pmull v_x0_h.1q, v_x0_h.1d, v_p4_h.1d + pmull v_x0_l.1q, v_x0_l.1d, v_p4_l.1d + pmull v_x1_h.1q, v_x1_h.1d, v_p4_h.1d + pmull v_x1_l.1q, v_x1_l.1d, v_p4_l.1d + pmull v_x2_h.1q, v_x2_h.1d, v_p4_h.1d + pmull v_x2_l.1q, v_x2_l.1d, v_p4_l.1d + pmull v_x3_h.1q, v_x3_h.1d, v_p4_h.1d + pmull v_x3_l.1q, v_x3_l.1d, v_p4_l.1d + + tbl v_y0.16b, {v_y0.16b}, v7.16b + tbl v_y1.16b, {v_y1.16b}, v7.16b + tbl v_y2.16b, {v_y2.16b}, v7.16b + tbl v_y3.16b, {v_y3.16b}, v7.16b + + eor v_tmp1_x0.16b, v_x0_h.16b, v_x0_l.16b + eor v_tmp1_x1.16b, v_x1_h.16b, v_x1_l.16b + eor v_tmp1_x2.16b, v_x2_h.16b, v_x2_l.16b + eor v_tmp1_x3.16b, v_x3_h.16b, v_x3_l.16b + + eor v_x0.16b, v_tmp1_x0.16b, v_y0.16b + eor v_x1.16b, v_tmp1_x1.16b, v_y1.16b + eor v_x2.16b, v_tmp1_x2.16b, v_y2.16b + eor v_x3.16b, v_tmp1_x3.16b, v_y3.16b + + bhi .crc_fold_loop + +/* carry less multiplication, part3 - after loop */ +/* folding 512bit ---> 128bit */ + +// input parameters: +// v_x0 => v2 +// v_x1 => v3 +// v_x2 => v4 +// v_x3 => v5 + +// v0, v1, v6, v30, are tmp registers + +.crc_fold_loop_end: + mov x_tmp, 0x4c1a0000 /* p1 [1] */ + fmov d0, x_tmp + mov x_tmp, 0xfb0b0000 /* p1 [0] */ + fmov d1, x_tmp + + and w_counter, w_len, -64 + sxtw x_tmp, w_counter + add x_buf, x_buf, x_tmp + + dup d6, v_x0.d[1] + dup d30, v_x0.d[0] + pmull v6.1q, v6.1d, v0.1d + pmull v30.1q, v30.1d, v1.1d + eor v6.16b, v6.16b, v30.16b + eor v_x1.16b, v6.16b, v_x1.16b + + dup d6, v_x1.d[1] + dup d30, v_x1.d[0] + pmull v6.1q, v6.1d, v0.1d + pmull v16.1q, v30.1d, v1.1d + eor v6.16b, v6.16b, v16.16b + eor v_x2.16b, v6.16b, v_x2.16b + + dup d_x0, v_x2.d[1] + dup d30, v_x2.d[0] + pmull v0.1q, v_x0.1d, v0.1d + pmull v_x0.1q, v30.1d, v1.1d + eor v1.16b, v0.16b, v_x0.16b + eor v_x0.16b, v1.16b, v_x3.16b + 
+/* carry less multiplication, part3 - after loop */ +/* crc16 fold function */ +d_16fold_p0_h .req d18 +v_16fold_p0_h .req v18 + +d_16fold_p0_l .req d4 +v_16fold_p0_l .req v4 + +v_16fold_from .req v_x0 +d_16fold_from_h .req d3 +v_16fold_from_h .req v3 + +v_16fold_zero .req v7 + +v_16fold_from1 .req v16 + +v_16fold_from2 .req v0 +d_16fold_from2_h .req d6 +v_16fold_from2_h .req v6 + +v_16fold_tmp .req v0 + + movi v_16fold_zero.4s, 0 + mov x_tmp1, 0x2d560000 /* p0 [1] */ + mov x_tmp2, 0x13680000 /* p0 [0] */ + + ext v_16fold_tmp.16b, v_16fold_zero.16b, v_16fold_from.16b, #8 + ext v_16fold_tmp.16b, v0.16b, v_16fold_zero.16b, #4 + + dup d_16fold_from_h, v_16fold_from.d[1] + fmov d_16fold_p0_h, x_tmp1 + pmull v_16fold_from1.1q, v_16fold_from_h.1d, v_16fold_p0_h.1d + eor v_16fold_from2.16b, v_16fold_tmp.16b, v_16fold_from1.16b + + dup d_16fold_from2_h, v_16fold_from2.d[1] + fmov d_16fold_p0_l, x_tmp2 + pmull v6.1q, v_16fold_from2_h.1d, v_16fold_p0_l.1d + eor v_x0.16b, v0.16b, v6.16b + +/* carry less multiplication, part3 - after loop */ +/* crc16 barrett reduction function */ + +// input parameters: +// v_x0: v2 +// barrett reduction constant: br[0], br[1] + +d_br0 .req d3 +v_br0 .req v3 +d_br1 .req d5 +v_br1 .req v5 + + mov x_tmp1, 0x57f9 /* br[0] low */ + movk x_tmp1, 0xf65a, lsl 16 /* br[0] high */ + movk x_tmp1, 0x1, lsl 32 + fmov d_br0, x_tmp1 + + dup d1, v_x0.d[0] + dup d1, v1.d[0] + ext v1.16b, v1.16b, v7.16b, #4 + pmull v4.1q, v1.1d, v_br0.1d + + ext v1.16b, v4.16b, v7.16b, #4 + mov x_tmp1, 0x8bb70000 /* br[1] low */ + movk x_tmp1, 0x1, lsl 32 /* br[1] high */ + + fmov d_br1, x_tmp1 + pmull v_br1.1q, v1.1d, v_br1.1d + eor v_x0.16b, v_x0.16b, v_br1.16b + + umov x0, v_x0.d[0] + ubfx x0, x0, 16, 16 + b .crc_table_loop_pre + + .size crc16_t10dif_pmull, .-crc16_t10dif_pmull + + .section .rodata + + .align 4 +.shuffle_mask_lanchor = . 
+ 0 + .type shuffle_mask, %object + .size shuffle_mask, 16 +shuffle_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8 + .byte 7, 6, 5, 4, 3, 2, 1, 0 + + .align 4 +.LANCHOR0 = . + 0 + .type crc16tab, %object + .size crc16tab, 512 +crc16tab: + .hword 0x0000, 0x8bb7, 0x9cd9, 0x176e, 0xb205, 0x39b2, 0x2edc, 0xa56b + .hword 0xEFBD, 0x640a, 0x7364, 0xf8d3, 0x5db8, 0xd60f, 0xc161, 0x4ad6 + .hword 0x54CD, 0xdf7a, 0xc814, 0x43a3, 0xe6c8, 0x6d7f, 0x7a11, 0xf1a6 + .hword 0xBB70, 0x30c7, 0x27a9, 0xac1e, 0x0975, 0x82c2, 0x95ac, 0x1e1b + .hword 0xA99A, 0x222d, 0x3543, 0xbef4, 0x1b9f, 0x9028, 0x8746, 0x0cf1 + .hword 0x4627, 0xcd90, 0xdafe, 0x5149, 0xf422, 0x7f95, 0x68fb, 0xe34c + .hword 0xFD57, 0x76e0, 0x618e, 0xea39, 0x4f52, 0xc4e5, 0xd38b, 0x583c + .hword 0x12EA, 0x995d, 0x8e33, 0x0584, 0xa0ef, 0x2b58, 0x3c36, 0xb781 + .hword 0xD883, 0x5334, 0x445a, 0xcfed, 0x6a86, 0xe131, 0xf65f, 0x7de8 + .hword 0x373E, 0xbc89, 0xabe7, 0x2050, 0x853b, 0x0e8c, 0x19e2, 0x9255 + .hword 0x8C4E, 0x07f9, 0x1097, 0x9b20, 0x3e4b, 0xb5fc, 0xa292, 0x2925 + .hword 0x63F3, 0xe844, 0xff2a, 0x749d, 0xd1f6, 0x5a41, 0x4d2f, 0xc698 + .hword 0x7119, 0xfaae, 0xedc0, 0x6677, 0xc31c, 0x48ab, 0x5fc5, 0xd472 + .hword 0x9EA4, 0x1513, 0x027d, 0x89ca, 0x2ca1, 0xa716, 0xb078, 0x3bcf + .hword 0x25D4, 0xae63, 0xb90d, 0x32ba, 0x97d1, 0x1c66, 0x0b08, 0x80bf + .hword 0xCA69, 0x41de, 0x56b0, 0xdd07, 0x786c, 0xf3db, 0xe4b5, 0x6f02 + .hword 0x3AB1, 0xb106, 0xa668, 0x2ddf, 0x88b4, 0x0303, 0x146d, 0x9fda + .hword 0xD50C, 0x5ebb, 0x49d5, 0xc262, 0x6709, 0xecbe, 0xfbd0, 0x7067 + .hword 0x6E7C, 0xe5cb, 0xf2a5, 0x7912, 0xdc79, 0x57ce, 0x40a0, 0xcb17 + .hword 0x81C1, 0x0a76, 0x1d18, 0x96af, 0x33c4, 0xb873, 0xaf1d, 0x24aa + .hword 0x932B, 0x189c, 0x0ff2, 0x8445, 0x212e, 0xaa99, 0xbdf7, 0x3640 + .hword 0x7C96, 0xf721, 0xe04f, 0x6bf8, 0xce93, 0x4524, 0x524a, 0xd9fd + .hword 0xC7E6, 0x4c51, 0x5b3f, 0xd088, 0x75e3, 0xfe54, 0xe93a, 0x628d + .hword 0x285B, 0xa3ec, 0xb482, 0x3f35, 0x9a5e, 0x11e9, 0x0687, 0x8d30 + .hword 0xE232, 0x6985, 0x7eeb, 
0xf55c, 0x5037, 0xdb80, 0xccee, 0x4759 + .hword 0x0D8F, 0x8638, 0x9156, 0x1ae1, 0xbf8a, 0x343d, 0x2353, 0xa8e4 + .hword 0xB6FF, 0x3d48, 0x2a26, 0xa191, 0x04fa, 0x8f4d, 0x9823, 0x1394 + .hword 0x5942, 0xd2f5, 0xc59b, 0x4e2c, 0xeb47, 0x60f0, 0x779e, 0xfc29 + .hword 0x4BA8, 0xc01f, 0xd771, 0x5cc6, 0xf9ad, 0x721a, 0x6574, 0xeec3 + .hword 0xA415, 0x2fa2, 0x38cc, 0xb37b, 0x1610, 0x9da7, 0x8ac9, 0x017e + .hword 0x1F65, 0x94d2, 0x83bc, 0x080b, 0xad60, 0x26d7, 0x31b9, 0xba0e + .hword 0xF0D8, 0x7b6f, 0x6c01, 0xe7b6, 0x42dd, 0xc96a, 0xde04, 0x55b3 diff --git a/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S b/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S new file mode 100644 index 000000000..98cf12928 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S @@ -0,0 +1,176 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + + .arch armv8-a+crc+crypto + .text + .align 3 + .global crc32_gzip_refl_hw_fold + .type crc32_gzip_refl_hw_fold, %function + +/* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */ + +w_seed .req w0 +w_crc .req w0 +x_buf .req x1 +x_len .req x2 + +x_buf_loop_end .req x10 +x_buf_iter .req x10 + +x_tmp .req x15 +w_tmp .req w15 + +d_c0 .req d3 +d_c1 .req d1 +v_c0 .req v3 +v_c1 .req v1 +crc32_gzip_refl_hw_fold: + mvn w_seed, w_seed + cmp x_len, 1023 + mov x_buf_iter, x_buf + bls .loop_fold_end + + sub x_buf_loop_end, x_len, #1024 + and x_buf_loop_end, x_buf_loop_end, -1024 + add x_buf_loop_end, x_buf_loop_end, 1024 + add x_buf_loop_end, x_buf, x_buf_loop_end + + mov x_tmp, 0x819b + movk x_tmp, 0xb486, lsl 16 + fmov d_c0, x_tmp + + mov x_tmp, 0x8617 + movk x_tmp, 0x7627, lsl 16 + fmov d_c1, x_tmp + +x_in64 .req x3 +w_crc0 .req w0 +w_crc1 .req w4 +w_crc2 .req w5 + +d_crc0 .req d4 +d_crc1 .req d5 +v_crc0 .req v4 +v_crc1 .req v5 + .align 3 +.loop_fold: + add x9, x_buf, 336 + mov x_in64, x_buf + mov w_crc1, 0 + mov w_crc2, 0 + + .align 3 +.loop_for: + ldr x8, [x_in64] + ldr x7, [x_in64, 336] + ldr x6, [x_in64, 672] + + add x_in64, x_in64, 8 + cmp x_in64, x9 + + crc32x w_crc0, w_crc0, x8 + crc32x w_crc1, w_crc1, x7 + crc32x w_crc2, w_crc2, x6 + bne .loop_for + + uxtw x_tmp, w_crc0 + fmov d_crc0, x_tmp + pmull v_crc0.1q, v_crc0.1d, 
v_c0.1d + + uxtw x_tmp, w_crc1 + fmov d_crc1, x_tmp + pmull v_crc1.1q, v_crc1.1d, v_c1.1d + + ldr x_tmp, [x_buf, 1008] + crc32x w_crc2, w_crc2, x_tmp + + fmov x_tmp, d_crc0 + crc32x w_crc0, wzr, x_tmp + + fmov x_tmp, d_crc1 + crc32x w_crc1, wzr, x_tmp + + eor w_crc0, w_crc0, w_crc1 + eor w_crc0, w_crc0, w_crc2 + + ldr x_tmp, [x_buf, 1016] + crc32x w_crc0, w_crc0, x_tmp + + add x_buf, x_buf, 1024 + cmp x_buf_loop_end, x_buf + bne .loop_fold + + and x_len, x_len, 1023 + +x_buf_loop_size8_end .req x3 +.loop_fold_end: + cmp x_len, 7 + bls .size_4 + + sub x_buf_loop_size8_end, x_len, #8 + and x_buf_loop_size8_end, x_buf_loop_size8_end, -8 + add x_buf_loop_size8_end, x_buf_loop_size8_end, 8 + add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end + + .align 3 +.loop_size_8: + ldr x_tmp, [x_buf_iter], 8 + crc32x w_crc, w_crc, x_tmp + + cmp x_buf_iter, x_buf_loop_size8_end + bne .loop_size_8 + + and x_len, x_len, 7 +.size_4: + cmp x_len, 3 + bls .size_2 + + ldr w_tmp, [x_buf_iter], 4 + crc32w w_crc, w_crc, w_tmp + + sub x_len, x_len, #4 +.size_2: + cmp x_len, 1 + bls .size_1 + + ldrh w_tmp, [x_buf_iter], 2 + crc32h w_crc, w_crc, w_tmp + + sub x_len, x_len, #2 +.size_1: + cbz x_len, .done + + ldrb w_tmp, [x_buf_iter] + crc32b w_crc, w_crc, w_tmp + +.done: + mvn w_crc, w_crc + ret + + .size crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold diff --git a/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S b/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S new file mode 100644 index 000000000..d52e2d8f5 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################### + +#include "crc32_gzip_refl_pmull.h" +#include "crc32_refl_common_pmull.h" + +crc32_refl_func crc32_gzip_refl_pmull diff --git a/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h b/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h new file mode 100644 index 000000000..29ccd383f --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_gzip_refl_pmull.h @@ -0,0 +1,87 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.equ p4_low_b0, 0x2d95 +.equ p4_low_b1, 0x8f35 +.equ p4_high_b0, 0x13d7 +.equ p4_high_b1, 0x1d95 +.equ p1_low_b0, 0x9191 +.equ p1_low_b1, 0xae68 +.equ p1_high_b0, 0x009e +.equ p1_high_b1, 0xccaa +.equ p0_low_b0, 0x6765 +.equ p0_low_b1, 0xb8bc +.equ p0_high_b0, p1_high_b0 +.equ p0_high_b1, p1_high_b1 +.equ br_low_b0, 0x0641 +.equ br_low_b1, 0xdb71 +.equ br_low_b2, 0x1 +.equ br_high_b0, 0x1641 +.equ br_high_b1, 0xf701 +.equ br_high_b2, 0x1 + + .text + .section .rodata + .align 4 + .set .LANCHOR0,. 
+ 0 + .type crc32_table_gzip_refl, %object + .size crc32_table_gzip_refl, 1024 +crc32_table_gzip_refl: + .word 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3 + .word 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91 + .word 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7 + .word 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5 + .word 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b + .word 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59 + .word 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f + .word 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d + .word 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433 + .word 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01 + .word 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457 + .word 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65 + .word 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb + .word 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9 + .word 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f + .word 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad + .word 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683 + .word 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1 + .word 0xf00f9344, 0x8708a3d2, 
0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7 + .word 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5 + .word 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b + .word 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79 + .word 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f + .word 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d + .word 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713 + .word 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21 + .word 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777 + .word 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45 + .word 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db + .word 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9 + .word 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf + .word 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d diff --git a/src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S b/src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S new file mode 100644 index 000000000..32966fb9d --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################### + +#include "crc32_ieee_norm_pmull.h" +#include "crc32_norm_common_pmull.h" + +crc32_norm_func crc32_ieee_norm_pmull diff --git a/src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h b/src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h new file mode 100644 index 000000000..bd8e5e244 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_ieee_norm_pmull.h @@ -0,0 +1,87 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.equ p4_low_b0, 0x8b11 +.equ p4_low_b1, 0xe622 +.equ p4_high_b0, 0x794c +.equ p4_high_b1, 0x8833 +.equ p1_low_b0, 0x5605 +.equ p1_low_b1, 0xe8a4 +.equ p1_high_b0, 0xcd4c +.equ p1_high_b1, 0xc5b9 +.equ p0_low_b0, 0x678d +.equ p0_low_b1, 0x490d +.equ p0_high_b0, 0xaa66 +.equ p0_high_b1, 0xf200 +.equ br_low_b0, 0x01df +.equ br_low_b1, 0x04d1 +.equ br_low_b2, 0x1 +.equ br_high_b0, 0x1db7 +.equ br_high_b1, 0x04c1 +.equ br_high_b2, 0x1 + + .text + .section .rodata + .align 4 + .set .LANCHOR0,. 
+ 0 + .type crc32_table_ieee_norm, %object + .size crc32_table_ieee_norm, 1024 +crc32_table_ieee_norm: + .word 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005 + .word 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd + .word 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75 + .word 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd + .word 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5 + .word 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d + .word 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95 + .word 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d + .word 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072 + .word 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca + .word 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02 + .word 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba + .word 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692 + .word 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a + .word 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2 + .word 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a + .word 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb + .word 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53 + .word 0x251d3b9e, 0x21dc2629, 
0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b + .word 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623 + .word 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b + .word 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3 + .word 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b + .word 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3 + .word 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c + .word 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24 + .word 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec + .word 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654 + .word 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c + .word 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4 + .word 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c + .word 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 diff --git a/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S b/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S new file mode 100644 index 000000000..85527c24d --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_hw_fold.S @@ -0,0 +1,172 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#########################################################################
+
+	.arch armv8-a+crc+crypto
+	.text
+	.align	3
+	.global	crc32_iscsi_refl_hw_fold
+	.type	crc32_iscsi_refl_hw_fold, %function
+
+/* unsigned int crc32_iscsi_refl_hw_fold(unsigned char *buffer, int len, unsigned int crc_init) -- folds len bytes of buffer into crc_init with the crc32c instructions and returns the result in w0; no bit inversion is applied in this routine. Inputs of 1024 bytes or more are first consumed in 1024-byte blocks split into three interleaved 336-byte streams; the rest is handled 8, 4, 2 and 1 bytes at a time. */
+
+x_buffer	.req	x0	// arg 0: buffer pointer, advanced by 1024 per folded block
+w_len		.req	w1	// arg 1: signed byte count
+w_crc_init	.req	w2	// arg 2: initial CRC
+w_crc		.req	w2	// the running CRC lives in the same register as crc_init
+
+w_len_loop_end	.req	w9	// x9/w9 is used three ways: first the folded byte count,
+x_buf_loop_end	.req	x9	// then the end address of the folded region,
+x_buf_iter	.req	x9	// and finally the read cursor of the tail code
+
+x_tmp		.req	x15	// scratch
+w_tmp		.req	w15	// scratch (32-bit view)
+
+w_crc_ret	.req	w0	// return register
+crc32_iscsi_refl_hw_fold:
+	cmp	w_len, 1023
+	mov	x_buf_iter, x_buffer	// tail cursor starts at buffer when no block gets folded
+	ble	.loop_fold_end	// signed compare: len < 1024 (also len <= 0) skips the block loop
+
+	sub	w10, w_len, #1024	// w10 = len - 1024
+	lsr	w12, w10, 10	// w12 = number of full 1024-byte blocks minus one
+	lsl	w_len_loop_end, w12, 10
+
+	add	x_buf_loop_end, x_buf_loop_end, 1024	// x9 = len rounded down to a multiple of 1024
+	add	x_buf_loop_end, x_buffer, x_buf_loop_end	// x9 = address just past the last full block
+
+	mov	x_tmp, 0xf38a
+	movk	x_tmp, 0xe417, lsl 16
+	fmov	d3, x_tmp	// d3 = 0xe417f38a, multiplier applied to the stream 0 CRC below; NOTE(review): presumably the shift constant for the bytes that follow stream 0, derivation not shown here
+
+	mov	x_tmp, 0x8014
+	movk	x_tmp, 0x8f15, lsl 16
+	fmov	d1, x_tmp	// d1 = 0x8f158014, multiplier applied to the stream 1 CRC below
+
+x_in64		.req	x1	// reuses x1: w_len is dead until it is rebuilt from w10 and w12 after the loop
+w_crc0		.req	w2	// stream 0 continues the running CRC
+w_crc1		.req	w3	// stream 1, restarted from 0 in every block
+w_crc2		.req	w4	// stream 2, restarted from 0 in every block
+	.align	3
+.loop_fold:	// one iteration consumes 1024 bytes starting at x_buffer
+	add	x8, x_buffer, 336	// x8 = end of stream 0
+	mov	x_in64, x_buffer
+	mov	w_crc1, 0
+	mov	w_crc2, 0
+
+	.align	3
+.loop_for:	// 42 rounds, each feeding 8 bytes to every stream (offsets 0, 336 and 672)
+	ldr	x7, [x_in64]
+	ldr	x6, [x_in64, 336]
+	ldr	x5, [x_in64, 672]
+
+	add	x_in64, x_in64, 8
+	cmp	x_in64, x8	// flags are consumed by bne below; crc32cx does not set flags
+
+	crc32cx	w_crc0, w_crc0, x7
+	crc32cx	w_crc1, w_crc1, x6
+	crc32cx	w_crc2, w_crc2, x5
+	bne	.loop_for
+
+	uxtw	x_tmp, w_crc0
+	fmov	d4, x_tmp
+	pmull	v2.1q, v4.1d, v3.1d	// carry-less product of the stream 0 CRC and d3
+
+	uxtw	x_tmp, w_crc1
+	fmov	d5, x_tmp
+	pmull	v5.1q, v5.1d, v1.1d	// carry-less product of the stream 1 CRC and d1
+
+	fmov	x_tmp, d2	// both operands were 32-bit, so the product fits in the low 64 bits
+	crc32cx	w_crc0, wzr, x_tmp	// reduce the product back to a 32-bit CRC
+
+	fmov	x_tmp, d5
+	crc32cx	w_crc1, wzr, x_tmp	// same reduction for stream 1
+
+	ldr	x_tmp, [x_buffer, 1008]
+	crc32cx	w_crc2, w_crc2, x_tmp	// stream 2 also absorbs bytes 1008..1015
+
+	eor	w_crc1, w_crc1, w_crc0
+	eor	w_crc1, w_crc1, w_crc2	// combine the three streams
+
+	ldr	x_tmp, [x_buffer, 1016]
+	crc32cx	w_crc0, w_crc1, x_tmp	// bytes 1016..1023 complete the block
+
+	add	x_buffer, x_buffer, 1024
+	cmp	x_buf_loop_end, x_buffer
+	bne	.loop_fold
+
+	sub	w_len, w10, w12, lsl 10	// len = bytes left after the folded blocks (0..1023)
+
+x_buf_loop_size8_end	.req	x3	// end address of the 8-byte loop
+.loop_fold_end:
+	cmp	w_len, 7
+	ble	.size_4	// fewer than 8 bytes left
+
+	sub	w_len, w_len, #8
+	lsr	w4, w_len, 3	// w4 = number of 8-byte words minus one
+	lsl	w3, w4, 3
+	add	x_buf_loop_size8_end, x_buf_loop_size8_end, 8	// x3 = remaining length rounded down to a multiple of 8
+	add	x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end
+
+	.align	3
+.loop_size_8:
+	ldr	x_tmp, [x_buf_iter], 8	// post-indexed load advances the cursor
+	crc32cx	w_crc, w_crc, x_tmp
+
+	cmp	x_buf_iter, x_buf_loop_size8_end
+	bne	.loop_size_8
+
+	sub	w_len, w_len, w4, lsl 3	// len = bytes left (0..7)
+.size_4:
+	cmp	w_len, 3
+	ble	.size_2
+
+	ldr	w_tmp, [x_buf_iter], 4
+	crc32cw	w_crc, w_crc, w_tmp
+	sub	w_len, w_len, #4
+
+.size_2:
+	cmp	w_len, 1
+	ble	.size_1
+
+	ldrh	w_tmp, [x_buf_iter], 2
+	crc32ch	w_crc, w_crc, w_tmp
+	sub	w_len, w_len, #2
+
+.size_1:
+	mov	w_crc_ret, w_crc	// result so far; x0 is no longer needed as the buffer pointer
+	cmp	w_len, 1
+	bne	.done
+
+	ldrb	w_tmp, [x_buf_iter]
+	crc32cb	w_crc_ret, w_crc, w_tmp	// final odd byte
+
+.done:
+	ret
+
+	.size	crc32_iscsi_refl_hw_fold, .-crc32_iscsi_refl_hw_fold
diff --git a/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S b/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S
new file mode 100644
index 000000000..09a88e2e1
--- /dev/null
+++ b/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.S
@@ -0,0 +1,53 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc32_iscsi_refl_pmull.h"
+#include "crc32_refl_common_pmull.h"
+
+crc32_refl_func crc32_iscsi_refl_pmull_internal	// expand the shared reflected CRC32 macro into the internal (seed, buf, len) routine
+
+	.arch armv8-a+crc+crypto
+	.text
+	.align	3
+	.global	crc32_iscsi_refl_pmull
+	.type	crc32_iscsi_refl_pmull, %function
+crc32_iscsi_refl_pmull:	// the shuffle below takes x0 = buffer, w1 = len, w2 = initial CRC and returns the CRC in w0
+	stp	x29, x30, [sp, -32]!	// push the frame record (32 bytes reserved) because bl overwrites x30
+	mov	x29, sp
+
+	mov	w7, w2	// park the initial CRC while x2 is overwritten
+	sxtw	x2, w1	// len, sign-extended into the 64-bit length argument
+	mov	x1, x0	// buffer pointer
+	mov	w0, w7	// seed
+	mvn	w0, w0	// pre-invert: the macro body inverts the seed again on entry
+
+	bl	crc32_iscsi_refl_pmull_internal
+	mvn	w0, w0	// undo the inversion the macro body applies before returning
+	ldp	x29, x30, [sp], 32
+	ret
diff --git a/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h b/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h
new file mode 100644
index 000000000..93204db74
--- /dev/null
+++ b/src/spdk/isa-l/crc/aarch64/crc32_iscsi_refl_pmull.h
@@ -0,0 +1,87 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#########################################################################
+
+.equ	p4_low_b0, 0xef02	// p4_*: multiplier pair loaded before the 64-byte folding loop; _b0 and _b1 are bits 0-15 and 16-31 (mov plus movk lsl 16)
+.equ	p4_low_b1, 0x740e
+.equ	p4_high_b0, 0xddf8
+.equ	p4_high_b1, 0x9e4a
+.equ	p1_low_b0, 0x0dfe	// p1_*: multiplier pair of the fold 128b stage that merges the four accumulators
+.equ	p1_low_b1, 0xf20c
+.equ	p1_high_b0, 0x7d27
+.equ	p1_high_b1, 0x493c
+.equ	p0_low_b0, 0xaab8	// p0_low: multiplier of the fold 64b stage
+.equ	p0_low_b1, 0xdd45
+.equ	p0_high_b0, p1_high_b0	// p0_high is an alias of p1_high
+.equ	p0_high_b1, p1_high_b1
+.equ	br_low_b0, 0x76f1	// br_*: barrett reduction constants; _b2 supplies bits 32-47 (movk lsl 32)
+.equ	br_low_b1, 0x05ec
+.equ	br_low_b2, 0x1
+.equ	br_high_b0, 0x13f1
+.equ	br_high_b1, 0xdea7
+.equ	br_high_b2, 0x0
+
+	.text
+	.section	.rodata
+	.align	4
+	.set	.LANCHOR0,. + 0	// anchor through which the byte-wise table loop addresses the table
+	.type	crc32_table_iscsi_refl, %object
+	.size	crc32_table_iscsi_refl, 1024	// 256 entries of 4 bytes, one per input byte value
+crc32_table_iscsi_refl:
+	.word 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB
+	.word 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24
+	.word 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384
+	.word 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B
+	.word 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35
+	.word 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA
+	.word 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A
+	.word 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595
+	.word 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957
+	.word 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198
+	.word 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38
+	.word 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7
+	.word 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789
+	.word 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46
+	.word 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6
+	.word 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829
+	.word 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93
+	.word 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C
+	.word 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC
+	.word 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033
+	.word 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D
+	.word 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982
+	.word 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622
+	.word 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED
+	.word 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F
+	.word 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0
+	.word 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540
+	.word 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F
+	.word 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1
+	.word 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E
+	.word 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E
+	.word 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
diff
--git a/src/spdk/isa-l/crc/aarch64/crc32_norm_common_pmull.h b/src/spdk/isa-l/crc/aarch64/crc32_norm_common_pmull.h new file mode 100644 index 000000000..f90397478 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_norm_common_pmull.h @@ -0,0 +1,316 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#########################################################################
+
+.macro crc32_norm_func name:req
+	.arch armv8-a+crc+crypto
+	.text
+	.align	3
+	.global	\name
+	.type	\name, %function
+
+/* crc32_norm_func(uint32_t seed, uint8_t * buf, uint64_t len): expands to a global function named by the macro argument. The seed is inverted on entry and the result is inverted before returning in w0. Buffers of FOLD_SIZE bytes or more have len rounded down to a multiple of 64 folded with PMULL; all remaining bytes go through the byte-wise table loop, which reads the table behind .LANCHOR0. */
+
+// constant
+.equ	FOLD_SIZE, 1024	// shortest length that takes the PMULL path
+
+// parameter
+w_seed		.req	w0
+x_seed		.req	x0
+x_buf		.req	x1
+x_len		.req	x2
+
+x_buf_tmp	.req	x0	// x0 doubles as the data cursor once the seed sits in a vector register
+
+// crc32 normal function entry
+\name\():
+	mvn	w_seed, w_seed
+	mov	x3, 0	// x3 = bytes already consumed (none yet)
+	mov	w4, 0	// x4 = table-loop index; a W write also clears bits 32-63
+	cmp	x_len, (FOLD_SIZE - 1)
+	uxtw	x_seed, w_seed
+	bhi	.crc32_clmul_pre	// len >= FOLD_SIZE
+
+.crc32_norm_tab_pre:	// in: x3 = x4 = bytes already consumed, x_buf = buf + x4
+	cmp	x_len, x3
+	bls	.done	// nothing left for the table loop
+
+	// x4 is used as a full 64-bit index; it must not be rebuilt from w4 because len is 64-bit
+	adrp	x5, .LANCHOR0
+	sub	x_buf, x_buf, x4	// rebase so that [x_buf, x4] addresses the next byte
+	add	x5, x5, :lo12:.LANCHOR0	// x5 = table base
+
+	.align 3
+.loop_crc32_norm_tab:
+	ldrb	w3, [x_buf, x4]
+	add	x4, x4, 1
+	cmp	x_len, x4	// flags are consumed by bhi at the end of the loop
+	eor	x3, x3, x0, lsr 24	// byte xor bits 24-31 of the CRC
+	and	x3, x3, 255	// table index
+	ldr	w3, [x5, x3, lsl 2]
+	eor	x0, x3, x0, lsl 8	// crc = entry xor (crc << 8); only bits 0-31 are meaningful
+	bhi	.loop_crc32_norm_tab
+
+.done:
+	mvn	w_seed, w_seed
+	ret
+
+// crc32 clmul prepare
+
+x_buf_end	.req	x3
+q_shuffle	.req	q3
+v_shuffle	.req	v3
+
+q_x0_tmp	.req	q5
+q_x1		.req	q6
+q_x2		.req	q4
+q_x3		.req	q1
+
+v_x0_tmp	.req	v5
+v_x0		.req	v2
+v_x1		.req	v6
+v_x2		.req	v4
+v_x3		.req	v1
+
+d_p4_high	.req	d7
+d_p4_low	.req	d5	// shares v5 with x0_tmp, which is dead by the time p4_low is loaded
+
+	.align	2
+.crc32_clmul_pre:
+	and	x3, x_len, -64	// x3 = len rounded down to a multiple of 64
+	cmp	x3, 63
+	bls	.clmul_end
+
+	lsl	x_seed, x_seed, 32
+	movi	v2.4s, 0
+	ins	v2.d[1], x_seed	// v_x0 = seed in the top 32 bits of the 128-bit lane
+
+	adrp	x4, .shuffle
+	ldr	q_shuffle, [x4, #:lo12:.shuffle]	// byte-reversal permutation
+
+	sub	x4, x3, #64	// bytes to fold after the first 64-byte block
+	cmp	x4, 63	// flags are consumed by bls below; nothing in between sets flags
+
+	ldr	q_x0_tmp, [x_buf]
+	ldr	q_x1, [x_buf, 16]
+	ldr	q_x2, [x_buf, 32]
+	ldr	q_x3, [x_buf, 48]
+	add	x_buf_tmp, x_buf, 64
+
+	tbl	v_x0_tmp.16b, {v_x0_tmp.16b}, v_shuffle.16b	// reverse the byte order of each 16-byte lane
+	tbl	v_x1.16b, {v_x1.16b}, v_shuffle.16b
+	tbl	v_x2.16b, {v_x2.16b}, v_shuffle.16b
+	tbl	v_x3.16b, {v_x3.16b}, v_shuffle.16b
+	eor	v_x0.16b, v_x0.16b, v_x0_tmp.16b	// xor the seed into the first lane
+	bls	.clmul_loop_end
+
+	add	x_buf_end, x_buf_tmp, x4	// end of the 64-byte-granular data
+	mov	x4, p4_high_b0
+	movk	x4, p4_high_b1, lsl 16
+	fmov	d_p4_high, x4
+
+	mov	x4, p4_low_b0
+	movk	x4, p4_low_b1, lsl 16
+	fmov	d_p4_low, x4
+
+// crc32 clmul loop
+//v_x0		.req	v2
+//v_x1		.req	v6
+//v_x2		.req	v4
+//v_x3		.req	v1
+
+d_x0_high	.req	d22
+d_x1_high	.req	d20
+d_x2_high	.req	d18
+d_x3_high	.req	d16
+
+v_x0_high	.req	v22
+v_x1_high	.req	v20
+v_x2_high	.req	v18
+v_x3_high	.req	v16
+
+q_y0_high	.req	q23
+q_y1_high	.req	q21
+q_y2_high	.req	q19
+q_y3_high	.req	q17
+
+v_y0_high	.req	v23
+v_y1_high	.req	v21
+v_y2_high	.req	v19
+v_y3_high	.req	v17
+
+v_p4_high	.req	v7
+v_p4_low	.req	v5
+//v_shuffle	.req	v3
+
+	.align	3
+.clmul_loop:	// each iteration folds the four accumulators over the next 64 bytes
+	dup	d_x0_high, v_x0.d[1]	// split off the upper 64 bits of each accumulator
+	dup	d_x1_high, v_x1.d[1]
+	dup	d_x2_high, v_x2.d[1]
+	dup	d_x3_high, v_x3.d[1]
+
+	add	x_buf_tmp, x_buf_tmp, 64
+
+	ldr	q_y0_high, [x_buf_tmp, -64]
+	ldr	q_y1_high, [x_buf_tmp, -48]
+	ldr	q_y2_high, [x_buf_tmp, -32]
+	ldr	q_y3_high, [x_buf_tmp, -16]
+
+	cmp	x_buf_tmp, x_buf_end	// flags are consumed by bne at the end of the loop
+
+	pmull	v_x0.1q, v_x0.1d, v_p4_low.1d	// lower halves times p4_low
+	pmull	v_x1.1q, v_x1.1d, v_p4_low.1d
+	pmull	v_x2.1q, v_x2.1d, v_p4_low.1d
+	pmull	v_x3.1q, v_x3.1d, v_p4_low.1d
+
+	pmull	v_x0_high.1q, v_x0_high.1d, v_p4_high.1d	// upper halves times p4_high
+	pmull	v_x1_high.1q, v_x1_high.1d, v_p4_high.1d
+	pmull	v_x2_high.1q, v_x2_high.1d, v_p4_high.1d
+	pmull	v_x3_high.1q, v_x3_high.1d, v_p4_high.1d
+
+	eor	v_x0.16b, v_x0_high.16b, v_x0.16b
+	eor	v_x1.16b, v_x1_high.16b, v_x1.16b
+	eor	v_x2.16b, v_x2_high.16b, v_x2.16b
+	eor	v_x3.16b, v_x3_high.16b, v_x3.16b
+
+	tbl	v_y0_high.16b, {v_y0_high.16b}, v_shuffle.16b	// byte-reverse the newly loaded data
+	tbl	v_y1_high.16b, {v_y1_high.16b}, v_shuffle.16b
+	tbl	v_y2_high.16b, {v_y2_high.16b}, v_shuffle.16b
+	tbl	v_y3_high.16b, {v_y3_high.16b}, v_shuffle.16b
+
+	eor	v_x0.16b, v_x0.16b, v_y0_high.16b	// xor the new data into the accumulators
+	eor	v_x1.16b, v_x1.16b, v_y1_high.16b
+	eor	v_x2.16b, v_x2.16b, v_y2_high.16b
+	eor	v_x3.16b, v_x3.16b, v_y3_high.16b
+	bne	.clmul_loop
+
+//v_x0		.req	v2
+//v_x1		.req	v6
+//v_x2		.req	v4
+//v_x3		.req	v1
+
+d_p1_high	.req	d7
+d_p1_low	.req	d5
+
+v_p1_high	.req	v7
+v_p1_low	.req	v5
+.clmul_loop_end:
+// folding 512bit --> 128bit
+	mov	x0, p1_high_b0
+	movk	x0, p1_high_b1, lsl 16
+	fmov	d_p1_high, x0
+
+	mov	x0, p1_low_b0
+	movk	x0, p1_low_b1, lsl 16
+	fmov	d_p1_low, x0
+
+	dup	d16, v_x0.d[1]
+	pmull	v_x0.1q, v_x0.1d, v_p1_low.1d
+	pmull	v16.1q, v16.1d, v_p1_high.1d
+	eor	v_x0.16b, v16.16b, v_x0.16b
+	eor	v_x1.16b, v_x0.16b, v_x1.16b	// x0 folded into x1
+
+	dup	d17, v_x1.d[1]
+	pmull	v_x1.1q, v_x1.1d, v_p1_low.1d
+	pmull	v17.1q, v17.1d, v_p1_high.1d
+	eor	v_x1.16b, v17.16b, v_x1.16b
+	eor	v_x2.16b, v_x1.16b, v_x2.16b	// x1 folded into x2
+
+	dup	d0, v_x2.d[1]
+	pmull	v_x2.1q, v_x2.1d, v_p1_low.1d
+	pmull	v0.1q, v0.1d, v_p1_high.1d
+	eor	v_x2.16b, v0.16b, v_x2.16b
+	eor	v_x3.16b, v_x2.16b, v_x3.16b	// x2 folded into x3, which now holds the 128-bit remainder
+
+//v_x0		.req	v2
+//v_x3		.req	v1
+
+d_x0		.req	d2
+v_zero		.req	v3	// v3 is reused: the shuffle mask is no longer needed
+// fold 64b
+	movi	v_zero.4s, 0
+
+	mov	x5, p0_high_b0
+	movk	x5, p0_high_b1, lsl 16
+
+	mov	x0, p0_low_b0
+	movk	x0, p0_low_b1, lsl 16
+
+	dup	d_x0, v_x3.d[1]
+	ext	v0.16b, v_zero.16b, v_x3.16b, #8
+
+	fmov	d16, x5
+	pmull	v_x0.1q, v_x0.1d, v16.1d
+
+	fmov	d17, x0
+	ext	v0.16b, v0.16b, v_zero.16b, #4
+	eor	v0.16b, v0.16b, v_x0.16b
+	dup	d_x0, v0.d[1]
+	pmull	v_x0.1q, v_x0.1d, v17.1d
+
+// barrett reduction
+d_br_low	.req	d16
+d_br_high	.req	d17
+
+v_br_low	.req	v16
+v_br_high	.req	v17
+	mov	x4, br_low_b0
+	movk	x4, br_low_b1, lsl 16
+	movk	x4, br_low_b2, lsl 32
+
+	mov	x3, br_high_b0
+	movk	x3, br_high_b1, lsl 16
+	movk	x3, br_high_b2, lsl 32
+
+	fmov	d_br_low, x4
+	eor	v0.16b, v0.16b, v2.16b
+	umov	x0, v0.d[0]
+	fmov	d2, x0
+	ext	v2.16b, v2.16b, v3.16b, #4
+	pmull	v2.1q, v2.1d, v_br_low.1d
+
+	fmov	d_br_high, x3
+	ext	v2.16b, v2.16b, v3.16b, #4
+	pmull	v2.1q, v2.1d, v_br_high.1d
+	eor	v0.16b, v0.16b, v2.16b
+	umov	x_seed, v0.d[0]	// reduced CRC goes back to x0 for the table loop
+
+.clmul_end:
+	and	x3, x_len, -64	// bytes consumed above, masked in 64 bits: a 32-bit mask plus sign extension mangles len >= 2 GiB
+	mov	x4, x3	// the table loop resumes at this index
+	add	x1, x1, x3
+	b	.crc32_norm_tab_pre
+	.size	\name, .-\name
+	.section	.rodata.cst16,"aM",@progbits,16
+
+	.align	4
+.shuffle:	// tbl indices 15..0: reverse the byte order of a 16-byte vector
+	.byte	15, 14, 13, 12, 11, 10, 9
+	.byte	8, 7, 6, 5, 4, 3, 2, 1, 0
+.endm
diff --git a/src/spdk/isa-l/crc/aarch64/crc32_refl_common_pmull.h b/src/spdk/isa-l/crc/aarch64/crc32_refl_common_pmull.h
new file mode 100644
index 000000000..6f0a30223
---
/dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc32_refl_common_pmull.h @@ -0,0 +1,280 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#########################################################################
+
+.macro crc32_refl_func name:req
+	.arch armv8-a+crc+crypto
+	.text
+	.align	3
+	.global	\name
+	.type	\name, %function
+
+/* crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len): expands to a global function named by the macro argument. The seed is inverted on entry and the result is inverted before returning in w0. Buffers of FOLD_SIZE bytes or more have len rounded down to a multiple of 64 folded with PMULL; all remaining bytes go through the byte-wise table loop, which reads the table behind .LANCHOR0. */
+
+// constant
+.equ	FOLD_SIZE, 1024	// shortest length that takes the PMULL path
+
+// parameter
+w_seed		.req	w0
+x_seed		.req	x0
+x_buf		.req	x1
+x_len		.req	x2
+
+x_buf_tmp	.req	x0	// x0 doubles as the data cursor once the seed sits in a vector register
+
+// crc32 refl function entry
+\name\():
+	mvn	w_seed, w_seed
+	mov	x3, 0	// x3 = bytes already consumed (none yet)
+	mov	w4, 0	// x4 = table-loop index; a W write also clears bits 32-63
+	cmp	x_len, (FOLD_SIZE - 1)
+	bhi	.crc32_clmul_pre	// len >= FOLD_SIZE
+
+.crc32_refl_tab_pre:	// in: x3 = x4 = bytes already consumed, x_buf = buf + x4
+	cmp	x_len, x3
+	bls	.done	// nothing left for the table loop
+	// x4 is used as a full 64-bit index; it must not be rebuilt from w4 because len is 64-bit
+	adrp	x5, .LANCHOR0
+	sub	x_buf, x_buf, x4	// rebase so that [x_buf, x4] addresses the next byte
+	add	x5, x5, :lo12:.LANCHOR0	// x5 = table base
+
+	.align 3
+.loop_crc32_refl_tab:
+	ldrb	w3, [x_buf, x4]
+	add	x4, x4, 1
+	cmp	x_len, x4	// flags are consumed by bhi at the end of the loop
+	eor	x3, x_seed, x3	// byte xor low bits of the CRC
+	and	x3, x3, 255	// table index
+	ldr	w3, [x5, x3, lsl 2]
+	eor	w_seed, w3, w_seed, lsr 8	// crc = entry xor (crc >> 8)
+	bhi	.loop_crc32_refl_tab
+.done:
+	mvn	w_seed, w_seed
+	ret
+
+d_y0_tmp	.req	d0
+v_y0_tmp	.req	v0
+
+q_x0_tmp	.req	q3
+v_x0_tmp	.req	v3
+
+v_x0		.req	v0
+q_x1		.req	q2
+q_x2		.req	q4
+q_x3		.req	q1
+
+d_p4_low	.req	d17
+d_p4_high	.req	d19
+
+x_buf_end	.req	x3
+	.align	2
+.crc32_clmul_pre:
+	and	x4, x_len, -64	// x4 = len rounded down to a multiple of 64
+	uxtw	x_seed, w_seed
+	cmp	x4, 63
+	bls	.clmul_end
+
+	fmov	d_y0_tmp, x_seed	// seed in the low 64 bits
+	ins	v_y0_tmp.d[1], x3	// x3 is still 0 here, so the upper half is cleared
+
+	ldr	q_x0_tmp, [x_buf]
+	ldr	q_x1, [x_buf, 16]
+	ldr	q_x2, [x_buf, 32]
+	ldr	q_x3, [x_buf, 48]
+	eor	v_x0.16b, v_y0_tmp.16b, v_x0_tmp.16b	// xor the seed into the first 16 bytes
+
+	sub	x5, x4, #64	// bytes to fold after the first 64-byte block
+	cmp	x5, 63	// flags are consumed by bls below; add does not set flags
+
+	add	x_buf_tmp, x_buf, 64
+	bls	.clmul_loop_end
+
+	mov	x4, p4_high_b0
+	movk	x4, p4_high_b1, lsl 16
+	fmov	d_p4_high, x4
+
+	mov	x4, p4_low_b0
+	movk	x4, p4_low_b1, lsl 16
+	fmov	d_p4_low, x4
+
+	add	x_buf_end, x_buf_tmp, x5	// end of the 64-byte-granular data
+
+v_p4_low	.req	v17
+v_p4_high	.req	v19
+
+// v_x0		.req	v0
+v_x1		.req	v2
+v_x2		.req	v4
+v_x3		.req	v1
+
+q_y0		.req	q7
+q_y1		.req	q5
+q_y2		.req	q3
+q_y3		.req	q21
+
+v_y0		.req	v7
+v_y1		.req	v5
+v_y2		.req	v3
+v_y3		.req	v21
+
+d_x0_h		.req	d22
+d_x1_h		.req	d20
+d_x2_h		.req	d18
+d_x3_h		.req	d6
+
+v_x0_h		.req	v22
+v_x1_h		.req	v20
+v_x2_h		.req	v18
+v_x3_h		.req	v6
+
+	.align 3
+.clmul_loop:	// each iteration folds the four accumulators over the next 64 bytes
+	add	x_buf_tmp, x_buf_tmp, 64
+	cmp	x_buf_tmp, x_buf_end	// flags are consumed by bne at the end of the loop
+
+	dup	d_x0_h, v_x0.d[1]	// split off the upper 64 bits of each accumulator
+	dup	d_x1_h, v_x1.d[1]
+	dup	d_x2_h, v_x2.d[1]
+	dup	d_x3_h, v_x3.d[1]
+
+	ldr	q_y0, [x_buf_tmp, -64]
+	ldr	q_y1, [x_buf_tmp, -48]
+	ldr	q_y2, [x_buf_tmp, -32]
+	ldr	q_y3, [x_buf_tmp, -16]
+
+	pmull	v_x0.1q, v_x0.1d, v_p4_low.1d	// lower halves times p4_low
+	pmull	v_x1.1q, v_x1.1d, v_p4_low.1d
+	pmull	v_x2.1q, v_x2.1d, v_p4_low.1d
+	pmull	v_x3.1q, v_x3.1d, v_p4_low.1d
+
+	pmull	v_x0_h.1q, v_x0_h.1d, v_p4_high.1d	// upper halves times p4_high
+	pmull	v_x1_h.1q, v_x1_h.1d, v_p4_high.1d
+	pmull	v_x2_h.1q, v_x2_h.1d, v_p4_high.1d
+	pmull	v_x3_h.1q, v_x3_h.1d, v_p4_high.1d
+
+	eor	v_y0.16b, v_y0.16b, v22.16b	// new data xor upper product (v22/v20/v18/v6 are the x?_h registers)
+	eor	v_y1.16b, v_y1.16b, v20.16b
+	eor	v_y2.16b, v_y2.16b, v18.16b
+	eor	v_y3.16b, v_y3.16b, v6.16b
+
+	eor	v_x0.16b, v_y0.16b, v_x0.16b	// xor in the lower product
+	eor	v_x1.16b, v_y1.16b, v_x1.16b
+	eor	v_x2.16b, v_y2.16b, v_x2.16b
+	eor	v_x3.16b, v_y3.16b, v_x3.16b
+
+	bne	.clmul_loop
+
+
+// v_x0		.req	v0
+// v_x1		.req	v2
+// v_x2		.req	v4
+// v_x3		.req	v1
+
+d_x0		.req	d0
+
+d_p1_high	.req	d7
+d_p1_low	.req	d17
+
+v_p1_high	.req	v7
+v_p1_low	.req	v17
+
+.clmul_loop_end:
+// fold 128b
+	mov	x0, p1_high_b0
+	movk	x0, p1_high_b1, lsl 16
+	fmov	d_p1_high, x0
+
+	mov	x0, p1_low_b0
+	movk	x0, p1_low_b1, lsl 16
+	fmov	d_p1_low, x0
+
+	dup	d6, v_x0.d[1]
+	pmull	v_x0.1q, v_x0.1d, v_p1_low.1d
+	pmull	v6.1q, v6.1d, v_p1_high.1d
+	eor	v6.16b, v6.16b, v_x0.16b
+	eor	v_x1.16b, v6.16b, v_x1.16b	// x0 folded into x1
+
+	dup	d6, v_x1.d[1]
+	pmull	v_x1.1q, v_x1.1d, v_p1_low.1d
+	pmull	v6.1q, v6.1d, v_p1_high.1d
+	eor	v6.16b, v6.16b, v_x1.16b
+	eor	v_x2.16b, v6.16b, v_x2.16b	// x1 folded into x2
+
+	dup	d_x0, v_x2.d[1] // d_x0 temporarily holds the high half of v_x2
+	pmull	v_x2.1q, v_x2.1d, v_p1_low.1d
+	pmull	v_x0.1q, v_x0.1d, v_p1_high.1d
+	eor	v_x0.16b, v_x0.16b, v_x2.16b
+	eor	v_x0.16b, v_x0.16b, v_x3.16b	// x2 folded into x3; the 128-bit remainder is now in v0
+
+// all
+	mov	x0, 4294967295
+	fmov	d3, x0	// d3 = 0xffffffff, mask for the low 32 bits
+
+	movi	v5.4s, 0	// v5 = zero vector for the ext shifts
+
+// fold 64b
+	mov	x4, p0_low_b0
+	movk	x4, p0_low_b1, lsl 16
+	fmov	d1, x4
+
+	dup	d2, v0.d[0]
+	ext	v0.16b, v0.16b, v5.16b, #8
+	pmull	v2.1q, v2.1d, v7.1d	// v7 still holds p1_high
+	eor	v0.16b, v0.16b, v2.16b
+	and	v2.16b, v3.16b, v0.16b
+	ext	v0.16b, v0.16b, v5.16b, #4
+	pmull	v2.1q, v2.1d, v1.1d
+
+// barrett reduction
+	mov	x3, br_high_b0
+	movk	x3, br_high_b1, lsl 16
+	movk	x3, br_high_b2, lsl 32
+
+	fmov	d1, x3
+	eor	v0.16b, v0.16b, v2.16b
+	and	v2.16b, v0.16b, v3.16b
+	pmull	v2.1q, v2.1d, v1.1d
+
+	mov	x0, br_low_b0
+	movk	x0, br_low_b1, lsl 16
+	movk	x0, br_low_b2, lsl 32
+
+	fmov	d1, x0
+	and	v2.16b, v3.16b, v2.16b
+	pmull	v2.1q, v2.1d, v1.1d
+	eor	v0.16b, v0.16b, v2.16b
+	umov	w_seed, v0.s[1]	// reduced CRC goes back to w0 for the table loop
+	uxtw	x_seed, w_seed
+
+.clmul_end:
+	and	x3, x_len, -64	// bytes consumed above, masked in 64 bits: a 32-bit mask plus sign extension mangles len >= 2 GiB
+	mov	x4, x3	// the table loop resumes at this index
+	add	x_buf, x_buf, x3
+	b	.crc32_refl_tab_pre
+	.size	\name, .-\name
+.endm
diff --git a/src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S b/src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S
new file mode 100644
index 000000000..0089a09de
--- /dev/null
+++ b/src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include "crc64_ecma_norm_pmull.h"
+#include "crc64_norm_common_pmull.h"
+
+crc64_norm_func crc64_ecma_norm_pmull	// the whole routine comes from this one expansion; NOTE(review): crc64_norm_func is presumably the macro from crc64_norm_common_pmull.h, fed by the constants and table of the first include - confirm, it is not visible here
diff --git a/src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h
new file mode 100644
index 000000000..bce14d206
--- /dev/null
+++ b/src/spdk/isa-l/crc/aarch64/crc64_ecma_norm_pmull.h
@@ -0,0 +1,200 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.equ p4_low_b0, (0xf020) +.equ p4_low_b1, 0x540d +.equ p4_low_b2, 0x43ca +.equ p4_low_b3, 0x5f68 +.equ p4_high_b0, 0xb83f +.equ p4_high_b1, 0x1205 +.equ p4_high_b2, 0xb698 +.equ p4_high_b3, 0xddf4 + +.equ p1_low_b0, (0xfab6) +.equ p1_low_b1, 0xeb52 +.equ p1_low_b2, 0xc3c7 +.equ p1_low_b3, 0x05f5 +.equ p1_high_b0, 0x740e +.equ p1_high_b1, 0xd257 +.equ p1_high_b2, 0x38a7 +.equ p1_high_b3, 0x4eb9 + +.equ p0_low_b0, (0xfab6) +.equ p0_low_b1, 0xeb52 +.equ p0_low_b2, 0xc3c7 +.equ p0_low_b3, 0x05f5 +.equ p0_high_b0, 0x0 +.equ p0_high_b1, 0x0 +.equ p0_high_b2, 0x0 +.equ p0_high_b3, 0x0 + +.equ br_low_b0, (0xf872) +.equ br_low_b1, 0x6cc4 +.equ br_low_b2, 0x29d0 +.equ br_low_b3, 0x578d +.equ br_high_b0, 0x3693 +.equ br_high_b1, 0xa9ea +.equ br_high_b2, 0xe1eb +.equ br_high_b3, 0x42f0 + + .text + .section .rodata + .align 4 + .set lanchor_crc64_tab,. 
+ 0 + .type crc64_tab, %object + .size crc64_tab, 2048 +crc64_tab: + .xword 0x0000000000000000, 0x42f0e1eba9ea3693 + .xword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5 + .xword 0x493366450e42ecdf, 0x0bc387aea7a8da4c + .xword 0xccd2a5925d9681f9, 0x8e224479f47cb76a + .xword 0x9266cc8a1c85d9be, 0xd0962d61b56fef2d + .xword 0x17870f5d4f51b498, 0x5577eeb6e6bb820b + .xword 0xdb55aacf12c73561, 0x99a54b24bb2d03f2 + .xword 0x5eb4691841135847, 0x1c4488f3e8f96ed4 + .xword 0x663d78ff90e185ef, 0x24cd9914390bb37c + .xword 0xe3dcbb28c335e8c9, 0xa12c5ac36adfde5a + .xword 0x2f0e1eba9ea36930, 0x6dfeff5137495fa3 + .xword 0xaaefdd6dcd770416, 0xe81f3c86649d3285 + .xword 0xf45bb4758c645c51, 0xb6ab559e258e6ac2 + .xword 0x71ba77a2dfb03177, 0x334a9649765a07e4 + .xword 0xbd68d2308226b08e, 0xff9833db2bcc861d + .xword 0x388911e7d1f2dda8, 0x7a79f00c7818eb3b + .xword 0xcc7af1ff21c30bde, 0x8e8a101488293d4d + .xword 0x499b3228721766f8, 0x0b6bd3c3dbfd506b + .xword 0x854997ba2f81e701, 0xc7b97651866bd192 + .xword 0x00a8546d7c558a27, 0x4258b586d5bfbcb4 + .xword 0x5e1c3d753d46d260, 0x1cecdc9e94ace4f3 + .xword 0xdbfdfea26e92bf46, 0x990d1f49c77889d5 + .xword 0x172f5b3033043ebf, 0x55dfbadb9aee082c + .xword 0x92ce98e760d05399, 0xd03e790cc93a650a + .xword 0xaa478900b1228e31, 0xe8b768eb18c8b8a2 + .xword 0x2fa64ad7e2f6e317, 0x6d56ab3c4b1cd584 + .xword 0xe374ef45bf6062ee, 0xa1840eae168a547d + .xword 0x66952c92ecb40fc8, 0x2465cd79455e395b + .xword 0x3821458aada7578f, 0x7ad1a461044d611c + .xword 0xbdc0865dfe733aa9, 0xff3067b657990c3a + .xword 0x711223cfa3e5bb50, 0x33e2c2240a0f8dc3 + .xword 0xf4f3e018f031d676, 0xb60301f359dbe0e5 + .xword 0xda050215ea6c212f, 0x98f5e3fe438617bc + .xword 0x5fe4c1c2b9b84c09, 0x1d14202910527a9a + .xword 0x93366450e42ecdf0, 0xd1c685bb4dc4fb63 + .xword 0x16d7a787b7faa0d6, 0x5427466c1e109645 + .xword 0x4863ce9ff6e9f891, 0x0a932f745f03ce02 + .xword 0xcd820d48a53d95b7, 0x8f72eca30cd7a324 + .xword 0x0150a8daf8ab144e, 0x43a04931514122dd + .xword 0x84b16b0dab7f7968, 0xc6418ae602954ffb + .xword 
0xbc387aea7a8da4c0, 0xfec89b01d3679253 + .xword 0x39d9b93d2959c9e6, 0x7b2958d680b3ff75 + .xword 0xf50b1caf74cf481f, 0xb7fbfd44dd257e8c + .xword 0x70eadf78271b2539, 0x321a3e938ef113aa + .xword 0x2e5eb66066087d7e, 0x6cae578bcfe24bed + .xword 0xabbf75b735dc1058, 0xe94f945c9c3626cb + .xword 0x676dd025684a91a1, 0x259d31cec1a0a732 + .xword 0xe28c13f23b9efc87, 0xa07cf2199274ca14 + .xword 0x167ff3eacbaf2af1, 0x548f120162451c62 + .xword 0x939e303d987b47d7, 0xd16ed1d631917144 + .xword 0x5f4c95afc5edc62e, 0x1dbc74446c07f0bd + .xword 0xdaad56789639ab08, 0x985db7933fd39d9b + .xword 0x84193f60d72af34f, 0xc6e9de8b7ec0c5dc + .xword 0x01f8fcb784fe9e69, 0x43081d5c2d14a8fa + .xword 0xcd2a5925d9681f90, 0x8fdab8ce70822903 + .xword 0x48cb9af28abc72b6, 0x0a3b7b1923564425 + .xword 0x70428b155b4eaf1e, 0x32b26afef2a4998d + .xword 0xf5a348c2089ac238, 0xb753a929a170f4ab + .xword 0x3971ed50550c43c1, 0x7b810cbbfce67552 + .xword 0xbc902e8706d82ee7, 0xfe60cf6caf321874 + .xword 0xe224479f47cb76a0, 0xa0d4a674ee214033 + .xword 0x67c58448141f1b86, 0x253565a3bdf52d15 + .xword 0xab1721da49899a7f, 0xe9e7c031e063acec + .xword 0x2ef6e20d1a5df759, 0x6c0603e6b3b7c1ca + .xword 0xf6fae5c07d3274cd, 0xb40a042bd4d8425e + .xword 0x731b26172ee619eb, 0x31ebc7fc870c2f78 + .xword 0xbfc9838573709812, 0xfd39626eda9aae81 + .xword 0x3a28405220a4f534, 0x78d8a1b9894ec3a7 + .xword 0x649c294a61b7ad73, 0x266cc8a1c85d9be0 + .xword 0xe17dea9d3263c055, 0xa38d0b769b89f6c6 + .xword 0x2daf4f0f6ff541ac, 0x6f5faee4c61f773f + .xword 0xa84e8cd83c212c8a, 0xeabe6d3395cb1a19 + .xword 0x90c79d3fedd3f122, 0xd2377cd44439c7b1 + .xword 0x15265ee8be079c04, 0x57d6bf0317edaa97 + .xword 0xd9f4fb7ae3911dfd, 0x9b041a914a7b2b6e + .xword 0x5c1538adb04570db, 0x1ee5d94619af4648 + .xword 0x02a151b5f156289c, 0x4051b05e58bc1e0f + .xword 0x87409262a28245ba, 0xc5b073890b687329 + .xword 0x4b9237f0ff14c443, 0x0962d61b56fef2d0 + .xword 0xce73f427acc0a965, 0x8c8315cc052a9ff6 + .xword 0x3a80143f5cf17f13, 0x7870f5d4f51b4980 + .xword 0xbf61d7e80f251235, 
0xfd913603a6cf24a6 + .xword 0x73b3727a52b393cc, 0x31439391fb59a55f + .xword 0xf652b1ad0167feea, 0xb4a25046a88dc879 + .xword 0xa8e6d8b54074a6ad, 0xea16395ee99e903e + .xword 0x2d071b6213a0cb8b, 0x6ff7fa89ba4afd18 + .xword 0xe1d5bef04e364a72, 0xa3255f1be7dc7ce1 + .xword 0x64347d271de22754, 0x26c49cccb40811c7 + .xword 0x5cbd6cc0cc10fafc, 0x1e4d8d2b65facc6f + .xword 0xd95caf179fc497da, 0x9bac4efc362ea149 + .xword 0x158e0a85c2521623, 0x577eeb6e6bb820b0 + .xword 0x906fc95291867b05, 0xd29f28b9386c4d96 + .xword 0xcedba04ad0952342, 0x8c2b41a1797f15d1 + .xword 0x4b3a639d83414e64, 0x09ca82762aab78f7 + .xword 0x87e8c60fded7cf9d, 0xc51827e4773df90e + .xword 0x020905d88d03a2bb, 0x40f9e43324e99428 + .xword 0x2cffe7d5975e55e2, 0x6e0f063e3eb46371 + .xword 0xa91e2402c48a38c4, 0xebeec5e96d600e57 + .xword 0x65cc8190991cb93d, 0x273c607b30f68fae + .xword 0xe02d4247cac8d41b, 0xa2dda3ac6322e288 + .xword 0xbe992b5f8bdb8c5c, 0xfc69cab42231bacf + .xword 0x3b78e888d80fe17a, 0x7988096371e5d7e9 + .xword 0xf7aa4d1a85996083, 0xb55aacf12c735610 + .xword 0x724b8ecdd64d0da5, 0x30bb6f267fa73b36 + .xword 0x4ac29f2a07bfd00d, 0x08327ec1ae55e69e + .xword 0xcf235cfd546bbd2b, 0x8dd3bd16fd818bb8 + .xword 0x03f1f96f09fd3cd2, 0x41011884a0170a41 + .xword 0x86103ab85a2951f4, 0xc4e0db53f3c36767 + .xword 0xd8a453a01b3a09b3, 0x9a54b24bb2d03f20 + .xword 0x5d45907748ee6495, 0x1fb5719ce1045206 + .xword 0x919735e51578e56c, 0xd367d40ebc92d3ff + .xword 0x1476f63246ac884a, 0x568617d9ef46bed9 + .xword 0xe085162ab69d5e3c, 0xa275f7c11f7768af + .xword 0x6564d5fde549331a, 0x279434164ca30589 + .xword 0xa9b6706fb8dfb2e3, 0xeb46918411358470 + .xword 0x2c57b3b8eb0bdfc5, 0x6ea7525342e1e956 + .xword 0x72e3daa0aa188782, 0x30133b4b03f2b111 + .xword 0xf7021977f9cceaa4, 0xb5f2f89c5026dc37 + .xword 0x3bd0bce5a45a6b5d, 0x79205d0e0db05dce + .xword 0xbe317f32f78e067b, 0xfcc19ed95e6430e8 + .xword 0x86b86ed5267cdbd3, 0xc4488f3e8f96ed40 + .xword 0x0359ad0275a8b6f5, 0x41a94ce9dc428066 + .xword 0xcf8b0890283e370c, 0x8d7be97b81d4019f + .xword 
0x4a6acb477bea5a2a, 0x089a2aacd2006cb9 + .xword 0x14dea25f3af9026d, 0x562e43b4931334fe + .xword 0x913f6188692d6f4b, 0xd3cf8063c0c759d8 + .xword 0x5dedc41a34bbeeb2, 0x1f1d25f19d51d821 + .xword 0xd80c07cd676f8394, 0x9afce626ce85b507 diff --git a/src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S b/src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S new file mode 100644 index 000000000..812517f77 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include "crc64_ecma_refl_pmull.h" +#include "crc64_refl_common_pmull.h" + +crc64_refl_func crc64_ecma_refl_pmull diff --git a/src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h new file mode 100644 index 000000000..f86a2a97a --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_ecma_refl_pmull.h @@ -0,0 +1,196 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.equ p4_low_b0, 0x41f3 +.equ p4_low_b1, 0x9dd4 +.equ p4_low_b2, 0xefbb +.equ p4_low_b3, 0x6ae3 +.equ p4_high_b0, 0x2df4 +.equ p4_high_b1, 0xa784 +.equ p4_high_b2, 0x6054 +.equ p4_high_b3, 0x081f + +.equ p1_low_b0, 0x3ae4 +.equ p1_low_b1, 0xca39 +.equ p1_low_b2, 0xd497 +.equ p1_low_b3, 0xe05d +.equ p1_high_b0, 0x5f40 +.equ p1_high_b1, 0xc787 +.equ p1_high_b2, 0x95af +.equ p1_high_b3, 0xdabe + +.equ p0_low_b0, 0x5f40 +.equ p0_low_b1, 0xc787 +.equ p0_low_b2, 0x95af +.equ p0_low_b3, 0xdabe + +.equ br_low_b0, 0x63d5 +.equ br_low_b1, 0x1729 +.equ br_low_b2, 0x466c +.equ br_low_b3, 0x9c3e +.equ br_high_b0, 0x1e85 +.equ br_high_b1, 0xaf0e +.equ br_high_b2, 0xaf2b +.equ br_high_b3, 0x92d8 + + .text + .section .rodata + .align 4 + .set .lanchor_crc64_tab,. 
+ 0 + .type crc64_tab, %object + .size crc64_tab, 2048 +crc64_tab: + .xword 0x0000000000000000, 0xb32e4cbe03a75f6f + .xword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34 + .xword 0x7bd0c384ff8f5e33, 0xc8fe8f3afc28015c + .xword 0x8f54f5d357cffe68, 0x3c7ab96d5468a107 + .xword 0xf7a18709ff1ebc66, 0x448fcbb7fcb9e309 + .xword 0x0325b15e575e1c3d, 0xb00bfde054f94352 + .xword 0x8c71448d0091e255, 0x3f5f08330336bd3a + .xword 0x78f572daa8d1420e, 0xcbdb3e64ab761d61 + .xword 0x7d9ba13851336649, 0xceb5ed8652943926 + .xword 0x891f976ff973c612, 0x3a31dbd1fad4997d + .xword 0x064b62bcaebc387a, 0xb5652e02ad1b6715 + .xword 0xf2cf54eb06fc9821, 0x41e11855055bc74e + .xword 0x8a3a2631ae2dda2f, 0x39146a8fad8a8540 + .xword 0x7ebe1066066d7a74, 0xcd905cd805ca251b + .xword 0xf1eae5b551a2841c, 0x42c4a90b5205db73 + .xword 0x056ed3e2f9e22447, 0xb6409f5cfa457b28 + .xword 0xfb374270a266cc92, 0x48190ecea1c193fd + .xword 0x0fb374270a266cc9, 0xbc9d3899098133a6 + .xword 0x80e781f45de992a1, 0x33c9cd4a5e4ecdce + .xword 0x7463b7a3f5a932fa, 0xc74dfb1df60e6d95 + .xword 0x0c96c5795d7870f4, 0xbfb889c75edf2f9b + .xword 0xf812f32ef538d0af, 0x4b3cbf90f69f8fc0 + .xword 0x774606fda2f72ec7, 0xc4684a43a15071a8 + .xword 0x83c230aa0ab78e9c, 0x30ec7c140910d1f3 + .xword 0x86ace348f355aadb, 0x3582aff6f0f2f5b4 + .xword 0x7228d51f5b150a80, 0xc10699a158b255ef + .xword 0xfd7c20cc0cdaf4e8, 0x4e526c720f7dab87 + .xword 0x09f8169ba49a54b3, 0xbad65a25a73d0bdc + .xword 0x710d64410c4b16bd, 0xc22328ff0fec49d2 + .xword 0x85895216a40bb6e6, 0x36a71ea8a7ace989 + .xword 0x0adda7c5f3c4488e, 0xb9f3eb7bf06317e1 + .xword 0xfe5991925b84e8d5, 0x4d77dd2c5823b7ba + .xword 0x64b62bcaebc387a1, 0xd7986774e864d8ce + .xword 0x90321d9d438327fa, 0x231c512340247895 + .xword 0x1f66e84e144cd992, 0xac48a4f017eb86fd + .xword 0xebe2de19bc0c79c9, 0x58cc92a7bfab26a6 + .xword 0x9317acc314dd3bc7, 0x2039e07d177a64a8 + .xword 0x67939a94bc9d9b9c, 0xd4bdd62abf3ac4f3 + .xword 0xe8c76f47eb5265f4, 0x5be923f9e8f53a9b + .xword 0x1c4359104312c5af, 0xaf6d15ae40b59ac0 + .xword 
0x192d8af2baf0e1e8, 0xaa03c64cb957be87 + .xword 0xeda9bca512b041b3, 0x5e87f01b11171edc + .xword 0x62fd4976457fbfdb, 0xd1d305c846d8e0b4 + .xword 0x96797f21ed3f1f80, 0x2557339fee9840ef + .xword 0xee8c0dfb45ee5d8e, 0x5da24145464902e1 + .xword 0x1a083bacedaefdd5, 0xa9267712ee09a2ba + .xword 0x955cce7fba6103bd, 0x267282c1b9c65cd2 + .xword 0x61d8f8281221a3e6, 0xd2f6b4961186fc89 + .xword 0x9f8169ba49a54b33, 0x2caf25044a02145c + .xword 0x6b055fede1e5eb68, 0xd82b1353e242b407 + .xword 0xe451aa3eb62a1500, 0x577fe680b58d4a6f + .xword 0x10d59c691e6ab55b, 0xa3fbd0d71dcdea34 + .xword 0x6820eeb3b6bbf755, 0xdb0ea20db51ca83a + .xword 0x9ca4d8e41efb570e, 0x2f8a945a1d5c0861 + .xword 0x13f02d374934a966, 0xa0de61894a93f609 + .xword 0xe7741b60e174093d, 0x545a57dee2d35652 + .xword 0xe21ac88218962d7a, 0x5134843c1b317215 + .xword 0x169efed5b0d68d21, 0xa5b0b26bb371d24e + .xword 0x99ca0b06e7197349, 0x2ae447b8e4be2c26 + .xword 0x6d4e3d514f59d312, 0xde6071ef4cfe8c7d + .xword 0x15bb4f8be788911c, 0xa6950335e42fce73 + .xword 0xe13f79dc4fc83147, 0x521135624c6f6e28 + .xword 0x6e6b8c0f1807cf2f, 0xdd45c0b11ba09040 + .xword 0x9aefba58b0476f74, 0x29c1f6e6b3e0301b + .xword 0xc96c5795d7870f42, 0x7a421b2bd420502d + .xword 0x3de861c27fc7af19, 0x8ec62d7c7c60f076 + .xword 0xb2bc941128085171, 0x0192d8af2baf0e1e + .xword 0x4638a2468048f12a, 0xf516eef883efae45 + .xword 0x3ecdd09c2899b324, 0x8de39c222b3eec4b + .xword 0xca49e6cb80d9137f, 0x7967aa75837e4c10 + .xword 0x451d1318d716ed17, 0xf6335fa6d4b1b278 + .xword 0xb199254f7f564d4c, 0x02b769f17cf11223 + .xword 0xb4f7f6ad86b4690b, 0x07d9ba1385133664 + .xword 0x4073c0fa2ef4c950, 0xf35d8c442d53963f + .xword 0xcf273529793b3738, 0x7c0979977a9c6857 + .xword 0x3ba3037ed17b9763, 0x888d4fc0d2dcc80c + .xword 0x435671a479aad56d, 0xf0783d1a7a0d8a02 + .xword 0xb7d247f3d1ea7536, 0x04fc0b4dd24d2a59 + .xword 0x3886b22086258b5e, 0x8ba8fe9e8582d431 + .xword 0xcc0284772e652b05, 0x7f2cc8c92dc2746a + .xword 0x325b15e575e1c3d0, 0x8175595b76469cbf + .xword 0xc6df23b2dda1638b, 
0x75f16f0cde063ce4 + .xword 0x498bd6618a6e9de3, 0xfaa59adf89c9c28c + .xword 0xbd0fe036222e3db8, 0x0e21ac88218962d7 + .xword 0xc5fa92ec8aff7fb6, 0x76d4de52895820d9 + .xword 0x317ea4bb22bfdfed, 0x8250e80521188082 + .xword 0xbe2a516875702185, 0x0d041dd676d77eea + .xword 0x4aae673fdd3081de, 0xf9802b81de97deb1 + .xword 0x4fc0b4dd24d2a599, 0xfceef8632775faf6 + .xword 0xbb44828a8c9205c2, 0x086ace348f355aad + .xword 0x34107759db5dfbaa, 0x873e3be7d8faa4c5 + .xword 0xc094410e731d5bf1, 0x73ba0db070ba049e + .xword 0xb86133d4dbcc19ff, 0x0b4f7f6ad86b4690 + .xword 0x4ce50583738cb9a4, 0xffcb493d702be6cb + .xword 0xc3b1f050244347cc, 0x709fbcee27e418a3 + .xword 0x3735c6078c03e797, 0x841b8ab98fa4b8f8 + .xword 0xadda7c5f3c4488e3, 0x1ef430e13fe3d78c + .xword 0x595e4a08940428b8, 0xea7006b697a377d7 + .xword 0xd60abfdbc3cbd6d0, 0x6524f365c06c89bf + .xword 0x228e898c6b8b768b, 0x91a0c532682c29e4 + .xword 0x5a7bfb56c35a3485, 0xe955b7e8c0fd6bea + .xword 0xaeffcd016b1a94de, 0x1dd181bf68bdcbb1 + .xword 0x21ab38d23cd56ab6, 0x9285746c3f7235d9 + .xword 0xd52f0e859495caed, 0x6601423b97329582 + .xword 0xd041dd676d77eeaa, 0x636f91d96ed0b1c5 + .xword 0x24c5eb30c5374ef1, 0x97eba78ec690119e + .xword 0xab911ee392f8b099, 0x18bf525d915feff6 + .xword 0x5f1528b43ab810c2, 0xec3b640a391f4fad + .xword 0x27e05a6e926952cc, 0x94ce16d091ce0da3 + .xword 0xd3646c393a29f297, 0x604a2087398eadf8 + .xword 0x5c3099ea6de60cff, 0xef1ed5546e415390 + .xword 0xa8b4afbdc5a6aca4, 0x1b9ae303c601f3cb + .xword 0x56ed3e2f9e224471, 0xe5c372919d851b1e + .xword 0xa26908783662e42a, 0x114744c635c5bb45 + .xword 0x2d3dfdab61ad1a42, 0x9e13b115620a452d + .xword 0xd9b9cbfcc9edba19, 0x6a978742ca4ae576 + .xword 0xa14cb926613cf817, 0x1262f598629ba778 + .xword 0x55c88f71c97c584c, 0xe6e6c3cfcadb0723 + .xword 0xda9c7aa29eb3a624, 0x69b2361c9d14f94b + .xword 0x2e184cf536f3067f, 0x9d36004b35545910 + .xword 0x2b769f17cf112238, 0x9858d3a9ccb67d57 + .xword 0xdff2a94067518263, 0x6cdce5fe64f6dd0c + .xword 0x50a65c93309e7c0b, 0xe388102d33392364 + .xword 
0xa4226ac498dedc50, 0x170c267a9b79833f + .xword 0xdcd7181e300f9e5e, 0x6ff954a033a8c131 + .xword 0x28532e49984f3e05, 0x9b7d62f79be8616a + .xword 0xa707db9acf80c06d, 0x14299724cc279f02 + .xword 0x5383edcd67c06036, 0xe0ada17364673f59 diff --git a/src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.S b/src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.S new file mode 100644 index 000000000..185b75bdf --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include "crc64_iso_norm_pmull.h" +#include "crc64_norm_common_pmull.h" + +crc64_norm_func crc64_iso_norm_pmull diff --git a/src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.h new file mode 100644 index 000000000..9a07335ff --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_iso_norm_pmull.h @@ -0,0 +1,201 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.equ p4_low_b0, (0x0101) +.equ p4_low_b1, 0x0100 +.equ p4_low_b2, 0x0001 +.equ p4_low_b3, 0x0000 +.equ p4_high_b0, 0x1b1b +.equ p4_high_b1, 0x1b00 +.equ p4_high_b2, 0x001b +.equ p4_high_b3, 0x0000 + +.equ p1_low_b0, (0x0145) +.equ p1_low_b1, 0x0000 +.equ p1_low_b2, 0x0000 +.equ p1_low_b3, 0x0000 +.equ p1_high_b0, 0x1db7 +.equ p1_high_b1, 0x0000 +.equ p1_high_b2, 0x0000 +.equ p1_high_b3, 0x0000 + +.equ p0_low_b0, (0x0145) +.equ p0_low_b1, 0x0000 +.equ p0_low_b2, 0x0000 +.equ p0_low_b3, 0x0000 +.equ p0_high_b0, 0x0000 +.equ p0_high_b1, 0x0000 +.equ p0_high_b2, 0x0000 +.equ p0_high_b3, 0x0000 + +.equ br_low_b0, (0x001b) +.equ br_low_b1, 0x0000 +.equ br_low_b2, 0x0000 +.equ br_low_b3, 0x0000 +.equ br_high_b0, 0x001b +.equ br_high_b1, 0x0000 +.equ br_high_b2, 0x0000 +.equ br_high_b3, 0x0000 + + .text + .section .rodata + .align 4 + .set lanchor_crc64_tab,. 
+ 0 + .type crc64_tab, %object + .size crc64_tab, 2048 + +crc64_tab: + .xword 0x0000000000000000, 0x000000000000001b + .xword 0x0000000000000036, 0x000000000000002d + .xword 0x000000000000006c, 0x0000000000000077 + .xword 0x000000000000005a, 0x0000000000000041 + .xword 0x00000000000000d8, 0x00000000000000c3 + .xword 0x00000000000000ee, 0x00000000000000f5 + .xword 0x00000000000000b4, 0x00000000000000af + .xword 0x0000000000000082, 0x0000000000000099 + .xword 0x00000000000001b0, 0x00000000000001ab + .xword 0x0000000000000186, 0x000000000000019d + .xword 0x00000000000001dc, 0x00000000000001c7 + .xword 0x00000000000001ea, 0x00000000000001f1 + .xword 0x0000000000000168, 0x0000000000000173 + .xword 0x000000000000015e, 0x0000000000000145 + .xword 0x0000000000000104, 0x000000000000011f + .xword 0x0000000000000132, 0x0000000000000129 + .xword 0x0000000000000360, 0x000000000000037b + .xword 0x0000000000000356, 0x000000000000034d + .xword 0x000000000000030c, 0x0000000000000317 + .xword 0x000000000000033a, 0x0000000000000321 + .xword 0x00000000000003b8, 0x00000000000003a3 + .xword 0x000000000000038e, 0x0000000000000395 + .xword 0x00000000000003d4, 0x00000000000003cf + .xword 0x00000000000003e2, 0x00000000000003f9 + .xword 0x00000000000002d0, 0x00000000000002cb + .xword 0x00000000000002e6, 0x00000000000002fd + .xword 0x00000000000002bc, 0x00000000000002a7 + .xword 0x000000000000028a, 0x0000000000000291 + .xword 0x0000000000000208, 0x0000000000000213 + .xword 0x000000000000023e, 0x0000000000000225 + .xword 0x0000000000000264, 0x000000000000027f + .xword 0x0000000000000252, 0x0000000000000249 + .xword 0x00000000000006c0, 0x00000000000006db + .xword 0x00000000000006f6, 0x00000000000006ed + .xword 0x00000000000006ac, 0x00000000000006b7 + .xword 0x000000000000069a, 0x0000000000000681 + .xword 0x0000000000000618, 0x0000000000000603 + .xword 0x000000000000062e, 0x0000000000000635 + .xword 0x0000000000000674, 0x000000000000066f + .xword 0x0000000000000642, 0x0000000000000659 + .xword 
0x0000000000000770, 0x000000000000076b + .xword 0x0000000000000746, 0x000000000000075d + .xword 0x000000000000071c, 0x0000000000000707 + .xword 0x000000000000072a, 0x0000000000000731 + .xword 0x00000000000007a8, 0x00000000000007b3 + .xword 0x000000000000079e, 0x0000000000000785 + .xword 0x00000000000007c4, 0x00000000000007df + .xword 0x00000000000007f2, 0x00000000000007e9 + .xword 0x00000000000005a0, 0x00000000000005bb + .xword 0x0000000000000596, 0x000000000000058d + .xword 0x00000000000005cc, 0x00000000000005d7 + .xword 0x00000000000005fa, 0x00000000000005e1 + .xword 0x0000000000000578, 0x0000000000000563 + .xword 0x000000000000054e, 0x0000000000000555 + .xword 0x0000000000000514, 0x000000000000050f + .xword 0x0000000000000522, 0x0000000000000539 + .xword 0x0000000000000410, 0x000000000000040b + .xword 0x0000000000000426, 0x000000000000043d + .xword 0x000000000000047c, 0x0000000000000467 + .xword 0x000000000000044a, 0x0000000000000451 + .xword 0x00000000000004c8, 0x00000000000004d3 + .xword 0x00000000000004fe, 0x00000000000004e5 + .xword 0x00000000000004a4, 0x00000000000004bf + .xword 0x0000000000000492, 0x0000000000000489 + .xword 0x0000000000000d80, 0x0000000000000d9b + .xword 0x0000000000000db6, 0x0000000000000dad + .xword 0x0000000000000dec, 0x0000000000000df7 + .xword 0x0000000000000dda, 0x0000000000000dc1 + .xword 0x0000000000000d58, 0x0000000000000d43 + .xword 0x0000000000000d6e, 0x0000000000000d75 + .xword 0x0000000000000d34, 0x0000000000000d2f + .xword 0x0000000000000d02, 0x0000000000000d19 + .xword 0x0000000000000c30, 0x0000000000000c2b + .xword 0x0000000000000c06, 0x0000000000000c1d + .xword 0x0000000000000c5c, 0x0000000000000c47 + .xword 0x0000000000000c6a, 0x0000000000000c71 + .xword 0x0000000000000ce8, 0x0000000000000cf3 + .xword 0x0000000000000cde, 0x0000000000000cc5 + .xword 0x0000000000000c84, 0x0000000000000c9f + .xword 0x0000000000000cb2, 0x0000000000000ca9 + .xword 0x0000000000000ee0, 0x0000000000000efb + .xword 0x0000000000000ed6, 
0x0000000000000ecd + .xword 0x0000000000000e8c, 0x0000000000000e97 + .xword 0x0000000000000eba, 0x0000000000000ea1 + .xword 0x0000000000000e38, 0x0000000000000e23 + .xword 0x0000000000000e0e, 0x0000000000000e15 + .xword 0x0000000000000e54, 0x0000000000000e4f + .xword 0x0000000000000e62, 0x0000000000000e79 + .xword 0x0000000000000f50, 0x0000000000000f4b + .xword 0x0000000000000f66, 0x0000000000000f7d + .xword 0x0000000000000f3c, 0x0000000000000f27 + .xword 0x0000000000000f0a, 0x0000000000000f11 + .xword 0x0000000000000f88, 0x0000000000000f93 + .xword 0x0000000000000fbe, 0x0000000000000fa5 + .xword 0x0000000000000fe4, 0x0000000000000fff + .xword 0x0000000000000fd2, 0x0000000000000fc9 + .xword 0x0000000000000b40, 0x0000000000000b5b + .xword 0x0000000000000b76, 0x0000000000000b6d + .xword 0x0000000000000b2c, 0x0000000000000b37 + .xword 0x0000000000000b1a, 0x0000000000000b01 + .xword 0x0000000000000b98, 0x0000000000000b83 + .xword 0x0000000000000bae, 0x0000000000000bb5 + .xword 0x0000000000000bf4, 0x0000000000000bef + .xword 0x0000000000000bc2, 0x0000000000000bd9 + .xword 0x0000000000000af0, 0x0000000000000aeb + .xword 0x0000000000000ac6, 0x0000000000000add + .xword 0x0000000000000a9c, 0x0000000000000a87 + .xword 0x0000000000000aaa, 0x0000000000000ab1 + .xword 0x0000000000000a28, 0x0000000000000a33 + .xword 0x0000000000000a1e, 0x0000000000000a05 + .xword 0x0000000000000a44, 0x0000000000000a5f + .xword 0x0000000000000a72, 0x0000000000000a69 + .xword 0x0000000000000820, 0x000000000000083b + .xword 0x0000000000000816, 0x000000000000080d + .xword 0x000000000000084c, 0x0000000000000857 + .xword 0x000000000000087a, 0x0000000000000861 + .xword 0x00000000000008f8, 0x00000000000008e3 + .xword 0x00000000000008ce, 0x00000000000008d5 + .xword 0x0000000000000894, 0x000000000000088f + .xword 0x00000000000008a2, 0x00000000000008b9 + .xword 0x0000000000000990, 0x000000000000098b + .xword 0x00000000000009a6, 0x00000000000009bd + .xword 0x00000000000009fc, 0x00000000000009e7 + .xword 
0x00000000000009ca, 0x00000000000009d1 + .xword 0x0000000000000948, 0x0000000000000953 + .xword 0x000000000000097e, 0x0000000000000965 + .xword 0x0000000000000924, 0x000000000000093f + .xword 0x0000000000000912, 0x0000000000000909 diff --git a/src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.S b/src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.S new file mode 100644 index 000000000..2d2bc6658 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include "crc64_iso_refl_pmull.h" +#include "crc64_refl_common_pmull.h" + +crc64_refl_func crc64_iso_refl_pmull diff --git a/src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.h new file mode 100644 index 000000000..0b50d5596 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_iso_refl_pmull.h @@ -0,0 +1,197 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.equ p4_low_b0, 0x0001 +.equ p4_low_b1, 0xb000 +.equ p4_low_b2, 0x01b1 +.equ p4_low_b3, 0x01b0 +.equ p4_high_b0, 0x0001 +.equ p4_high_b1, 0x0000 +.equ p4_high_b2, 0x0101 +.equ p4_high_b3, 0xb100 + +.equ p1_low_b0, 0x0001 +.equ p1_low_b1, 0x0000 +.equ p1_low_b2, 0x0000 +.equ p1_low_b3, 0x6b70 +.equ p1_high_b0, 0x0001 +.equ p1_high_b1, 0x0000 +.equ p1_high_b2, 0x0000 +.equ p1_high_b3, 0xf500 + +.equ p0_low_b0, 0x0001 +.equ p0_low_b1, 0x0000 +.equ p0_low_b2, 0x0000 +.equ p0_low_b3, 0xf500 + +.equ br_low_b0, 0x0001 +.equ br_low_b1, 0x0000 +.equ br_low_b2, 0x0000 +.equ br_low_b3, 0xb000 +.equ br_high_b0, 0x0001 +.equ br_high_b1, 0x0000 +.equ br_high_b2, 0x0000 +.equ br_high_b3, 0xb000 + + .text + .section .rodata + .align 4 + .set .lanchor_crc64_tab,. 
+ 0 + .type crc64_tab, %object + .size crc64_tab, 2048 + +crc64_tab: + .xword 0x0000000000000000, 0x01b0000000000000 + .xword 0x0360000000000000, 0x02d0000000000000 + .xword 0x06c0000000000000, 0x0770000000000000 + .xword 0x05a0000000000000, 0x0410000000000000 + .xword 0x0d80000000000000, 0x0c30000000000000 + .xword 0x0ee0000000000000, 0x0f50000000000000 + .xword 0x0b40000000000000, 0x0af0000000000000 + .xword 0x0820000000000000, 0x0990000000000000 + .xword 0x1b00000000000000, 0x1ab0000000000000 + .xword 0x1860000000000000, 0x19d0000000000000 + .xword 0x1dc0000000000000, 0x1c70000000000000 + .xword 0x1ea0000000000000, 0x1f10000000000000 + .xword 0x1680000000000000, 0x1730000000000000 + .xword 0x15e0000000000000, 0x1450000000000000 + .xword 0x1040000000000000, 0x11f0000000000000 + .xword 0x1320000000000000, 0x1290000000000000 + .xword 0x3600000000000000, 0x37b0000000000000 + .xword 0x3560000000000000, 0x34d0000000000000 + .xword 0x30c0000000000000, 0x3170000000000000 + .xword 0x33a0000000000000, 0x3210000000000000 + .xword 0x3b80000000000000, 0x3a30000000000000 + .xword 0x38e0000000000000, 0x3950000000000000 + .xword 0x3d40000000000000, 0x3cf0000000000000 + .xword 0x3e20000000000000, 0x3f90000000000000 + .xword 0x2d00000000000000, 0x2cb0000000000000 + .xword 0x2e60000000000000, 0x2fd0000000000000 + .xword 0x2bc0000000000000, 0x2a70000000000000 + .xword 0x28a0000000000000, 0x2910000000000000 + .xword 0x2080000000000000, 0x2130000000000000 + .xword 0x23e0000000000000, 0x2250000000000000 + .xword 0x2640000000000000, 0x27f0000000000000 + .xword 0x2520000000000000, 0x2490000000000000 + .xword 0x6c00000000000000, 0x6db0000000000000 + .xword 0x6f60000000000000, 0x6ed0000000000000 + .xword 0x6ac0000000000000, 0x6b70000000000000 + .xword 0x69a0000000000000, 0x6810000000000000 + .xword 0x6180000000000000, 0x6030000000000000 + .xword 0x62e0000000000000, 0x6350000000000000 + .xword 0x6740000000000000, 0x66f0000000000000 + .xword 0x6420000000000000, 0x6590000000000000 + .xword 
0x7700000000000000, 0x76b0000000000000 + .xword 0x7460000000000000, 0x75d0000000000000 + .xword 0x71c0000000000000, 0x7070000000000000 + .xword 0x72a0000000000000, 0x7310000000000000 + .xword 0x7a80000000000000, 0x7b30000000000000 + .xword 0x79e0000000000000, 0x7850000000000000 + .xword 0x7c40000000000000, 0x7df0000000000000 + .xword 0x7f20000000000000, 0x7e90000000000000 + .xword 0x5a00000000000000, 0x5bb0000000000000 + .xword 0x5960000000000000, 0x58d0000000000000 + .xword 0x5cc0000000000000, 0x5d70000000000000 + .xword 0x5fa0000000000000, 0x5e10000000000000 + .xword 0x5780000000000000, 0x5630000000000000 + .xword 0x54e0000000000000, 0x5550000000000000 + .xword 0x5140000000000000, 0x50f0000000000000 + .xword 0x5220000000000000, 0x5390000000000000 + .xword 0x4100000000000000, 0x40b0000000000000 + .xword 0x4260000000000000, 0x43d0000000000000 + .xword 0x47c0000000000000, 0x4670000000000000 + .xword 0x44a0000000000000, 0x4510000000000000 + .xword 0x4c80000000000000, 0x4d30000000000000 + .xword 0x4fe0000000000000, 0x4e50000000000000 + .xword 0x4a40000000000000, 0x4bf0000000000000 + .xword 0x4920000000000000, 0x4890000000000000 + .xword 0xd800000000000000, 0xd9b0000000000000 + .xword 0xdb60000000000000, 0xdad0000000000000 + .xword 0xdec0000000000000, 0xdf70000000000000 + .xword 0xdda0000000000000, 0xdc10000000000000 + .xword 0xd580000000000000, 0xd430000000000000 + .xword 0xd6e0000000000000, 0xd750000000000000 + .xword 0xd340000000000000, 0xd2f0000000000000 + .xword 0xd020000000000000, 0xd190000000000000 + .xword 0xc300000000000000, 0xc2b0000000000000 + .xword 0xc060000000000000, 0xc1d0000000000000 + .xword 0xc5c0000000000000, 0xc470000000000000 + .xword 0xc6a0000000000000, 0xc710000000000000 + .xword 0xce80000000000000, 0xcf30000000000000 + .xword 0xcde0000000000000, 0xcc50000000000000 + .xword 0xc840000000000000, 0xc9f0000000000000 + .xword 0xcb20000000000000, 0xca90000000000000 + .xword 0xee00000000000000, 0xefb0000000000000 + .xword 0xed60000000000000, 
0xecd0000000000000 + .xword 0xe8c0000000000000, 0xe970000000000000 + .xword 0xeba0000000000000, 0xea10000000000000 + .xword 0xe380000000000000, 0xe230000000000000 + .xword 0xe0e0000000000000, 0xe150000000000000 + .xword 0xe540000000000000, 0xe4f0000000000000 + .xword 0xe620000000000000, 0xe790000000000000 + .xword 0xf500000000000000, 0xf4b0000000000000 + .xword 0xf660000000000000, 0xf7d0000000000000 + .xword 0xf3c0000000000000, 0xf270000000000000 + .xword 0xf0a0000000000000, 0xf110000000000000 + .xword 0xf880000000000000, 0xf930000000000000 + .xword 0xfbe0000000000000, 0xfa50000000000000 + .xword 0xfe40000000000000, 0xfff0000000000000 + .xword 0xfd20000000000000, 0xfc90000000000000 + .xword 0xb400000000000000, 0xb5b0000000000000 + .xword 0xb760000000000000, 0xb6d0000000000000 + .xword 0xb2c0000000000000, 0xb370000000000000 + .xword 0xb1a0000000000000, 0xb010000000000000 + .xword 0xb980000000000000, 0xb830000000000000 + .xword 0xbae0000000000000, 0xbb50000000000000 + .xword 0xbf40000000000000, 0xbef0000000000000 + .xword 0xbc20000000000000, 0xbd90000000000000 + .xword 0xaf00000000000000, 0xaeb0000000000000 + .xword 0xac60000000000000, 0xadd0000000000000 + .xword 0xa9c0000000000000, 0xa870000000000000 + .xword 0xaaa0000000000000, 0xab10000000000000 + .xword 0xa280000000000000, 0xa330000000000000 + .xword 0xa1e0000000000000, 0xa050000000000000 + .xword 0xa440000000000000, 0xa5f0000000000000 + .xword 0xa720000000000000, 0xa690000000000000 + .xword 0x8200000000000000, 0x83b0000000000000 + .xword 0x8160000000000000, 0x80d0000000000000 + .xword 0x84c0000000000000, 0x8570000000000000 + .xword 0x87a0000000000000, 0x8610000000000000 + .xword 0x8f80000000000000, 0x8e30000000000000 + .xword 0x8ce0000000000000, 0x8d50000000000000 + .xword 0x8940000000000000, 0x88f0000000000000 + .xword 0x8a20000000000000, 0x8b90000000000000 + .xword 0x9900000000000000, 0x98b0000000000000 + .xword 0x9a60000000000000, 0x9bd0000000000000 + .xword 0x9fc0000000000000, 0x9e70000000000000 + .xword 
0x9ca0000000000000, 0x9d10000000000000 + .xword 0x9480000000000000, 0x9530000000000000 + .xword 0x97e0000000000000, 0x9650000000000000 + .xword 0x9240000000000000, 0x93f0000000000000 + .xword 0x9120000000000000, 0x9090000000000000 diff --git a/src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.S b/src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.S new file mode 100644 index 000000000..4f298376c --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include "crc64_jones_norm_pmull.h" +#include "crc64_norm_common_pmull.h" +// Instantiate the shared crc64_norm_func macro as the global function crc64_jones_norm_pmull. The first header supplies the fold constants and crc64_tab, the second defines the macro. +crc64_norm_func crc64_jones_norm_pmull diff --git a/src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.h new file mode 100644 index 000000000..00538bf1c --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_jones_norm_pmull.h @@ -0,0 +1,200 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### +// Constants and lookup table for crc64_jones_norm_pmull. crc64_norm_func (crc64_norm_common_pmull.h) rebuilds each 64-bit value from its four 16-bit slices with mov/movk: b0 is bits 0-15, b1 bits 16-31, b2 bits 32-47, b3 bits 48-63. +.equ p4_low_b0, (0xd25e) // p4_low and p4_high: pmull multipliers of the 64-bytes-per-iteration fold loop +.equ p4_low_b1, 0xca43 +.equ p4_low_b2, 0x1e58 +.equ p4_low_b3, 0x4e50 +.equ p4_high_b0, 0xf643 +.equ p4_high_b1, 0x8f27 +.equ p4_high_b2, 0x6158 +.equ p4_high_b3, 0x13c9 + +.equ p1_low_b0, (0x7038) // p1_low and p1_high: pmull multipliers used to fold the four 128-bit accumulators into one +.equ p1_low_b1, 0x5001 +.equ p1_low_b2, 0xed27 +.equ p1_low_b3, 0x4445 +.equ p1_high_b0, 0xd736 +.equ p1_high_b1, 0x7cfb +.equ p1_high_b2, 0x7415 +.equ p1_high_b3, 0x698b + +.equ p0_low_b0, (0x7038) // p0_low: pmull multiplier of the fold 64b step, same value as p1_low +.equ p0_low_b1, 0x5001 +.equ p0_low_b2, 0xed27 +.equ p0_low_b3, 0x4445 +.equ p0_high_b0, 0x0000 // p0_high is all zero and is not referenced by crc64_norm_func +.equ p0_high_b1, 0x0000 +.equ p0_high_b2, 0x0000 +.equ p0_high_b3, 0x0000 + +.equ br_low_b0, (0x6cf8) // br_low and br_high: pmull multipliers of the final barrett reduction +.equ br_low_b1, 0x98be +.equ br_low_b2, 0xeeb2 +.equ br_low_b3, 0xddf3 +.equ br_high_b0, 0x35a9 +.equ br_high_b1, 0x94c9 +.equ br_high_b2, 0xd235 +.equ br_high_b3, 0xad93 + + .text // superseded at once by the .section directive that follows + .section .rodata // crc64_tab is placed in read-only data + .align 4 + .set lanchor_crc64_tab,.
+ 0 + .type crc64_tab, %object + .size crc64_tab, 2048 +crc64_tab: + .xword 0x0000000000000000, 0xad93d23594c935a9 + .xword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52 + .xword 0x40fb3e88ee7f885f, 0xed68ecbd7ab6bdf6 + .xword 0xb64f48d65324d6a4, 0x1bdc9ae3c7ede30d + .xword 0x81f67d11dcff10be, 0x2c65af2448362517 + .xword 0x77420b4f61a44e45, 0xdad1d97af56d7bec + .xword 0xc10d4399328098e1, 0x6c9e91aca649ad48 + .xword 0x37b935c78fdbc61a, 0x9a2ae7f21b12f3b3 + .xword 0xae7f28162d3714d5, 0x03ecfa23b9fe217c + .xword 0x58cb5e48906c4a2e, 0xf5588c7d04a57f87 + .xword 0xee84169ec3489c8a, 0x4317c4ab5781a923 + .xword 0x183060c07e13c271, 0xb5a3b2f5eadaf7d8 + .xword 0x2f895507f1c8046b, 0x821a8732650131c2 + .xword 0xd93d23594c935a90, 0x74aef16cd85a6f39 + .xword 0x6f726b8f1fb78c34, 0xc2e1b9ba8b7eb99d + .xword 0x99c61dd1a2ecd2cf, 0x3455cfe43625e766 + .xword 0xf16d8219cea71c03, 0x5cfe502c5a6e29aa + .xword 0x07d9f44773fc42f8, 0xaa4a2672e7357751 + .xword 0xb196bc9120d8945c, 0x1c056ea4b411a1f5 + .xword 0x4722cacf9d83caa7, 0xeab118fa094aff0e + .xword 0x709bff0812580cbd, 0xdd082d3d86913914 + .xword 0x862f8956af035246, 0x2bbc5b633bca67ef + .xword 0x3060c180fc2784e2, 0x9df313b568eeb14b + .xword 0xc6d4b7de417cda19, 0x6b4765ebd5b5efb0 + .xword 0x5f12aa0fe39008d6, 0xf281783a77593d7f + .xword 0xa9a6dc515ecb562d, 0x04350e64ca026384 + .xword 0x1fe994870def8089, 0xb27a46b29926b520 + .xword 0xe95de2d9b0b4de72, 0x44ce30ec247debdb + .xword 0xdee4d71e3f6f1868, 0x7377052baba62dc1 + .xword 0x2850a14082344693, 0x85c3737516fd733a + .xword 0x9e1fe996d1109037, 0x338c3ba345d9a59e + .xword 0x68ab9fc86c4bcecc, 0xc5384dfdf882fb65 + .xword 0x4f48d60609870daf, 0xe2db04339d4e3806 + .xword 0xb9fca058b4dc5354, 0x146f726d201566fd + .xword 0x0fb3e88ee7f885f0, 0xa2203abb7331b059 + .xword 0xf9079ed05aa3db0b, 0x54944ce5ce6aeea2 + .xword 0xcebeab17d5781d11, 0x632d792241b128b8 + .xword 0x380add49682343ea, 0x95990f7cfcea7643 + .xword 0x8e45959f3b07954e, 0x23d647aaafcea0e7 + .xword 0x78f1e3c1865ccbb5, 0xd56231f41295fe1c + .xword 
0xe137fe1024b0197a, 0x4ca42c25b0792cd3 + .xword 0x1783884e99eb4781, 0xba105a7b0d227228 + .xword 0xa1ccc098cacf9125, 0x0c5f12ad5e06a48c + .xword 0x5778b6c67794cfde, 0xfaeb64f3e35dfa77 + .xword 0x60c18301f84f09c4, 0xcd5251346c863c6d + .xword 0x9675f55f4514573f, 0x3be6276ad1dd6296 + .xword 0x203abd891630819b, 0x8da96fbc82f9b432 + .xword 0xd68ecbd7ab6bdf60, 0x7b1d19e23fa2eac9 + .xword 0xbe25541fc72011ac, 0x13b6862a53e92405 + .xword 0x489122417a7b4f57, 0xe502f074eeb27afe + .xword 0xfede6a97295f99f3, 0x534db8a2bd96ac5a + .xword 0x086a1cc99404c708, 0xa5f9cefc00cdf2a1 + .xword 0x3fd3290e1bdf0112, 0x9240fb3b8f1634bb + .xword 0xc9675f50a6845fe9, 0x64f48d65324d6a40 + .xword 0x7f281786f5a0894d, 0xd2bbc5b36169bce4 + .xword 0x899c61d848fbd7b6, 0x240fb3eddc32e21f + .xword 0x105a7c09ea170579, 0xbdc9ae3c7ede30d0 + .xword 0xe6ee0a57574c5b82, 0x4b7dd862c3856e2b + .xword 0x50a1428104688d26, 0xfd3290b490a1b88f + .xword 0xa61534dfb933d3dd, 0x0b86e6ea2dfae674 + .xword 0x91ac011836e815c7, 0x3c3fd32da221206e + .xword 0x671877468bb34b3c, 0xca8ba5731f7a7e95 + .xword 0xd1573f90d8979d98, 0x7cc4eda54c5ea831 + .xword 0x27e349ce65ccc363, 0x8a709bfbf105f6ca + .xword 0x9e91ac0c130e1b5e, 0x33027e3987c72ef7 + .xword 0x6825da52ae5545a5, 0xc5b608673a9c700c + .xword 0xde6a9284fd719301, 0x73f940b169b8a6a8 + .xword 0x28dee4da402acdfa, 0x854d36efd4e3f853 + .xword 0x1f67d11dcff10be0, 0xb2f403285b383e49 + .xword 0xe9d3a74372aa551b, 0x44407576e66360b2 + .xword 0x5f9cef95218e83bf, 0xf20f3da0b547b616 + .xword 0xa92899cb9cd5dd44, 0x04bb4bfe081ce8ed + .xword 0x30ee841a3e390f8b, 0x9d7d562faaf03a22 + .xword 0xc65af24483625170, 0x6bc9207117ab64d9 + .xword 0x7015ba92d04687d4, 0xdd8668a7448fb27d + .xword 0x86a1cccc6d1dd92f, 0x2b321ef9f9d4ec86 + .xword 0xb118f90be2c61f35, 0x1c8b2b3e760f2a9c + .xword 0x47ac8f555f9d41ce, 0xea3f5d60cb547467 + .xword 0xf1e3c7830cb9976a, 0x5c7015b69870a2c3 + .xword 0x0757b1ddb1e2c991, 0xaac463e8252bfc38 + .xword 0x6ffc2e15dda9075d, 0xc26ffc20496032f4 + .xword 0x9948584b60f259a6, 
0x34db8a7ef43b6c0f + .xword 0x2f07109d33d68f02, 0x8294c2a8a71fbaab + .xword 0xd9b366c38e8dd1f9, 0x7420b4f61a44e450 + .xword 0xee0a5304015617e3, 0x43998131959f224a + .xword 0x18be255abc0d4918, 0xb52df76f28c47cb1 + .xword 0xaef16d8cef299fbc, 0x0362bfb97be0aa15 + .xword 0x58451bd25272c147, 0xf5d6c9e7c6bbf4ee + .xword 0xc1830603f09e1388, 0x6c10d43664572621 + .xword 0x3737705d4dc54d73, 0x9aa4a268d90c78da + .xword 0x8178388b1ee19bd7, 0x2cebeabe8a28ae7e + .xword 0x77cc4ed5a3bac52c, 0xda5f9ce03773f085 + .xword 0x40757b122c610336, 0xede6a927b8a8369f + .xword 0xb6c10d4c913a5dcd, 0x1b52df7905f36864 + .xword 0x008e459ac21e8b69, 0xad1d97af56d7bec0 + .xword 0xf63a33c47f45d592, 0x5ba9e1f1eb8ce03b + .xword 0xd1d97a0a1a8916f1, 0x7c4aa83f8e402358 + .xword 0x276d0c54a7d2480a, 0x8afede61331b7da3 + .xword 0x91224482f4f69eae, 0x3cb196b7603fab07 + .xword 0x679632dc49adc055, 0xca05e0e9dd64f5fc + .xword 0x502f071bc676064f, 0xfdbcd52e52bf33e6 + .xword 0xa69b71457b2d58b4, 0x0b08a370efe46d1d + .xword 0x10d4399328098e10, 0xbd47eba6bcc0bbb9 + .xword 0xe6604fcd9552d0eb, 0x4bf39df8019be542 + .xword 0x7fa6521c37be0224, 0xd2358029a377378d + .xword 0x891224428ae55cdf, 0x2481f6771e2c6976 + .xword 0x3f5d6c94d9c18a7b, 0x92cebea14d08bfd2 + .xword 0xc9e91aca649ad480, 0x647ac8fff053e129 + .xword 0xfe502f0deb41129a, 0x53c3fd387f882733 + .xword 0x08e45953561a4c61, 0xa5778b66c2d379c8 + .xword 0xbeab1185053e9ac5, 0x1338c3b091f7af6c + .xword 0x481f67dbb865c43e, 0xe58cb5ee2cacf197 + .xword 0x20b4f813d42e0af2, 0x8d272a2640e73f5b + .xword 0xd6008e4d69755409, 0x7b935c78fdbc61a0 + .xword 0x604fc69b3a5182ad, 0xcddc14aeae98b704 + .xword 0x96fbb0c5870adc56, 0x3b6862f013c3e9ff + .xword 0xa142850208d11a4c, 0x0cd157379c182fe5 + .xword 0x57f6f35cb58a44b7, 0xfa6521692143711e + .xword 0xe1b9bb8ae6ae9213, 0x4c2a69bf7267a7ba + .xword 0x170dcdd45bf5cce8, 0xba9e1fe1cf3cf941 + .xword 0x8ecbd005f9191e27, 0x235802306dd02b8e + .xword 0x787fa65b444240dc, 0xd5ec746ed08b7575 + .xword 0xce30ee8d17669678, 0x63a33cb883afa3d1 + .xword 
0x388498d3aa3dc883, 0x95174ae63ef4fd2a + .xword 0x0f3dad1425e60e99, 0xa2ae7f21b12f3b30 + .xword 0xf989db4a98bd5062, 0x541a097f0c7465cb + .xword 0x4fc6939ccb9986c6, 0xe25541a95f50b36f + .xword 0xb972e5c276c2d83d, 0x14e137f7e20bed94 diff --git a/src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.S b/src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.S new file mode 100644 index 000000000..177092f9f --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include "crc64_jones_refl_pmull.h" +#include "crc64_refl_common_pmull.h" +// Instantiate the shared crc64_refl_func macro as the global function crc64_jones_refl_pmull. The first header supplies the fold constants and crc64_tab, the second defines the macro. +crc64_refl_func crc64_jones_refl_pmull diff --git a/src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.h new file mode 100644 index 000000000..a58908436 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_jones_refl_pmull.h @@ -0,0 +1,196 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### +// Constants and lookup table for crc64_jones_refl_pmull, which is generated by crc64_refl_func (crc64_refl_common_pmull.h). NOTE(review): presumably each 64-bit value is rebuilt from its b0..b3 16-bit slices (b0 lowest) and the groups play the same roles as in crc64_norm_func - confirm against the pmull part of crc64_refl_func. +.equ p4_low_b0, 0xb4fb +.equ p4_low_b1, 0x6d9a +.equ p4_low_b2, 0xefb1 +.equ p4_low_b3, 0xaf86 +.equ p4_high_b0, 0x14e4 +.equ p4_high_b1, 0x34f0 +.equ p4_high_b2, 0x84a6 +.equ p4_high_b3, 0xf497 + +.equ p1_low_b0, 0xa32c +.equ p1_low_b1, 0x505d +.equ p1_low_b2, 0xbe7d +.equ p1_low_b3, 0xd9d7 +.equ p1_high_b0, 0x4444 +.equ p1_high_b1, 0xc96f +.equ p1_high_b2, 0x0015 +.equ p1_high_b3, 0x381d + +.equ p0_low_b0, 0x4444 // the four p0_low slices equal the p1_high slices above +.equ p0_low_b1, 0xc96f +.equ p0_low_b2, 0x0015 +.equ p0_low_b3, 0x381d + +.equ br_low_b0, 0x9f77 +.equ br_low_b1, 0x9aef +.equ br_low_b2, 0xfa32 +.equ br_low_b3, 0x3e6c +.equ br_high_b0, 0x936b +.equ br_high_b1, 0x5897 +.equ br_high_b2, 0x2653 +.equ br_high_b3, 0x2b59 + + .text // superseded at once by the .section directive that follows + .section .rodata // crc64_tab is placed in read-only data + .align 4 + .set .lanchor_crc64_tab,.
+ 0 + .type crc64_tab, %object + .size crc64_tab, 2048 +crc64_tab: + .xword 0x0000000000000000, 0x7ad870c830358979 + .xword 0xf5b0e190606b12f2, 0x8f689158505e9b8b + .xword 0xc038e5739841b68f, 0xbae095bba8743ff6 + .xword 0x358804e3f82aa47d, 0x4f50742bc81f2d04 + .xword 0xab28ecb46814fe75, 0xd1f09c7c5821770c + .xword 0x5e980d24087fec87, 0x24407dec384a65fe + .xword 0x6b1009c7f05548fa, 0x11c8790fc060c183 + .xword 0x9ea0e857903e5a08, 0xe478989fa00bd371 + .xword 0x7d08ff3b88be6f81, 0x07d08ff3b88be6f8 + .xword 0x88b81eabe8d57d73, 0xf2606e63d8e0f40a + .xword 0xbd301a4810ffd90e, 0xc7e86a8020ca5077 + .xword 0x4880fbd87094cbfc, 0x32588b1040a14285 + .xword 0xd620138fe0aa91f4, 0xacf86347d09f188d + .xword 0x2390f21f80c18306, 0x594882d7b0f40a7f + .xword 0x1618f6fc78eb277b, 0x6cc0863448deae02 + .xword 0xe3a8176c18803589, 0x997067a428b5bcf0 + .xword 0xfa11fe77117cdf02, 0x80c98ebf2149567b + .xword 0x0fa11fe77117cdf0, 0x75796f2f41224489 + .xword 0x3a291b04893d698d, 0x40f16bccb908e0f4 + .xword 0xcf99fa94e9567b7f, 0xb5418a5cd963f206 + .xword 0x513912c379682177, 0x2be1620b495da80e + .xword 0xa489f35319033385, 0xde51839b2936bafc + .xword 0x9101f7b0e12997f8, 0xebd98778d11c1e81 + .xword 0x64b116208142850a, 0x1e6966e8b1770c73 + .xword 0x8719014c99c2b083, 0xfdc17184a9f739fa + .xword 0x72a9e0dcf9a9a271, 0x08719014c99c2b08 + .xword 0x4721e43f0183060c, 0x3df994f731b68f75 + .xword 0xb29105af61e814fe, 0xc849756751dd9d87 + .xword 0x2c31edf8f1d64ef6, 0x56e99d30c1e3c78f + .xword 0xd9810c6891bd5c04, 0xa3597ca0a188d57d + .xword 0xec09088b6997f879, 0x96d1784359a27100 + .xword 0x19b9e91b09fcea8b, 0x636199d339c963f2 + .xword 0xdf7adabd7a6e2d6f, 0xa5a2aa754a5ba416 + .xword 0x2aca3b2d1a053f9d, 0x50124be52a30b6e4 + .xword 0x1f423fcee22f9be0, 0x659a4f06d21a1299 + .xword 0xeaf2de5e82448912, 0x902aae96b271006b + .xword 0x74523609127ad31a, 0x0e8a46c1224f5a63 + .xword 0x81e2d7997211c1e8, 0xfb3aa75142244891 + .xword 0xb46ad37a8a3b6595, 0xceb2a3b2ba0eecec + .xword 0x41da32eaea507767, 0x3b024222da65fe1e + .xword 
0xa2722586f2d042ee, 0xd8aa554ec2e5cb97 + .xword 0x57c2c41692bb501c, 0x2d1ab4dea28ed965 + .xword 0x624ac0f56a91f461, 0x1892b03d5aa47d18 + .xword 0x97fa21650afae693, 0xed2251ad3acf6fea + .xword 0x095ac9329ac4bc9b, 0x7382b9faaaf135e2 + .xword 0xfcea28a2faafae69, 0x8632586aca9a2710 + .xword 0xc9622c4102850a14, 0xb3ba5c8932b0836d + .xword 0x3cd2cdd162ee18e6, 0x460abd1952db919f + .xword 0x256b24ca6b12f26d, 0x5fb354025b277b14 + .xword 0xd0dbc55a0b79e09f, 0xaa03b5923b4c69e6 + .xword 0xe553c1b9f35344e2, 0x9f8bb171c366cd9b + .xword 0x10e3202993385610, 0x6a3b50e1a30ddf69 + .xword 0x8e43c87e03060c18, 0xf49bb8b633338561 + .xword 0x7bf329ee636d1eea, 0x012b592653589793 + .xword 0x4e7b2d0d9b47ba97, 0x34a35dc5ab7233ee + .xword 0xbbcbcc9dfb2ca865, 0xc113bc55cb19211c + .xword 0x5863dbf1e3ac9dec, 0x22bbab39d3991495 + .xword 0xadd33a6183c78f1e, 0xd70b4aa9b3f20667 + .xword 0x985b3e827bed2b63, 0xe2834e4a4bd8a21a + .xword 0x6debdf121b863991, 0x1733afda2bb3b0e8 + .xword 0xf34b37458bb86399, 0x8993478dbb8deae0 + .xword 0x06fbd6d5ebd3716b, 0x7c23a61ddbe6f812 + .xword 0x3373d23613f9d516, 0x49aba2fe23cc5c6f + .xword 0xc6c333a67392c7e4, 0xbc1b436e43a74e9d + .xword 0x95ac9329ac4bc9b5, 0xef74e3e19c7e40cc + .xword 0x601c72b9cc20db47, 0x1ac40271fc15523e + .xword 0x5594765a340a7f3a, 0x2f4c0692043ff643 + .xword 0xa02497ca54616dc8, 0xdafce7026454e4b1 + .xword 0x3e847f9dc45f37c0, 0x445c0f55f46abeb9 + .xword 0xcb349e0da4342532, 0xb1eceec59401ac4b + .xword 0xfebc9aee5c1e814f, 0x8464ea266c2b0836 + .xword 0x0b0c7b7e3c7593bd, 0x71d40bb60c401ac4 + .xword 0xe8a46c1224f5a634, 0x927c1cda14c02f4d + .xword 0x1d148d82449eb4c6, 0x67ccfd4a74ab3dbf + .xword 0x289c8961bcb410bb, 0x5244f9a98c8199c2 + .xword 0xdd2c68f1dcdf0249, 0xa7f41839ecea8b30 + .xword 0x438c80a64ce15841, 0x3954f06e7cd4d138 + .xword 0xb63c61362c8a4ab3, 0xcce411fe1cbfc3ca + .xword 0x83b465d5d4a0eece, 0xf96c151de49567b7 + .xword 0x76048445b4cbfc3c, 0x0cdcf48d84fe7545 + .xword 0x6fbd6d5ebd3716b7, 0x15651d968d029fce + .xword 0x9a0d8ccedd5c0445, 
0xe0d5fc06ed698d3c + .xword 0xaf85882d2576a038, 0xd55df8e515432941 + .xword 0x5a3569bd451db2ca, 0x20ed197575283bb3 + .xword 0xc49581ead523e8c2, 0xbe4df122e51661bb + .xword 0x3125607ab548fa30, 0x4bfd10b2857d7349 + .xword 0x04ad64994d625e4d, 0x7e7514517d57d734 + .xword 0xf11d85092d094cbf, 0x8bc5f5c11d3cc5c6 + .xword 0x12b5926535897936, 0x686de2ad05bcf04f + .xword 0xe70573f555e26bc4, 0x9ddd033d65d7e2bd + .xword 0xd28d7716adc8cfb9, 0xa85507de9dfd46c0 + .xword 0x273d9686cda3dd4b, 0x5de5e64efd965432 + .xword 0xb99d7ed15d9d8743, 0xc3450e196da80e3a + .xword 0x4c2d9f413df695b1, 0x36f5ef890dc31cc8 + .xword 0x79a59ba2c5dc31cc, 0x037deb6af5e9b8b5 + .xword 0x8c157a32a5b7233e, 0xf6cd0afa9582aa47 + .xword 0x4ad64994d625e4da, 0x300e395ce6106da3 + .xword 0xbf66a804b64ef628, 0xc5bed8cc867b7f51 + .xword 0x8aeeace74e645255, 0xf036dc2f7e51db2c + .xword 0x7f5e4d772e0f40a7, 0x05863dbf1e3ac9de + .xword 0xe1fea520be311aaf, 0x9b26d5e88e0493d6 + .xword 0x144e44b0de5a085d, 0x6e963478ee6f8124 + .xword 0x21c640532670ac20, 0x5b1e309b16452559 + .xword 0xd476a1c3461bbed2, 0xaeaed10b762e37ab + .xword 0x37deb6af5e9b8b5b, 0x4d06c6676eae0222 + .xword 0xc26e573f3ef099a9, 0xb8b627f70ec510d0 + .xword 0xf7e653dcc6da3dd4, 0x8d3e2314f6efb4ad + .xword 0x0256b24ca6b12f26, 0x788ec2849684a65f + .xword 0x9cf65a1b368f752e, 0xe62e2ad306bafc57 + .xword 0x6946bb8b56e467dc, 0x139ecb4366d1eea5 + .xword 0x5ccebf68aecec3a1, 0x2616cfa09efb4ad8 + .xword 0xa97e5ef8cea5d153, 0xd3a62e30fe90582a + .xword 0xb0c7b7e3c7593bd8, 0xca1fc72bf76cb2a1 + .xword 0x45775673a732292a, 0x3faf26bb9707a053 + .xword 0x70ff52905f188d57, 0x0a2722586f2d042e + .xword 0x854fb3003f739fa5, 0xff97c3c80f4616dc + .xword 0x1bef5b57af4dc5ad, 0x61372b9f9f784cd4 + .xword 0xee5fbac7cf26d75f, 0x9487ca0fff135e26 + .xword 0xdbd7be24370c7322, 0xa10fceec0739fa5b + .xword 0x2e675fb4576761d0, 0x54bf2f7c6752e8a9 + .xword 0xcdcf48d84fe75459, 0xb71738107fd2dd20 + .xword 0x387fa9482f8c46ab, 0x42a7d9801fb9cfd2 + .xword 0x0df7adabd7a6e2d6, 0x772fdd63e7936baf + .xword 
0xf8474c3bb7cdf024, 0x829f3cf387f8795d + .xword 0x66e7a46c27f3aa2c, 0x1c3fd4a417c62355 + .xword 0x935745fc4798b8de, 0xe98f353477ad31a7 + .xword 0xa6df411fbfb21ca3, 0xdc0731d78f8795da + .xword 0x536fa08fdfd90e51, 0x29b7d047efec8728 diff --git a/src/spdk/isa-l/crc/aarch64/crc64_norm_common_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_norm_common_pmull.h new file mode 100644 index 000000000..1ad8ea94a --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_norm_common_pmull.h @@ -0,0 +1,310 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### +// crc64_norm_func: expands to a global function, named by the macro argument, with the C signature shown below. Inputs of at least FOLD_SIZE bytes are first folded 64 bytes at a time with pmull, then the remaining tail (or the whole input when it is shorter) goes through the byte-wise lanchor_crc64_tab loop. The including file must supply the p4, p1, p0_low and br constants and lanchor_crc64_tab. +.macro crc64_norm_func name:req + .arch armv8-a+crc+crypto + .text + .align 3 + .global \name + .type \name, %function + +/* uint64_t crc64_norm_func(uint64_t seed, const uint8_t * buf, uint64_t len) */ + +// parameter +x_seed .req x0 +x_buf .req x1 +x_len .req x2 + +// return +x_crc_ret .req x0 + +// constant +.equ FOLD_SIZE, 1024 // inputs shorter than this many bytes use only the table loop + +// global variables +x_buf_end .req x3 // end of the 64-byte-multiple region consumed by .clmul_loop +x_counter .req x4 // bytes already consumed by the pmull path, 0 when it is skipped +x_buf_iter .req x5 // read cursor +x_crc64_tab_addr .req x9 // address of lanchor_crc64_tab + +w_tmp .req w6 +x_tmp .req x6 +w_tmp1 .req w7 +x_tmp1 .req x7 +\name\(): + mvn x_seed, x_seed // all work is done on the inverted seed + mov x_counter, 0 + cmp x_len, (FOLD_SIZE-1) + bhi .crc64_clmul_pre // len is at least FOLD_SIZE: fold with pmull first + +.crc64_tab_pre: + cmp x_len, x_counter + bls .done // no bytes left for the table loop + + adrp x_tmp, lanchor_crc64_tab + add x_buf_iter, x_buf, x_counter // resume after the bytes the pmull path consumed + add x_buf, x_buf, x_len // x_buf now marks the end of the input + add x_crc64_tab_addr, x_tmp, :lo12:lanchor_crc64_tab + + .align 3 +.loop_crc64_tab: + ldrb w_tmp, [x_buf_iter], 1 // next input byte, cursor post-incremented + cmp x_buf, x_buf_iter // sets the flags for the bne at the bottom + eor x_tmp, x_tmp, x_seed, lsr 56 // table index: input byte xor top byte of the crc + ldr x_tmp, [x_crc64_tab_addr, x_tmp, lsl 3] // 8-byte table entries + eor x_seed, x_tmp, x_seed, lsl 8 // crc becomes table entry xor (crc shifted left by 8) + bne .loop_crc64_tab + +.done: + mvn x_crc_ret, x_seed // invert again for the return value + ret + +d_tmp .req d3 // v3 is scratch for the first 16 input bytes and is reused for p4_low afterwards +q_tmp .req q3 +v_tmp .req v3 + +q_x0 .req q2 // x0..x3: the four 128-bit fold accumulators +q_x1 .req q16 +q_x2 .req q6 +q_x3 .req q4 + +v_x0 .req v2 +v_x1 .req v16 +v_x2 .req v6 +v_x3 .req v4 + +d_p4_low .req d3 +d_p4_high .req d5 +v_p4_low .req v3 +v_p4_high .req v5 + +q_shuffle .req q1 // byte-reversal index vector loaded from .shuffle_data +v_shuffle .req v1
+ .align 2 +.crc64_clmul_pre: + adrp x_tmp, .shuffle_data + ldr q_shuffle, [x_tmp, #:lo12:.shuffle_data] + + and x_counter, x_len, -64 // bytes handled by the pmull path: len rounded down to a multiple of 64 + sub x_tmp1, x_counter, #64 // bytes left for .clmul_loop after the first 64-byte block + cmp x_tmp1, 63 // flags are consumed by the bls below, nothing in between changes them. This path is only entered with len of at least FOLD_SIZE, so x_tmp1 is at least 960 + + movi v_x0.4s, 0 + ins v_x0.d[1], x_seed // seed in the upper 64 bits, lower 64 bits zero + + add x_buf_iter, x_buf, 64 + + ldr q_tmp, [x_buf] + ldr q_x1, [x_buf, 16] + ldr q_x2, [x_buf, 32] + ldr q_x3, [x_buf, 48] + + tbl v_tmp.16b, {v_tmp.16b}, v_shuffle.16b // reverse the byte order of each 16-byte block + tbl v_x1.16b, {v_x1.16b}, v_shuffle.16b + tbl v_x2.16b, {v_x2.16b}, v_shuffle.16b + tbl v_x3.16b, {v_x3.16b}, v_shuffle.16b + + eor v_x0.16b, v_x0.16b, v_tmp.16b // x0 is the first block with the seed xored onto its first 8 input bytes + bls .clmul_loop_end + + mov x_tmp, p4_high_b0 // assemble the 64-bit p4_high from its four 16-bit slices + movk x_tmp, p4_high_b1, lsl 16 + movk x_tmp, p4_high_b2, lsl 32 + movk x_tmp, p4_high_b3, lsl 48 + fmov d_p4_high, x_tmp + + mov x_tmp, p4_low_b0 // same for p4_low + movk x_tmp, p4_low_b1, lsl 16 + movk x_tmp, p4_low_b2, lsl 32 + movk x_tmp, p4_low_b3, lsl 48 + fmov d_p4_low, x_tmp + + add x_buf_end, x_buf_iter, x_tmp1 // x_buf plus x_counter + +// 1024bit --> 512bit loop +// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t) +d_x0_high .req d24 +d_x1_high .req d22 +d_x2_high .req d20 +d_x3_high .req d18 + +v_x0_high .req v24 +v_x1_high .req v22 +v_x2_high .req v20 +v_x3_high .req v18 + +q_y0 .req q17 // y0..y3: the next 64 input bytes +q_y1 .req q19 +q_y2 .req q7 +q_y3 .req q0 + +v_y0 .req v17 +v_y1 .req v19 +v_y2 .req v7 +v_y3 .req v0 + .align 3 +.clmul_loop: + dup d_x0_high, v_x0.d[1] // copy out the upper 64 bits of each accumulator + dup d_x1_high, v_x1.d[1] + dup d_x2_high, v_x2.d[1] + dup d_x3_high, v_x3.d[1] + + add x_buf_iter, x_buf_iter, 64 + cmp x_buf_iter, x_buf_end // loop condition, tested by the bne at the bottom + + ldr q_y0, [x_buf_iter, -64] + ldr q_y1, [x_buf_iter, -48] + ldr q_y2, [x_buf_iter, -32] + ldr q_y3, [x_buf_iter, -16] + + pmull v_x0.1q, v_x0.1d, v_p4_low.1d // lower 64 bits times p4_low + pmull v_x1.1q, v_x1.1d, v_p4_low.1d + pmull v_x2.1q, v_x2.1d, v_p4_low.1d + pmull v_x3.1q, v_x3.1d, v_p4_low.1d + + pmull v_x0_high.1q, v_x0_high.1d, v_p4_high.1d // upper 64 bits times p4_high + pmull v_x1_high.1q, v_x1_high.1d, v_p4_high.1d + pmull v_x2_high.1q, v_x2_high.1d, v_p4_high.1d + pmull v_x3_high.1q, v_x3_high.1d,
v_p4_high.1d + + tbl v_y0.16b, {v_y0.16b}, v_shuffle.16b // byte-reverse the new input blocks + tbl v_y1.16b, {v_y1.16b}, v_shuffle.16b + tbl v_y2.16b, {v_y2.16b}, v_shuffle.16b + tbl v_y3.16b, {v_y3.16b}, v_shuffle.16b + + eor v_x0_high.16b, v_x0_high.16b, v_x0.16b // combine the two products of each accumulator + eor v_x1_high.16b, v_x1_high.16b, v_x1.16b + eor v_x2_high.16b, v_x2_high.16b, v_x2.16b + eor v_x3_high.16b, v_x3_high.16b, v_x3.16b + + eor v_x0.16b, v_x0_high.16b, v_y0.16b // new accumulator: folded value xor next input block + eor v_x1.16b, v_x1_high.16b, v_y1.16b + eor v_x2.16b, v_x2_high.16b, v_y2.16b + eor v_x3.16b, v_x3_high.16b, v_y3.16b + bne .clmul_loop + +// folding 512bit --> 128bit +// merge x0, x1, x2, x3 => x3 (uint64x2_t): x0 is folded into x1, x1 into x2, x2 into x3 +d_tmp1 .req d18 +v_tmp1 .req v18 + +d_p1_high .req d5 // p1 reuses the registers that held p4 +v_p1_high .req v5 + +d_p1_low .req d3 +v_p1_low .req v3 +.clmul_loop_end: + mov x_tmp, p1_high_b0 + movk x_tmp, p1_high_b1, lsl 16 + movk x_tmp, p1_high_b2, lsl 32 + movk x_tmp, p1_high_b3, lsl 48 + + fmov d_p1_high, x_tmp + + mov x_tmp, p1_low_b0 + movk x_tmp, p1_low_b1, lsl 16 + movk x_tmp, p1_low_b2, lsl 32 + movk x_tmp, p1_low_b3, lsl 48 + + fmov d_p1_low, x_tmp + + dup d_tmp1, v_x0.d[1] + pmull v_x0.1q, v_x0.1d, v_p1_low.1d + pmull v_tmp1.1q, v_tmp1.1d, v_p1_high.1d + eor v_x0.16b, v_tmp1.16b, v_x0.16b + eor v_x1.16b, v_x0.16b, v_x1.16b // x1 now carries x0 + + dup d_tmp1, v_x1.d[1] + pmull v_x1.1q, v_x1.1d, v_p1_low.1d + pmull v_tmp1.1q, v_tmp1.1d, v_p1_high.1d + eor v_tmp1.16b, v_tmp1.16b, v_x1.16b + eor v_x2.16b, v_tmp1.16b, v_x2.16b // x2 now carries x0 and x1 + + dup d_tmp1, v_x2.d[1] + pmull v_x2.1q, v_x2.1d, v_p1_low.1d + pmull v_tmp1.1q, v_tmp1.1d, v_p1_high.1d + eor v_x2.16b, v_tmp1.16b, v_x2.16b + eor v_x3.16b, v_x2.16b, v_x3.16b // x3 now carries all four accumulators + +// fold 64b +d_p0_low .req d3 +v_p0_low .req v3 + +d_x3_high1 .req d2 +v_x3_high1 .req v2 + mov x_tmp, p0_low_b0 + movk x_tmp, p0_low_b1, lsl 16 + movk x_tmp, p0_low_b2, lsl 32 + movk x_tmp, p0_low_b3, lsl 48 + fmov d_p0_low, x_tmp + + dup d_x3_high1, v_x3.d[1] + movi v0.4s, 0 + ext v0.16b, v0.16b, v_x3.16b, #8 // v0: lower 64 bits of x3 moved to the upper half, lower half zero + pmull v_x3_high1.1q, v_x3_high1.1d, v_p0_low.1d // upper 64 bits of x3 times p0_low + eor v0.16b, v0.16b, v_x3_high1.16b + +//
barrett reduction + mov x_tmp, br_low_b0 + movk x_tmp, br_low_b1, lsl 16 + movk x_tmp, br_low_b2, lsl 32 + movk x_tmp, br_low_b3, lsl 48 + + movi v1.4s, 0x0 + mov x_tmp1, -1 + ins v1.d[1], x_tmp1 // v1 is a mask: upper 64 bits all ones, lower 64 bits zero + and v2.16b, v1.16b, v0.16b // v2 is v0 with its lower 64 bits cleared + + fmov d1, x_tmp // d1 now holds br_low, the mask is no longer needed + + dup d4, v0.d[1] + pmull v4.1q, v4.1d, v1.1d // upper 64 bits of v0 times br_low + + mov x_tmp, br_high_b0 + movk x_tmp, br_high_b1, lsl 16 + movk x_tmp, br_high_b2, lsl 32 + movk x_tmp, br_high_b3, lsl 48 + fmov d1, x_tmp // d1 now holds br_high + + eor v2.16b, v2.16b, v4.16b + dup d2, v2.d[1] + pmull v2.1q, v2.1d, v1.1d // upper 64 bits of that sum times br_high + eor v0.16b, v0.16b, v2.16b + umov x_seed, v0.d[0] // the lower 64 bits become the running crc + + b .crc64_tab_pre // the table loop handles the bytes past offset x_counter, then inverts and returns + + .size \name, .-\name + + .section .rodata.cst16,"aM",@progbits,16 + .align 4 +.shuffle_data: + .byte 15, 14, 13, 12, 11, 10, 9, 8 // tbl indices 15 down to 0: reverse the byte order of a 16-byte vector + .byte 7, 6, 5, 4, 3, 2, 1, 0 +.endm diff --git a/src/spdk/isa-l/crc/aarch64/crc64_refl_common_pmull.h b/src/spdk/isa-l/crc/aarch64/crc64_refl_common_pmull.h new file mode 100644 index 000000000..e852dea4a --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc64_refl_common_pmull.h @@ -0,0 +1,302 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.macro crc64_refl_func name:req + .arch armv8-a+crc+crypto + .text + .align 3 + .global \name + .type \name, %function + +// parameter +x_seed .req x0 +x_buf .req x1 +x_len .req x2 + +// return +x_crc_ret .req x0 + +// minimum buffer length (bytes) for the pmull path +.equ FOLD_SIZE, 1024 + +// scratch registers +x_buf_end .req x3 +x_counter .req x4 +x_buf_iter .req x5 +x_crc64_tab_addr .req x6 +w_tmp .req w7 +x_tmp .req x7 + +// crc64 refl function entry +\name\(): +// table-driven crc64: short buffers, and the tail left by the pmull path + mvn x_seed, x_seed // invert the seed + mov x_counter, 0 // bytes already consumed + cmp x_len, (FOLD_SIZE-1) + bhi .crc64_clmul_pre +.crc64_tab_pre: + cmp x_len, x_counter + bls .done + + adrp x_tmp, .lanchor_crc64_tab + add x_buf_iter, x_buf, x_counter + add x_buf, x_buf, x_len + add x_crc64_tab_addr, x_tmp, :lo12:.lanchor_crc64_tab + + .align 3 +.loop_crc64_tab: + ldrb w_tmp, [x_buf_iter], 1 + eor w_tmp, w_tmp, w0 // w0 is the low word of x_seed + cmp x_buf, x_buf_iter + and x_tmp, x_tmp, 255 + ldr x_tmp, [x_crc64_tab_addr, x_tmp, lsl 3] + eor x_seed, x_tmp, x_seed, lsr 8 + bne .loop_crc64_tab +.done: + mvn x_crc_ret, x_crc_ret // invert the result + ret + +// clmul prepare +q_x0 .req q0 +q_x1 .req q4 +q_x2 .req q6 +q_x3 .req q1 + +v_x0 .req v0 +v_x1 .req v4 +v_x2
.req v6 +v_x3 .req v1 + +d_p4_high .req d17 +d_p4_low .req d7 +v_p4_high .req v17 +v_p4_low .req v7 + +d_y0_tmp .req d0 +v_y0_tmp .req v0 + +q_tmp .req q2 +v_tmp .req v2 + + .align 2 +.crc64_clmul_pre: + ldr q_tmp, [x_buf] + ldr q_x1, [x_buf, 16] + ldr q_x2, [x_buf, 32] + ldr q_x3, [x_buf, 48] + + and x_counter, x_len, -64 // len rounded down to a multiple of 64 + sub x_tmp, x_counter, #64 + cmp x_tmp, 63 + + fmov d_y0_tmp, x_seed // save crc to d0 + eor v_x0.16b, v_y0_tmp.16b, v_tmp.16b + + add x_buf_iter, x_buf, 64 + bls .clmul_loop_end // uses the flags from cmp x_tmp, 63 above + + add x_buf_end, x_buf_iter, x_tmp + + mov x_tmp, p4_high_b0 + movk x_tmp, p4_high_b1, lsl 16 + movk x_tmp, p4_high_b2, lsl 32 + movk x_tmp, p4_high_b3, lsl 48 + fmov d_p4_high, x_tmp + + mov x_tmp, p4_low_b0 + movk x_tmp, p4_low_b1, lsl 16 + movk x_tmp, p4_low_b2, lsl 32 + movk x_tmp, p4_low_b3, lsl 48 + fmov d_p4_low, x_tmp + +// 1024bit --> 512bit loop +// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t) +d_x0_high .req d24 +d_x1_high .req d22 +d_x2_high .req d20 +d_x3_high .req d16 + +v_x0_high .req v24 +v_x1_high .req v22 +v_x2_high .req v20 +v_x3_high .req v16 + +q_x0_tmp .req q2 +q_x1_tmp .req q5 +q_x2_tmp .req q3 +q_x3_tmp .req q18 + +v_x0_tmp .req v2 +v_x1_tmp .req v5 +v_x2_tmp .req v3 +v_x3_tmp .req v18 + +q_x0_tmp .req q2 // these four repeat the q_x*_tmp aliases above +q_x1_tmp .req q5 +q_x2_tmp .req q3 +q_x3_tmp .req q18 + + .align 3 +.clmul_loop: + add x_buf_iter, x_buf_iter, 64 + cmp x_buf_iter, x_buf_end + + dup d_x0_high, v_x0.d[1] + dup d_x1_high, v_x1.d[1] + dup d_x2_high, v_x2.d[1] + dup d_x3_high, v_x3.d[1] + + pmull v_x0_high.1q, v_x0_high.1d, v_p4_high.1d + pmull v_x1_high.1q, v_x1_high.1d, v_p4_high.1d + pmull v_x2_high.1q, v_x2_high.1d, v_p4_high.1d + pmull v_x3_high.1q, v_x3_high.1d, v_p4_high.1d + + pmull v_x0.1q, v_x0.1d, v_p4_low.1d + pmull v_x1.1q, v_x1.1d, v_p4_low.1d + pmull v_x2.1q, v_x2.1d, v_p4_low.1d + pmull v_x3.1q, v_x3.1d, v_p4_low.1d + + ldr q_x0_tmp, [x_buf_iter, -64] + ldr q_x1_tmp, [x_buf_iter, -48] + ldr q_x2_tmp, [x_buf_iter, -32] + ldr q_x3_tmp,
[x_buf_iter, -16] + + eor v_x0_tmp.16b, v_x0_tmp.16b, v_x0_high.16b + eor v_x1_tmp.16b, v_x1_tmp.16b, v_x1_high.16b + eor v_x2_tmp.16b, v_x2_tmp.16b, v_x2_high.16b + eor v_x3_tmp.16b, v_x3_tmp.16b, v_x3_high.16b + + eor v_x0.16b, v_x0_tmp.16b, v_x0.16b + eor v_x1.16b, v_x1_tmp.16b, v_x1.16b + eor v_x2.16b, v_x2_tmp.16b, v_x2.16b + eor v_x3.16b, v_x3_tmp.16b, v_x3.16b + bne .clmul_loop // flags from cmp x_buf_iter, x_buf_end at the loop top + +// folding 512bit --> 128bit +// merge x0, x1, x2, x3 => x3 (uint64x2_t) +// input: x0 -> v_x0, x1 -> v_x1, x2 -> v_x2, x3 -> v_x3 +// output: v_x3 +d_p1_high .req d5 +d_p1_low .req d3 +v_p1_high .req v5 +v_p1_low .req v3 + +d_tmp_high .req d16 +d_tmp_low .req d2 +v_tmp_high .req v16 +v_tmp_low .req v2 + +.clmul_loop_end: + mov x_tmp, p1_high_b0 + movk x_tmp, p1_high_b1, lsl 16 + movk x_tmp, p1_high_b2, lsl 32 + movk x_tmp, p1_high_b3, lsl 48 + fmov d_p1_high, x_tmp + + mov x_tmp, p1_low_b0 + movk x_tmp, p1_low_b1, lsl 16 + movk x_tmp, p1_low_b2, lsl 32 + movk x_tmp, p1_low_b3, lsl 48 + fmov d_p1_low, x_tmp + + dup d_tmp_high, v_x0.d[1] + dup d_tmp_low, v_x0.d[0] + + pmull v_tmp_high.1q, v_tmp_high.1d, v_p1_high.1d + pmull v_tmp_low.1q, v_tmp_low.1d, v_p1_low.1d + eor v_tmp_high.16b, v_tmp_high.16b, v_tmp_low.16b + eor v_x1.16b, v_tmp_high.16b, v_x1.16b + + dup d_tmp_high, v_x1.d[1] + pmull v_x1.1q, v_x1.1d, v_p1_low.1d + pmull v_tmp_high.1q, v_tmp_high.1d, v_p1_high.1d + eor v_tmp_high.16b, v_tmp_high.16b, v_x1.16b + eor v_x2.16b, v_tmp_high.16b, v_x2.16b + + dup d_tmp_high, v_x2.d[1] + pmull v_x2.1q, v_x2.1d, v_p1_low.1d + pmull v_tmp_high.1q, v_tmp_high.1d, v_p1_high.1d + eor v_tmp_high.16b, v_tmp_high.16b, v_x2.16b + eor v_x3.16b, v_tmp_high.16b, v_x3.16b + +// fold 64b +// input: v_x3 +// output: v_x3 +d_p0_low .req d3 +v_p0_low .req v3 +d_x3_low_fold_64b .req d2 +v_x3_low_fold_64b .req v2 +v_zero_fold_64b .req v0 + mov x_tmp, p0_low_b0 + movk x_tmp, p0_low_b1, lsl 16 + movk x_tmp, p0_low_b2, lsl 32 + movk x_tmp, p0_low_b3, lsl 48 + fmov d_p0_low, x_tmp + + dup
d_x3_low_fold_64b, v_x3.d[0] + movi v_zero_fold_64b.4s, 0 + ext v_x3.16b, v_x3.16b, v0.16b, #8 // v0 is v_zero_fold_64b + + pmull v_x3_low_fold_64b.1q, v_x3_low_fold_64b.1d, v_p0_low.1d + eor v_x3.16b, v_x3.16b, v_x3_low_fold_64b.16b + +// barrett reduction +// input: v_x3 +// output: x0 +d_br_low .req d3 +d_br_high .req d5 +v_br_low .req v3 +v_br_high .req v5 + mov x0, br_low_b0 + movk x0, br_low_b1, lsl 16 + movk x0, br_low_b2, lsl 32 + movk x0, br_low_b3, lsl 48 + fmov d_br_low, x0 + + mov x0, br_high_b0 + movk x0, br_high_b1, lsl 16 + movk x0, br_high_b2, lsl 32 + movk x0, br_high_b3, lsl 48 + fmov d_br_high, x0 + + dup d2, v_x3.d[0] + + pmull v2.1q, v2.1d, v_br_low.1d + pmull v4.1q, v2.1d, v_br_high.1d + + ext v0.16b, v0.16b, v2.16b, #8 + + eor v0.16b, v0.16b, v4.16b + eor v0.16b, v0.16b, v_x3.16b + umov x0, v0.d[1] // x0 is x_seed + + b .crc64_tab_pre // table loop handles the bytes past x_counter + + .size \name, .-\name +.endm diff --git a/src/spdk/isa-l/crc/aarch64/crc_aarch64_dispatcher.c b/src/spdk/isa-l/crc/aarch64/crc_aarch64_dispatcher.c new file mode 100644 index 000000000..bac9eeba3 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc_aarch64_dispatcher.c @@ -0,0 +1,145 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(crc16_t10dif) +{ + unsigned long auxval = getauxval(AT_HWCAP); /* hardware capability bits */ + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc16_t10dif_pmull); + + return PROVIDER_BASIC(crc16_t10dif); /* fallback when PMULL is not reported */ + +} + +DEFINE_INTERFACE_DISPATCHER(crc16_t10dif_copy) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc16_t10dif_copy_pmull); + + return PROVIDER_BASIC(crc16_t10dif_copy); + +} + +DEFINE_INTERFACE_DISPATCHER(crc32_ieee) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_PMULL) { + return PROVIDER_INFO(crc32_ieee_norm_pmull); + } + + return PROVIDER_BASIC(crc32_ieee); + +} + +DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) /* tested first, so it wins over PMULL */ + return PROVIDER_INFO(crc32_iscsi_refl_hw_fold); + if (auxval & HWCAP_PMULL) { + return PROVIDER_INFO(crc32_iscsi_refl_pmull); + } + return PROVIDER_BASIC(crc32_iscsi); + +} + +DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) /* tested first, so it wins over PMULL */ + return PROVIDER_INFO(crc32_gzip_refl_hw_fold); + if
(auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc32_gzip_refl_pmull); + + return PROVIDER_BASIC(crc32_gzip_refl); + +} + +DEFINE_INTERFACE_DISPATCHER(crc64_ecma_refl) +{ + unsigned long auxval = getauxval(AT_HWCAP); + + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc64_ecma_refl_pmull); + + return PROVIDER_BASIC(crc64_ecma_refl); + +} + +DEFINE_INTERFACE_DISPATCHER(crc64_ecma_norm) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc64_ecma_norm_pmull); + + return PROVIDER_BASIC(crc64_ecma_norm); + +} + +DEFINE_INTERFACE_DISPATCHER(crc64_iso_refl) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc64_iso_refl_pmull); + + return PROVIDER_BASIC(crc64_iso_refl); + +} + +DEFINE_INTERFACE_DISPATCHER(crc64_iso_norm) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc64_iso_norm_pmull); + + return PROVIDER_BASIC(crc64_iso_norm); + +} + +DEFINE_INTERFACE_DISPATCHER(crc64_jones_refl) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc64_jones_refl_pmull); + + return PROVIDER_BASIC(crc64_jones_refl); + +} + +DEFINE_INTERFACE_DISPATCHER(crc64_jones_norm) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_PMULL) + return PROVIDER_INFO(crc64_jones_norm_pmull); + + return PROVIDER_BASIC(crc64_jones_norm); + +} diff --git a/src/spdk/isa-l/crc/aarch64/crc_multibinary_arm.S b/src/spdk/isa-l/crc/aarch64/crc_multibinary_arm.S new file mode 100644 index 000000000..76f957164 --- /dev/null +++ b/src/spdk/isa-l/crc/aarch64/crc_multibinary_arm.S @@ -0,0 +1,42 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################### +#include <aarch64_multibinary.h> + + +mbin_interface crc32_iscsi +mbin_interface crc16_t10dif +mbin_interface crc16_t10dif_copy +mbin_interface crc32_ieee +mbin_interface crc32_gzip_refl +mbin_interface crc64_ecma_refl +mbin_interface crc64_ecma_norm +mbin_interface crc64_iso_refl +mbin_interface crc64_iso_norm +mbin_interface crc64_jones_refl +mbin_interface crc64_jones_norm diff --git a/src/spdk/isa-l/crc/crc16_t10dif_01.asm b/src/spdk/isa-l/crc/crc16_t10dif_01.asm new file mode 100644 index 000000000..f79cd3f16 --- /dev/null +++ b/src/spdk/isa-l/crc/crc16_t10dif_01.asm @@ -0,0 +1,665 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Function API: +; UINT16 crc16_t10dif_01( +; UINT16 init_crc, //initial CRC value, 16 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf + +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx + + %xdefine arg1_low32 edi +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + +align 16 +global crc16_t10dif_01:ISAL_SYM_TYPE_FUNCTION +crc16_t10dif_01: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 + + ; After this point, code flow is exactly the same as a 32-bit CRC. + ; The only difference is before returning eax, we will shift it right 16 bits, to scale back to 16 bits.
 + + sub rsp, VARIABLE_OFFSET +%ifidn __OUTPUT_FORMAT__, win64 + ; save xmm6-xmm13 on the stack; _cleanup restores them before returning + movdqa [rsp+16*2],xmm6 + movdqa [rsp+16*3],xmm7 + movdqa [rsp+16*4],xmm8 + movdqa [rsp+16*5],xmm9 + movdqa [rsp+16*6],xmm10 + movdqa [rsp+16*7],xmm11 + movdqa [rsp+16*8],xmm12 + movdqa [rsp+16*9],xmm13 +%endif + + ; check if smaller than 256 + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time... + jl _less_than_256 + + + ; load the initial crc value + movd xmm10, arg1_low32 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register. + ; because data will be byte-reflected and will align with initial crc at correct place. + pslldq xmm10, 12 + + movdqa xmm11, [SHUF_MASK] + ; receive the initial 128B data, xor the initial crc value + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + pshufb xmm0, xmm11 + ; XOR the initial_crc value + pxor xmm0, xmm10 + pshufb xmm1, xmm11 + pshufb xmm2, xmm11 + pshufb xmm3, xmm11 + pshufb xmm4, xmm11 + pshufb xmm5, xmm11 + pshufb xmm6, xmm11 + pshufb xmm7, xmm11 + + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time.
This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 ; buf += 128; + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm1, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm3, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm5, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm7, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer + ; fold the 8 xmm registers to 1 xmm register with different constants + + movdqa
xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + + movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2 + ;imm value of pclmulqdq instruction will determine which constant to use + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pshufb xmm0, xmm11 + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16.
 + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg3, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. +_get_last_two_xmms: + movdqa xmm2, xmm7 + + movdqu xmm1, [arg2 - 16 + arg3] + pshufb xmm1, xmm11 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg3 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg3 bytes + pshufb xmm2, xmm0 + + ; shift xmm7 to the right by 16-arg3 bytes + pxor xmm0, [mask1] + pshufb xmm7, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10 + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0x1 + pslldq xmm0, 8 + pxor xmm7, xmm0 + + ;32b fold + movdqa xmm0, xmm7 + + pand xmm0, [mask2] + + psrldq xmm7, 12 + pclmulqdq xmm7, xmm10, 0x10 + pxor xmm7, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10 + movdqa xmm0, xmm7 + pclmulqdq xmm7, xmm10, 0x01 + pslldq xmm7, 4 + pclmulqdq xmm7, xmm10, 0x11 + + pslldq xmm7, 4 + pxor xmm7, xmm0 + pextrd eax, xmm7,1 + +_cleanup: + ; scale the result back to 16 bits + shr eax, 16 +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp+16*2] + movdqa xmm7, [rsp+16*3] + movdqa xmm8, [rsp+16*4] + movdqa xmm9, [rsp+16*5] + movdqa xmm10, [rsp+16*6] + movdqa xmm11, [rsp+16*7] + movdqa xmm12, [rsp+16*8] + movdqa xmm13, [rsp+16*9] +%endif + add rsp, VARIABLE_OFFSET + ret + + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + movdqa xmm11, [SHUF_MASK] + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 + + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + ; mov initial crc to the return value (still scaled to 32 bits; _cleanup shifts it back). this is necessary for zero-length buffers. + mov eax, arg1_low32 + test arg3, arg3 + je _cleanup + + movdqa xmm11, [SHUF_MASK] + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
 + + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + cmp arg3, 4 + jl _only_less_than_4 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al +_zero_left: + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + lea rax, [pshufb_shf_table + 16] + sub rax, r9 + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm7, xmm0 + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +_only_less_than_4: + cmp arg3, 3 + jl _only_less_than_3 + + ; load 3 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + mov al, [arg2+2] + mov [r11+2], al + + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + psrldq xmm7, 5 + + jmp _barrett +_only_less_than_3: + cmp arg3, 2 + jl _only_less_than_2 + + ; load 2 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + psrldq xmm7, 6 + + jmp _barrett +_only_less_than_2: + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + psrldq xmm7, 7 + + jmp _barrett + +section .data + +; precomputed constants +; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits) +align 16 +; Q = 0x18BB70000 +;
rk1 = 2^(32*3) mod Q << 32 +; rk2 = 2^(32*5) mod Q << 32 +; rk3 = 2^(32*15) mod Q << 32 +; rk4 = 2^(32*17) mod Q << 32 +; rk5 = 2^(32*3) mod Q << 32 +; rk6 = 2^(32*2) mod Q << 32 +; rk7 = floor(2^64/Q) +; rk8 = Q +rk1: +DQ 0x2d56000000000000 +rk2: +DQ 0x06df000000000000 +rk3: +DQ 0x9d9d000000000000 +rk4: +DQ 0x7cf5000000000000 +rk5: +DQ 0x2d56000000000000 +rk6: +DQ 0x1368000000000000 +rk7: +DQ 0x00000001f65a57f8 +rk8: +DQ 0x000000018bb70000 + +rk9: +DQ 0xceae000000000000 +rk10: +DQ 0xbfd6000000000000 +rk11: +DQ 0x1e16000000000000 +rk12: +DQ 0x713c000000000000 +rk13: +DQ 0xf7f9000000000000 +rk14: +DQ 0x80a6000000000000 +rk15: +DQ 0x044c000000000000 +rk16: +DQ 0xe658000000000000 +rk17: +DQ 0xad18000000000000 +rk18: +DQ 0xa497000000000000 +rk19: +DQ 0x6ee3000000000000 +rk20: +DQ 0xe7b5000000000000 + + + + + + + + + +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF + +SHUF_MASK: +dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13)
/ shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + +;;; func core, ver, snum +slversion crc16_t10dif_01, 01, 06, 0010 + diff --git a/src/spdk/isa-l/crc/crc16_t10dif_by4.asm b/src/spdk/isa-l/crc/crc16_t10dif_by4.asm new file mode 100644 index 000000000..722ed9549 --- /dev/null +++ b/src/spdk/isa-l/crc/crc16_t10dif_by4.asm @@ -0,0 +1,562 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Function API: +; UINT16 crc16_t10dif_by4( +; UINT16 init_crc, //initial CRC value, 16 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://download.intel.com/design/intarch/papers/323102.pdf +; + +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx + + %xdefine arg1_low32 edi +%endif + +; Entry point: arg1 = init_crc, arg2 = buf, arg3 = len (register mapping chosen by the %ifidn block above). +; The CRC is computed on a value scaled to 32 bits and returned in eax after being shifted back to 16 bits. +align 16 +global crc16_t10dif_by4:ISAL_SYM_TYPE_FUNCTION +crc16_t10dif_by4: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 + + ; After this point, code flow is exactly same as a 32-bit CRC. + ; The only difference is before returning eax, we will shift + ; it right 16 bits, to scale back to 16 bits. + + ; reserve stack space: [rsp] is used as a 16-byte scratch buffer for inputs shorter than 16 bytes, + ; [rsp+16*2] and [rsp+16*3] hold the saved xmm6/xmm7 + sub rsp,16*4+8 + + ; save xmm6 and xmm7 on the stack; _cleanup restores them before returning + movdqa [rsp+16*2],xmm6 + movdqa [rsp+16*3],xmm7 + + ; check if smaller than 128B + cmp arg3, 128 + + ; for sizes less than 128, we can't fold 64B at a time... 
+ jl _less_than_128 + + + ; load the initial crc value + movd xmm6, arg1_low32 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to + ; be moved to the high part of the register. + ; because data will be byte-reflected and will align with + ; initial crc at correct place. + pslldq xmm6, 12 + + movdqa xmm7, [SHUF_MASK] + ; receive the initial 64B data, xor the initial crc value + movdqu xmm0, [arg2] + movdqu xmm1, [arg2+16] + movdqu xmm2, [arg2+32] + movdqu xmm3, [arg2+48] + + pshufb xmm0, xmm7 + ; XOR the initial_crc value + pxor xmm0, xmm6 + pshufb xmm1, xmm7 + pshufb xmm2, xmm7 + pshufb xmm3, xmm7 + + movdqa xmm6, [rk3] ;xmm6 has rk3 and rk4 + ;imm value of pclmulqdq instruction + ;will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 128 instead of 64 to save one instruction from the loop + sub arg3, 128 + + ; at this section of the code, there is 64*x+y (0<=y<64) bytes of + ; buffer. The _fold_64_B_loop + ; loop will fold 64B at a time until we have 64+y Bytes of buffer + + + ; fold 64B at a time. 
This section of the code folds 4 xmm + ; registers in parallel +_fold_64_B_loop: + + ; update the buffer pointer + add arg2, 64 ; buf += 64; + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm4, xmm0 + movdqu xmm5, xmm1 + + pclmulqdq xmm0, xmm6 , 0x11 + pclmulqdq xmm1, xmm6 , 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm4, xmm2 + movdqu xmm5, xmm3 + + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm3, xmm6, 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + movdqu xmm4, [arg2] + movdqu xmm5, [arg2+16] + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + movdqu xmm4, [arg2+32] + movdqu xmm5, [arg2+48] + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + sub arg3, 64 + + ; check if there is another 64B in the buffer to be able to fold + jge _fold_64_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg2, 64 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer + ; the 64B of folded data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3 + + + ; fold the 4 xmm registers to 1 xmm register with different constants + + movdqa xmm6, [rk1] ;xmm6 has rk1 and rk2 + ;imm value of pclmulqdq instruction will + ;determine which constant to use + + movdqa xmm4, xmm0 + pclmulqdq xmm0, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm1, xmm4 + pxor xmm1, xmm0 + + movdqa xmm4, xmm1 + pclmulqdq xmm1, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm2, xmm4 + pxor xmm2, xmm1 + + movdqa xmm4, xmm2 + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + + + ; instead of 64, we add 48 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 64-16 + jl _final_reduction_for_128 + + ; now we 
have 16+y bytes left to reduce. 16 Bytes + ; is in register xmm3 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + movdqu xmm0, [arg2] + pshufb xmm0, xmm7 + pxor xmm3, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm3 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg3, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, + ; we can offset the input pointer before the actual point, + ; to receive exactly 16 bytes. + ; after that the registers need to be adjusted. 
+_get_last_two_xmms: + movdqa xmm2, xmm3 + + movdqu xmm1, [arg2 - 16 + arg3] + pshufb xmm1, xmm7 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg3 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg3 bytes + pshufb xmm2, xmm0 + + ; shift xmm3 to the right by 16-arg3 bytes + pxor xmm0, [mask1] + pshufb xmm3, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm6, [rk5] ; rk5 and rk6 in xmm6 + movdqa xmm0, xmm3 + + ;64b fold + pclmulqdq xmm3, xmm6, 0x1 + pslldq xmm0, 8 + pxor xmm3, xmm0 + + ;32b fold + movdqa xmm0, xmm3 + + pand xmm0, [mask2] + + psrldq xmm3, 12 + pclmulqdq xmm3, xmm6, 0x10 + pxor xmm3, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm6, [rk7] ; rk7 and rk8 in xmm6 + movdqa xmm0, xmm3 + pclmulqdq xmm3, xmm6, 0x01 + pslldq xmm3, 4 + pclmulqdq xmm3, xmm6, 0x11 + + pslldq xmm3, 4 + pxor xmm3, xmm0 + pextrd eax, xmm3,1 + +_cleanup: + ; scale the result back to 16 bits + shr eax, 16 + movdqa xmm6, [rsp+16*2] + movdqa xmm7, [rsp+16*3] + add rsp,16*4+8 + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_128: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + movdqa xmm7, [SHUF_MASK] + + ; if there is, load the constants + movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6 + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + movdqu xmm3, [arg2] ; load the plaintext + pshufb xmm3, xmm7 ; 
byte-reflect the plaintext + pxor xmm3, xmm0 + + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov eax, arg1_low32 + test arg3, arg3 + je _cleanup + + movdqa xmm7, [SHUF_MASK] + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm3, [arg2] ; load the plaintext + pshufb xmm3, xmm7 ; byte-reflect the plaintext + pxor xmm3, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. + + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + cmp arg3, 4 + jl _only_less_than_4 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al +_zero_left: + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + ; shl r9, 4 + lea rax, [pshufb_shf_table + 16] + sub rax, r9 + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm3, xmm0 + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm3, [arg2] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + jmp 
_128_done + +_only_less_than_4: + ; 1 to 3 bytes of input: copy them byte by byte into the zeroed stack buffer, + ; then jump straight to _barrett (the 64b/32b folds in _128_done are skipped) + cmp arg3, 3 + jl _only_less_than_3 + + ; load 3 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + mov al, [arg2+2] + mov [r11+2], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 5 + + jmp _barrett +_only_less_than_3: + cmp arg3, 2 + jl _only_less_than_2 + + ; load 2 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 6 + + jmp _barrett +_only_less_than_2: + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 7 + + jmp _barrett + +section .data + +; precomputed constants +; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits) +align 16 +; Q = 0x18BB70000 +; rk1 = 2^(32*3) mod Q << 32 +; rk2 = 2^(32*5) mod Q << 32 +; rk3 = 2^(32*15) mod Q << 32 +; rk4 = 2^(32*17) mod Q << 32 +; rk5 = 2^(32*3) mod Q << 32 +; rk6 = 2^(32*2) mod Q << 32 +; rk7 = floor(2^64/Q) +; rk8 = Q +rk1: +DQ 0x2d56000000000000 +rk2: +DQ 0x06df000000000000 +rk3: +DQ 0x044c000000000000 +rk4: +DQ 0xe658000000000000 +rk5: +DQ 0x2d56000000000000 +rk6: +DQ 0x1368000000000000 +rk7: +DQ 0x00000001f65a57f8 +rk8: +DQ 0x000000018bb70000 +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF + +SHUF_MASK: +dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 
0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + +;;; func core, ver, snum +slversion crc16_t10dif_by4, 05, 02, 0016 diff --git a/src/spdk/isa-l/crc/crc16_t10dif_copy_by4.asm b/src/spdk/isa-l/crc/crc16_t10dif_copy_by4.asm new file mode 100644 index 000000000..fd9b75471 --- /dev/null +++ b/src/spdk/isa-l/crc/crc16_t10dif_copy_by4.asm @@ -0,0 +1,598 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Function API: +; UINT16 crc16_t10dif_copy_by4( +; UINT16 init_crc, //initial CRC value, 16 bits +; unsigned char *dst, //buffer pointer destination for copy +; const unsigned char *src, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://download.intel.com/design/intarch/papers/323102.pdf +; + +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + %xdefine arg4 r9 + %xdefine tmp1 r10 + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx + %xdefine arg4 rcx + %xdefine tmp1 r10 + %xdefine arg1_low32 edi +%endif + +; Entry point: arg1 = init_crc, arg2 = dst, arg3 = src, arg4 = len (register mapping chosen by the %ifidn block above). +; Every chunk loaded from src is also stored to dst; the CRC is computed on a value scaled to 32 bits +; and returned in eax after being shifted back to 16 bits. +align 16 +global crc16_t10dif_copy_by4:ISAL_SYM_TYPE_FUNCTION +crc16_t10dif_copy_by4: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 + + ; After this point, code flow is 
exactly same as a 32-bit CRC. + ; The only difference is before returning eax, we will shift + ; it right 16 bits, to scale back to 16 bits. + + ; reserve stack space: [rsp] is used as a 16-byte scratch buffer for inputs shorter than 16 bytes, + ; [rsp+16*2] and [rsp+16*3] hold the saved xmm6/xmm7 + sub rsp,16*4+8 + + ; save xmm6 and xmm7 on the stack; _cleanup restores them before returning + movdqa [rsp+16*2],xmm6 + movdqa [rsp+16*3],xmm7 + + ; check if smaller than 128B + cmp arg4, 128 + + ; for sizes less than 128, we can't fold 64B at a time... + jl _less_than_128 + + + ; load the initial crc value + movd xmm6, arg1_low32 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to + ; be moved to the high part of the register. + ; because data will be byte-reflected and will align with + ; initial crc at correct place. + pslldq xmm6, 12 + + movdqa xmm7, [SHUF_MASK] + ; receive the initial 64B data, xor the initial crc value + movdqu xmm0, [arg3] + movdqu xmm1, [arg3+16] + movdqu xmm2, [arg3+32] + movdqu xmm3, [arg3+48] + + ; copy initial data + movdqu [arg2], xmm0 + movdqu [arg2+16], xmm1 + movdqu [arg2+32], xmm2 + movdqu [arg2+48], xmm3 + + pshufb xmm0, xmm7 + ; XOR the initial_crc value + pxor xmm0, xmm6 + pshufb xmm1, xmm7 + pshufb xmm2, xmm7 + pshufb xmm3, xmm7 + + movdqa xmm6, [rk3] ;xmm6 has rk3 and rk4 + ;imm value of pclmulqdq instruction + ;will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 128 instead of 64 to save one instruction from the loop + sub arg4, 128 + + ; at this section of the code, there is 64*x+y (0<=y<64) bytes of + ; buffer. The _fold_64_B_loop + ; loop will fold 64B at a time until we have 64+y Bytes of buffer + + + ; fold 64B at a time. 
This section of the code folds 4 xmm + ; registers in parallel +_fold_64_B_loop: + + ; update the buffer pointer + add arg3, 64 ; buf += 64; + add arg2, 64 + + prefetchnta [arg3+fetch_dist+0] + movdqu xmm4, xmm0 + movdqu xmm5, xmm1 + + pclmulqdq xmm0, xmm6 , 0x11 + pclmulqdq xmm1, xmm6 , 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + prefetchnta [arg3+fetch_dist+32] + movdqu xmm4, xmm2 + movdqu xmm5, xmm3 + + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm3, xmm6, 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + movdqu xmm4, [arg3] + movdqu xmm5, [arg3+16] + movdqu [arg2], xmm4 + movdqu [arg2+16], xmm5 + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + movdqu xmm4, [arg3+32] + movdqu xmm5, [arg3+48] + movdqu [arg2+32], xmm4 + movdqu [arg2+48], xmm5 + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + sub arg4, 64 + + ; check if there is another 64B in the buffer to be able to fold + jge _fold_64_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg3, 64 + add arg2, 64 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer + ; the 64B of folded data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3 + + + ; fold the 4 xmm registers to 1 xmm register with different constants + + movdqa xmm6, [rk1] ;xmm6 has rk1 and rk2 + ;imm value of pclmulqdq instruction will + ;determine which constant to use + + movdqa xmm4, xmm0 + pclmulqdq xmm0, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm1, xmm4 + pxor xmm1, xmm0 + + movdqa xmm4, xmm1 + pclmulqdq xmm1, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm2, xmm4 + pxor xmm2, xmm1 + + movdqa xmm4, xmm2 + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + + + ; instead of 64, we add 48 to the loop counter to save 1 instruction from the loop + ; instead of a 
cmp instruction, we use the negative flag with the jl instruction + add arg4, 64-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 16 Bytes + ; is in register xmm3 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + movdqu xmm0, [arg3] + movdqu [arg2], xmm0 + pshufb xmm0, xmm7 + pxor xmm3, xmm0 + add arg3, 16 + add arg2, 16 + sub arg4, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg4, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm3 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg4, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, + ; we can offset the input pointer before the actual point, + ; to receive exactly 16 bytes. + ; after that the registers need to be adjusted. 
+_get_last_two_xmms: + movdqa xmm2, xmm3 + + movdqu xmm1, [arg3 - 16 + arg4] + movdqu [arg2 - 16 + arg4], xmm1 + pshufb xmm1, xmm7 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg4 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg4 bytes + pshufb xmm2, xmm0 + + ; shift xmm3 to the right by 16-arg4 bytes + pxor xmm0, [mask1] + pshufb xmm3, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm6, [rk5] ; rk5 and rk6 in xmm6 + movdqa xmm0, xmm3 + + ;64b fold + pclmulqdq xmm3, xmm6, 0x1 + pslldq xmm0, 8 + pxor xmm3, xmm0 + + ;32b fold + movdqa xmm0, xmm3 + + pand xmm0, [mask2] + + psrldq xmm3, 12 + pclmulqdq xmm3, xmm6, 0x10 + pxor xmm3, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm6, [rk7] ; rk7 and rk8 in xmm6 + movdqa xmm0, xmm3 + pclmulqdq xmm3, xmm6, 0x01 + pslldq xmm3, 4 + pclmulqdq xmm3, xmm6, 0x11 + + pslldq xmm3, 4 + pxor xmm3, xmm0 + pextrd eax, xmm3,1 + +_cleanup: + ; scale the result back to 16 bits + shr eax, 16 + movdqa xmm6, [rsp+16*2] + movdqa xmm7, [rsp+16*3] + add rsp,16*4+8 + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_128: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg4, 32 + jl _less_than_32 + movdqa xmm7, [SHUF_MASK] + + ; if there is, load the constants + movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6 + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + movdqu xmm3, [arg3] ; load the 
plaintext + movdqu [arg2], xmm3 ; store copy + pshufb xmm3, xmm7 ; byte-reflect the plaintext + pxor xmm3, xmm0 + + + ; update the buffer pointer + add arg3, 16 + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg4, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov eax, arg1_low32 + test arg4, arg4 + je _cleanup + + movdqa xmm7, [SHUF_MASK] + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + + cmp arg4, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm3, [arg3] ; load the plaintext + movdqu [arg2], xmm3 ; store the copy + pshufb xmm3, xmm7 ; byte-reflect the plaintext + pxor xmm3, xmm0 ; xor the initial crc value + add arg3, 16 + add arg2, 16 + sub arg4, 16 + movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + cmp arg4, 4 + jl _only_less_than_4 + + ; backup the counter value + mov tmp1, arg4 + cmp arg4, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg3] + mov [arg2], rax + mov [r11], rax + add r11, 8 + sub arg4, 8 + add arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg4, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg3] + mov [arg2], eax + mov [r11], eax + add r11, 4 + sub arg4, 4 + add arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg4, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg3] + mov [arg2], ax + mov [r11], ax + add r11, 2 + sub arg4, 2 + add arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg4, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg3] + mov [arg2], al + mov [r11], al +_zero_left: + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + ; shl tmp1, 4 + lea rax, [pshufb_shf_table + 16] + sub rax, tmp1 + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm3, xmm0 + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm3, [arg3] + movdqu [arg2], xmm3 + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + jmp _128_done + +_only_less_than_4: + ; 1 to 3 bytes of input: copy them byte by byte to dst and into the zeroed stack buffer, + ; then jump straight to _barrett (the 64b/32b folds in _128_done are skipped) + cmp arg4, 3 + jl _only_less_than_3 + + ; load 3 Bytes + mov al, [arg3] + mov [arg2], al + mov [r11], al + + mov al, [arg3+1] + mov [arg2+1], al + mov [r11+1], al + + mov al, [arg3+2] + mov [arg2+2], al + mov [r11+2], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 5 + + jmp _barrett +_only_less_than_3: + cmp arg4, 2 + jl _only_less_than_2 + + ; load 2 Bytes + mov al, [arg3] + mov [arg2], al + mov [r11], al + + mov al, [arg3+1] + mov [arg2+1], al + mov [r11+1], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 6 + + jmp _barrett +_only_less_than_2: + + ; load 1 Byte + mov al, [arg3] + mov [arg2],al + mov [r11], al + + movdqa xmm3, [rsp] + pshufb 
xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 7 + + jmp _barrett + +section .data + +; precomputed constants +; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits) +align 16 +; Q = 0x18BB70000 +; rk1 = 2^(32*3) mod Q << 32 +; rk2 = 2^(32*5) mod Q << 32 +; rk3 = 2^(32*15) mod Q << 32 +; rk4 = 2^(32*17) mod Q << 32 +; rk5 = 2^(32*3) mod Q << 32 +; rk6 = 2^(32*2) mod Q << 32 +; rk7 = floor(2^64/Q) +; rk8 = Q +rk1: +DQ 0x2d56000000000000 +rk2: +DQ 0x06df000000000000 +rk3: +DQ 0x044c000000000000 +rk4: +DQ 0xe658000000000000 +rk5: +DQ 0x2d56000000000000 +rk6: +DQ 0x1368000000000000 +rk7: +DQ 0x00000001f65a57f8 +rk8: +DQ 0x000000018bb70000 +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF + +SHUF_MASK: +dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 
; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + +;;; func core, ver, snum +slversion crc16_t10dif_copy_by4, 05, 02, 0000 diff --git a/src/spdk/isa-l/crc/crc16_t10dif_copy_perf.c b/src/spdk/isa-l/crc/crc16_t10dif_copy_perf.c new file mode 100644 index 000000000..17cba6bc0 --- /dev/null +++ b/src/spdk/isa-l/crc/crc16_t10dif_copy_perf.c @@ -0,0 +1,84 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include +#include "crc.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_LEN 8*1024 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN (2 * GT_L3_CACHE) +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define TEST_MEM TEST_LEN + +int main(int argc, char *argv[]) +{ + void *src, *dst; + uint16_t crc; + struct perf start; + + printf("crc16_t10dif_copy_perf:\n"); + + if (posix_memalign(&src, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + if (posix_memalign(&dst, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + + printf("Start timed tests\n"); + fflush(0); + + memset(src, 0, TEST_LEN); + BENCHMARK(&start, BENCHMARK_TIME, crc = + crc16_t10dif_copy(TEST_SEED, dst, src, TEST_LEN)); + printf("crc16_t10dif_copy" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + printf("finish 0x%x\n", crc); + return 0; +} diff --git a/src/spdk/isa-l/crc/crc16_t10dif_copy_test.c b/src/spdk/isa-l/crc/crc16_t10dif_copy_test.c new file mode 100644 index 000000000..4c398c429 --- /dev/null +++ b/src/spdk/isa-l/crc/crc16_t10dif_copy_test.c @@ -0,0 
+1,175 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include +#include +#include "crc.h" +#include "crc_ref.h" + +#ifndef RANDOMS +# define RANDOMS 20 +#endif +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define MAX_BUF 2345 +#define TEST_SIZE 217 +#define TEST_LEN (8 * 1024) + +typedef uint16_t u16; +typedef uint8_t u8; + +// bitwise crc version +uint16_t crc16_t10dif_copy_ref(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len); + +void rand_buffer(unsigned char *buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +int memtst(unsigned char *buf, unsigned char c, int len) +{ + int i; + for (i = 0; i < len; i++) + if (*buf++ != c) + return 1; + + return 0; +} + +int crc_copy_check(const char *description, u8 * dst, u8 * src, u8 dst_fill_val, int len, + int tot) +{ + u16 seed; + int rem; + + assert(tot >= len); + seed = rand(); + rem = tot - len; + memset(dst, dst_fill_val, tot); + + // multi-binary crc version + u16 crc_dut = crc16_t10dif_copy(seed, dst, src, len); + u16 crc_ref = crc16_t10dif(seed, src, len); + if (crc_dut != crc_ref) { + printf("%s, crc gen fail: 0x%4x 0x%4x len=%d\n", description, crc_dut, + crc_ref, len); + return 1; + } else if (memcmp(dst, src, len)) { + printf("%s, copy fail: len=%d\n", description, len); + return 1; + } else if (memtst(&dst[len], dst_fill_val, rem)) { + printf("%s, writeover fail: len=%d\n", description, len); + return 1; + } + // bitwise crc version + crc_dut = crc16_t10dif_copy_ref(seed, dst, src, len); + crc_ref = crc16_t10dif_ref(seed, src, len); + if (crc_dut != crc_ref) { + printf("%s, crc gen fail (table-driven): 0x%4x 0x%4x len=%d\n", description, + crc_dut, crc_ref, len); + return 1; + } else if (memcmp(dst, src, len)) { + printf("%s, copy fail (table driven): len=%d\n", description, len); + return 1; + } else if (memtst(&dst[len], dst_fill_val, rem)) { + printf("%s, writeover fail (table driven): 
len=%d\n", description, len); + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + int r = 0; + int i; + int len, tot; + u8 *src_raw, *dst_raw; + u8 *src, *dst; + + printf("Test crc16_t10dif_copy_test:\n"); + src_raw = (u8 *) malloc(TEST_LEN); + dst_raw = (u8 *) malloc(TEST_LEN); + if (NULL == src_raw || NULL == dst_raw) { + printf("alloc error: Fail"); + return -1; + } + src = src_raw; + dst = dst_raw; + + srand(TEST_SEED); + + // Test of all zeros + memset(src, 0, TEST_LEN); + r |= crc_copy_check("zero tst", dst, src, 0x5e, MAX_BUF, TEST_LEN); + + // Another simple test pattern + memset(src, 0xff, TEST_LEN); + r |= crc_copy_check("simp tst", dst, src, 0x5e, MAX_BUF, TEST_LEN); + + // Do a few short len random data tests + rand_buffer(src, TEST_LEN); + rand_buffer(dst, TEST_LEN); + for (i = 0; i < MAX_BUF; i++) { + r |= crc_copy_check("short len", dst, src, rand(), i, MAX_BUF); + } + printf("."); + + // Do a few longer tests, random data + for (i = TEST_LEN; i >= (TEST_LEN - TEST_SIZE); i--) { + r |= crc_copy_check("long len", dst, src, rand(), i, TEST_LEN); + } + printf("."); + + // Do random size, random data + for (i = 0; i < RANDOMS; i++) { + len = rand() % TEST_LEN; + r |= crc_copy_check("rand len", dst, src, rand(), len, TEST_LEN); + } + printf("."); + + // Run tests at end of buffer + for (i = 0; i < RANDOMS; i++) { + len = rand() % TEST_LEN; + src = &src_raw[TEST_LEN - len - 1]; + dst = &dst_raw[TEST_LEN - len - 1]; + tot = len; + r |= crc_copy_check("end of buffer", dst, src, rand(), len, tot); + } + printf("."); + + printf("Test done: %s\n", r ? "Fail" : "Pass"); + return r; +} diff --git a/src/spdk/isa-l/crc/crc16_t10dif_op_perf.c b/src/spdk/isa-l/crc/crc16_t10dif_op_perf.c new file mode 100644 index 000000000..9b91ef39d --- /dev/null +++ b/src/spdk/isa-l/crc/crc16_t10dif_op_perf.c @@ -0,0 +1,116 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include "crc.h" +#include "test.h" + +#define BLKSIZE (512) + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define NBLOCKS 100 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. 
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN (2 * GT_L3_CACHE) +# define NBLOCKS (TEST_LEN / BLKSIZE) +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +struct blk { + uint8_t data[BLKSIZE]; +}; + +struct blk_ext { + uint8_t data[BLKSIZE]; + uint32_t tag; + uint16_t meta; + uint16_t crc; +}; + +void crc16_t10dif_copy_perf(struct blk *blks, struct blk *blkp, struct blk_ext *blks_ext, + struct blk_ext *blkp_ext, uint16_t * crc) +{ + int i; + for (i = 0, blkp = blks, blkp_ext = blks_ext; i < NBLOCKS; i++) { + *crc = crc16_t10dif_copy(TEST_SEED, blkp_ext->data, blkp->data, + sizeof(blks->data)); + blkp_ext->crc = *crc; + blkp++; + blkp_ext++; + } +} + +int main(int argc, char *argv[]) +{ + uint16_t crc; + struct blk *blks, *blkp; + struct blk_ext *blks_ext, *blkp_ext; + struct perf start; + + printf("crc16_t10dif_streaming_insert_perf:\n"); + + if (posix_memalign((void *)&blks, 1024, NBLOCKS * sizeof(*blks))) { + printf("alloc error: Fail"); + return -1; + } + if (posix_memalign((void *)&blks_ext, 1024, NBLOCKS * sizeof(*blks_ext))) { + printf("alloc error: Fail"); + return -1; + } + + printf(" size blk: %ld, blk_ext: %ld, blk data: %ld, stream: %ld\n", + sizeof(*blks), sizeof(*blks_ext), sizeof(blks->data), + NBLOCKS * sizeof(blks->data)); + memset(blks, 0xe5, NBLOCKS * sizeof(*blks)); + memset(blks_ext, 0xe5, NBLOCKS * sizeof(*blks_ext)); + + printf("Start timed tests\n"); + fflush(0); + + // Copy and insert test + BENCHMARK(&start, BENCHMARK_TIME, + crc16_t10dif_copy_perf(blks, blkp, blks_ext, blkp_ext, &crc)); + + printf("crc16_t10pi_op_copy_insert" TEST_TYPE_STR ": "); + perf_print(start, (long long)sizeof(blks->data) * NBLOCKS); + + printf("finish 0x%x\n", crc); + return 0; +} diff --git a/src/spdk/isa-l/crc/crc16_t10dif_perf.c b/src/spdk/isa-l/crc/crc16_t10dif_perf.c new file mode 100644 index 000000000..7b7c0bcd9 --- /dev/null +++ 
b/src/spdk/isa-l/crc/crc16_t10dif_perf.c @@ -0,0 +1,79 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include +#include +#include "crc.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_LEN 8*1024 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN (2 * GT_L3_CACHE) +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define TEST_MEM TEST_LEN + +int main(int argc, char *argv[]) +{ + void *buf; + uint16_t crc; + struct perf start; + + printf("crc16_t10dif_perf:\n"); + + if (posix_memalign(&buf, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + + printf("Start timed tests\n"); + fflush(0); + + memset(buf, 0, TEST_LEN); + BENCHMARK(&start, BENCHMARK_TIME, crc = crc16_t10dif(TEST_SEED, buf, TEST_LEN)); + printf("crc16_t10dif" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + printf("finish 0x%x\n", crc); + return 0; +} diff --git a/src/spdk/isa-l/crc/crc16_t10dif_test.c b/src/spdk/isa-l/crc/crc16_t10dif_test.c new file mode 100644 index 000000000..ceb9aab45 --- /dev/null +++ b/src/spdk/isa-l/crc/crc16_t10dif_test.c @@ -0,0 +1,179 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include "crc.h" +#include "types.h" +#include "crc_ref.h" + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define MAX_BUF 4096 +#define TEST_SIZE 20 + +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; + +uint16_t crc16_t10dif_ref(uint16_t seed, uint8_t * buf, uint64_t len); + +void rand_buffer(unsigned char *buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +int main(int argc, char *argv[]) +{ + int fail = 0; + u32 r = 0; + int verbose = argc - 1; + int i, s; + void *buf_raw; + unsigned char *buf; + + printf("Test crc16_t10dif_test "); + if (posix_memalign(&buf_raw, 32, MAX_BUF * TEST_SIZE)) { + printf("alloc error: Fail"); + return -1; + } + buf = (unsigned char *)buf_raw; + + srand(TEST_SEED); + + // Test of all zeros + memset(buf, 0, MAX_BUF * 10); + u16 crc_ref = crc16_t10dif_ref(TEST_SEED, buf, MAX_BUF); + u16 crc_base 
= crc16_t10dif_base(TEST_SEED, buf, MAX_BUF); + u16 crc = crc16_t10dif(TEST_SEED, buf, MAX_BUF); + if ((crc_base != crc_ref) || (crc != crc_ref)) { + fail++; + printf("\n opt ref\n"); + printf(" ------ ------\n"); + printf("crc zero = 0x%4x 0x%4x 0x%4x \n", crc_ref, crc_base, crc); + } else + printf("."); + + // Another simple test pattern + memset(buf, 0x8a, MAX_BUF); + crc_ref = crc16_t10dif_ref(TEST_SEED, buf, MAX_BUF); + crc_base = crc16_t10dif_base(TEST_SEED, buf, MAX_BUF); + crc = crc16_t10dif(TEST_SEED, buf, MAX_BUF); + if ((crc_base != crc_ref) || (crc != crc_ref)) { + fail++; + printf("crc all 8a = 0x%4x 0x%4x 0x%4x\n", crc_ref, crc_base, crc); + } else + printf("."); + + // Do a few random tests + + rand_buffer(buf, MAX_BUF * TEST_SIZE); + + for (i = 0; i < TEST_SIZE; i++) { + crc_ref = crc16_t10dif_ref(TEST_SEED, buf, MAX_BUF); + crc_base = crc16_t10dif_base(TEST_SEED, buf, MAX_BUF); + crc = crc16_t10dif(TEST_SEED, buf, MAX_BUF); + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc rand%3d = 0x%4x 0x%4x 0x%4x\n", i, crc_ref, crc_base, crc); + else if (i % (TEST_SIZE / 8) == 0) + printf("."); + buf += MAX_BUF; + } + + // Do a few random sizes + buf = (unsigned char *)buf_raw; //reset buf + r = rand(); + + for (i = MAX_BUF; i >= 0; i--) { + crc_ref = crc16_t10dif_ref(r, buf, i); + crc_base = crc16_t10dif_base(r, buf, i); + crc = crc16_t10dif(r, buf, i); + if ((crc_base != crc_ref) || (crc != crc_ref)) { + fail++; + printf("fail random size%i 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base, + crc); + } else if (i % (MAX_BUF / 8) == 0) + printf("."); + } + + // Try different seeds + for (s = 0; s < 20; s++) { + buf = (unsigned char *)buf_raw; //reset buf + + r = rand(); // just to get a new seed + rand_buffer(buf, MAX_BUF * TEST_SIZE); // new pseudo-rand data + + if (verbose) + printf("seed = 0x%x\n", r); + + for (i = 0; i < TEST_SIZE; i++) { + crc_ref = crc16_t10dif_ref(r, buf, MAX_BUF); + crc_base = crc16_t10dif_base(r, buf, 
MAX_BUF); + crc = crc16_t10dif(r, buf, MAX_BUF); + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc rand%3d = 0x%4x 0x%4x 0x%4x\n", i, crc_ref, + crc_base, crc); + else if (i % (TEST_SIZE * 20 / 8) == 0) + printf("."); + buf += MAX_BUF; + } + } + + // Run tests at end of buffer + buf = (unsigned char *)buf_raw; //reset buf + buf = buf + ((MAX_BUF - 1) * TEST_SIZE); //Line up TEST_SIZE from end + for (i = 0; i < TEST_SIZE; i++) { + crc_ref = crc16_t10dif_ref(TEST_SEED, buf + i, TEST_SIZE - i); + crc_base = crc16_t10dif_base(TEST_SEED, buf + i, TEST_SIZE - i); + crc = crc16_t10dif(TEST_SEED, buf + i, TEST_SIZE - i); + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc eob rand%3d = 0x%4x 0x%4x 0x%4x\n", i, crc_ref, crc_base, + crc); + else + printf("."); + } + + printf("Test done: %s\n", fail ? "Fail" : "Pass"); + if (fail) + printf("\nFailed %d tests\n", fail); + + return fail; +} diff --git a/src/spdk/isa-l/crc/crc32_funcs_test.c b/src/spdk/isa-l/crc/crc32_funcs_test.c new file mode 100644 index 000000000..e28da4018 --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_funcs_test.c @@ -0,0 +1,324 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include "crc.h" +#include "types.h" +#include "crc_ref.h" + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define MAX_BUF 4096 +#define TEST_SIZE 32 + +typedef uint32_t(*crc32_func_t) (uint32_t, const uint8_t *, uint64_t); +typedef uint32_t(*crc32_func_t_base) (uint32_t, uint8_t *, uint64_t); +typedef uint32_t(*crc32_func_t_ref) (uint32_t, uint8_t *, uint64_t); + +typedef struct func_case { + char *note; + crc32_func_t crc32_func_call; + crc32_func_t_base crc32_base_call; + crc32_func_t_ref crc32_ref_call; +} func_case_t; + +uint32_t crc32_iscsi_wrap(uint32_t seed, const uint8_t * buf, uint64_t len) +{ + return crc32_iscsi((uint8_t *) buf, len, seed); +} + +uint32_t crc32_iscsi_base_wrap(uint32_t seed, uint8_t * buf, uint64_t len) +{ + return crc32_iscsi_base(buf, len, seed); +} + +uint32_t crc32_iscsi_ref_wrap(uint32_t seed, uint8_t * buf, uint64_t len) +{ + return crc32_iscsi_ref(buf, 
len, seed); +} + +func_case_t test_funcs[] = { + {"crc32_ieee", crc32_ieee, crc32_ieee_base, crc32_ieee_ref} + , + {"crc32_gzip_refl", crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_ref} + , + {"crc32_iscsi", crc32_iscsi_wrap, crc32_iscsi_base_wrap, crc32_iscsi_ref_wrap} +}; + +// Generates pseudo-random data + +void rand_buffer(unsigned char *buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +// Test cases +int zeros_test(func_case_t * test_func); + +int simple_pattern_test(func_case_t * test_func); + +int seeds_sizes_test(func_case_t * test_func); + +int eob_test(func_case_t * test_func); + +int update_test(func_case_t * test_func); + +int verbose = 0; +void *buf_alloc = NULL; + +int main(int argc, char *argv[]) +{ + int fail = 0, fail_case; + int i, ret; + func_case_t *test_func; + + verbose = argc - 1; + + // Align to TEST_SIZE boundary + ret = posix_memalign(&buf_alloc, TEST_SIZE, MAX_BUF * TEST_SIZE); + if (ret) { + printf("alloc error: Fail"); + return -1; + } + srand(TEST_SEED); + printf("CRC32 Tests\n"); + + for (i = 0; i < sizeof(test_funcs) / sizeof(test_funcs[0]); i++) { + fail_case = 0; + test_func = &test_funcs[i]; + + printf("Test %s\t", test_func->note); + fail_case += zeros_test(test_func); + fail_case += simple_pattern_test(test_func); + fail_case += seeds_sizes_test(test_func); + fail_case += eob_test(test_func); + fail_case += update_test(test_func); + printf(" done: %s\n", fail_case ? "Fail" : "Pass"); + + if (fail_case) { + printf("\n%s Failed %d tests\n", test_func->note, fail_case); + fail++; + } + } + + printf("CRC32 Tests all done: %s\n", fail ? 
"Fail" : "Pass"); + + return fail; +} + +// Test of all zeros +int zeros_test(func_case_t * test_func) +{ + uint32_t crc_ref, crc_base, crc; + int fail = 0; + unsigned char *buf = NULL; + + buf = (unsigned char *)buf_alloc; + memset(buf, 0, MAX_BUF * 10); + crc_ref = test_func->crc32_ref_call(TEST_SEED, buf, MAX_BUF * 10); + crc_base = test_func->crc32_base_call(TEST_SEED, buf, MAX_BUF * 10); + crc = test_func->crc32_func_call(TEST_SEED, buf, MAX_BUF * 10); + + if ((crc_base != crc_ref) || (crc != crc_ref)) { + fail++; + printf("\n opt ref\n"); + printf(" ------ ------\n"); + printf("crc zero = 0x%8x 0x%8x 0x%8x\n", crc_ref, crc_base, crc); + } else + printf("."); + + return fail; +} + +// Another simple test pattern +int simple_pattern_test(func_case_t * test_func) +{ + uint32_t crc_ref, crc_base, crc; + int fail = 0; + unsigned char *buf = NULL; + + buf = (unsigned char *)buf_alloc; + memset(buf, 0x8a, MAX_BUF); + crc_ref = test_func->crc32_ref_call(TEST_SEED, buf, MAX_BUF); + crc_base = test_func->crc32_base_call(TEST_SEED, buf, MAX_BUF); + crc = test_func->crc32_func_call(TEST_SEED, buf, MAX_BUF); + + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc all 8a = 0x%8x 0x%8x 0x%8x\n", crc_ref, crc_base, crc); + else + printf("."); + + return fail; +} + +int seeds_sizes_test(func_case_t * test_func) +{ + uint32_t crc_ref, crc_base, crc; + int fail = 0; + int i; + uint64_t r, s; + unsigned char *buf = NULL; + + // Do a few random tests + buf = (unsigned char *)buf_alloc; //reset buf + r = rand(); + rand_buffer(buf, MAX_BUF * TEST_SIZE); + + for (i = 0; i < TEST_SIZE; i++) { + crc_ref = test_func->crc32_ref_call(r, buf, MAX_BUF); + crc_base = test_func->crc32_base_call(r, buf, MAX_BUF); + crc = test_func->crc32_func_call(r, buf, MAX_BUF); + + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc rand%3d = 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base, crc); + else if (i % (TEST_SIZE / 8) == 0) + 
printf("."); + buf += MAX_BUF; + } + + // Do a few random sizes + buf = (unsigned char *)buf_alloc; //reset buf + r = rand(); + + for (i = MAX_BUF; i >= 0; i--) { + crc_ref = test_func->crc32_ref_call(r, buf, i); + crc_base = test_func->crc32_base_call(r, buf, i); + crc = test_func->crc32_func_call(r, buf, i); + + if ((crc_base != crc_ref) || (crc != crc_ref)) { + fail++; + printf("fail random size%i 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base, + crc); + } else if (i % (MAX_BUF / 8) == 0) + printf("."); + } + + // Try different seeds + for (s = 0; s < 20; s++) { + buf = (unsigned char *)buf_alloc; //reset buf + + r = rand(); // just to get a new seed + rand_buffer(buf, MAX_BUF * TEST_SIZE); // new pseudo-rand data + + if (verbose) + printf("seed = 0x%lx\n", r); + + for (i = 0; i < TEST_SIZE; i++) { + crc_ref = test_func->crc32_ref_call(r, buf, MAX_BUF); + crc_base = test_func->crc32_base_call(r, buf, MAX_BUF); + crc = test_func->crc32_func_call(r, buf, MAX_BUF); + + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc rand%3d = 0x%8x 0x%8x 0x%8x\n", i, crc_ref, + crc_base, crc); + else if (i % (TEST_SIZE * 20 / 8) == 0) + printf("."); + buf += MAX_BUF; + } + } + + return fail; +} + +// Run tests at end of buffer +int eob_test(func_case_t * test_func) +{ + uint32_t crc_ref, crc_base, crc; + int fail = 0; + int i; + unsigned char *buf = NULL; + + // Null test + if (0 != test_func->crc32_func_call(0, NULL, 0)) { + fail++; + printf("crc null test fail\n"); + } + + buf = (unsigned char *)buf_alloc; //reset buf + buf = buf + ((MAX_BUF - 1) * TEST_SIZE); //Line up TEST_SIZE from end + for (i = 0; i <= TEST_SIZE; i++) { + crc_ref = test_func->crc32_ref_call(TEST_SEED, buf + i, TEST_SIZE - i); + crc_base = test_func->crc32_base_call(TEST_SEED, buf + i, TEST_SIZE - i); + crc = test_func->crc32_func_call(TEST_SEED, buf + i, TEST_SIZE - i); + + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc eob rand%3d = 
0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base, + crc); + else if (i % (TEST_SIZE / 8) == 0) + printf("."); + } + + return fail; +} + +int update_test(func_case_t * test_func) +{ + uint32_t crc_ref, crc_base, crc; + int fail = 0; + int i; + uint64_t r; + unsigned char *buf = NULL; + + buf = (unsigned char *)buf_alloc; //reset buf + r = rand(); + // Process the whole buf with reference func single call. + crc_ref = test_func->crc32_ref_call(r, buf, MAX_BUF * TEST_SIZE); + crc_base = test_func->crc32_base_call(r, buf, MAX_BUF * TEST_SIZE); + // Process buf with update method. + for (i = 0; i < TEST_SIZE; i++) { + crc = test_func->crc32_func_call(r, buf, MAX_BUF); + // Update crc seeds and buf pointer. + r = crc; + buf += MAX_BUF; + } + + if ((crc_base != crc_ref) || (crc != crc_ref)) + fail++; + if (verbose) + printf("crc rand%3d = 0x%8x 0x%8x 0x%8x\n", i, crc_ref, crc_base, crc); + else + printf("."); + + return fail; +} diff --git a/src/spdk/isa-l/crc/crc32_gzip_refl_by8.asm b/src/spdk/isa-l/crc/crc32_gzip_refl_by8.asm new file mode 100644 index 000000000..62f7e7d02 --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_gzip_refl_by8.asm @@ -0,0 +1,624 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Function API: +; UINT32 crc32_gzip_refl_by8( +; UINT32 init_crc, //initial CRC value, 32 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://download.intel.com/design/intarch/papers/323102.pdf +; +; +; sample yasm command line: +; yasm -f x64 -f elf64 -X gnu -g dwarf2 crc32_gzip_refl_by8 +; +; As explained here: +; http://docs.oracle.com/javase/7/docs/api/java/util/zip/package-summary.html +; CRC-32 checksum is described in RFC 1952 +; Implementing RFC 1952 CRC: +; http://www.ietf.org/rfc/rfc1952.txt + +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 
rdx + + %xdefine arg1_low32 edi +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + +align 16 +global crc32_gzip_refl_by8:ISAL_SYM_TYPE_FUNCTION +crc32_gzip_refl_by8: + + ; unsigned long c = crc ^ 0xffffffffL; + not arg1_low32 ; + + + sub rsp, VARIABLE_OFFSET +%ifidn __OUTPUT_FORMAT__, win64 + ; push the xmm registers into the stack to maintain + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + ; check if smaller than 256B + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time... + jl _less_than_256 + + + ; load the initial crc value + movd xmm10, arg1_low32 ; initial crc + + ; receive the initial 64B data, xor the initial crc value + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + ; XOR the initial_crc value + pxor xmm0, xmm10 + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time. 
This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm1, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm3, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm5, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm7, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + + + ; fold the 8 xmm registers to 1 xmm register with different constants + + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 
0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + + movdqa xmm10, [rk1] + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + add arg3, 16 + je _128_done + +; here we are getting data that is less than 16 bytes. 
+ ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. +_get_last_two_xmms: + + + movdqa xmm2, xmm7 + movdqu xmm1, [arg2 - 16 + arg3] + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table] + add rax, arg3 + movdqu xmm0, [rax] + + + pshufb xmm7, xmm0 + pxor xmm0, [mask3] + pshufb xmm2, xmm0 + + pblendvb xmm2, xmm1 ;xmm0 is implicit + ;;;;;;;;;; + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0 + psrldq xmm0, 8 + pxor xmm7, xmm0 + + ;32b fold + movdqa xmm0, xmm7 + pslldq xmm7, 4 + pclmulqdq xmm7, xmm10, 0x10 + + pxor xmm7, xmm0 + + + ;barrett reduction +_barrett: + pand xmm7, [mask2] + movdqa xmm1, xmm7 + movdqa xmm2, xmm7 + movdqa xmm10, [rk7] + + pclmulqdq xmm7, xmm10, 0 + pxor xmm7, xmm2 + pand xmm7, [mask] + movdqa xmm2, xmm7 + pclmulqdq xmm7, xmm10, 0x10 + pxor xmm7, xmm2 + pxor xmm7, xmm1 + pextrd eax, xmm7, 2 + +_cleanup: + ; return c ^ 0xffffffffL; + not eax + + +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp, VARIABLE_OFFSET + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + 
; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + + movd xmm0, arg1_low32 ; get the initial crc value + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov eax, arg1_low32 + test arg3, arg3 + je _cleanup + + movd xmm0, arg1_low32 ; get the initial crc value + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + cmp arg3, 4 + jl _only_less_than_4 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + +_zero_left: + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + lea rax,[pshufb_shf_table] + movdqu xmm0, [rax + r9] + pshufb xmm7,xmm0 + + + + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +_only_less_than_4: + cmp arg3, 3 + jl _only_less_than_3 + + ; load 3 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + mov al, [arg2+2] + mov [r11+2], al + + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + pslldq xmm7, 5 + + jmp _barrett +_only_less_than_3: + cmp arg3, 2 + jl _only_less_than_2 + + ; load 2 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + pslldq xmm7, 6 + + jmp _barrett +_only_less_than_2: + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + pslldq xmm7, 7 + + jmp _barrett + +section .data + +; precomputed constants +align 16 +rk1 : +DQ 0x00000000ccaa009e +rk2 : +DQ 0x00000001751997d0 +rk3 : +DQ 0x000000014a7fe880 +rk4 : +DQ 0x00000001e88ef372 +rk5 : +DQ 0x00000000ccaa009e +rk6 : +DQ 0x0000000163cd6124 +rk7 : +DQ 0x00000001f7011640 +rk8 : +DQ 0x00000001db710640 +rk9 : 
+DQ 0x00000001d7cfc6ac +rk10 : +DQ 0x00000001ea89367e +rk11 : +DQ 0x000000018cb44e58 +rk12 : +DQ 0x00000000df068dc2 +rk13 : +DQ 0x00000000ae0b5394 +rk14 : +DQ 0x00000001c7569e54 +rk15 : +DQ 0x00000001c6e41596 +rk16 : +DQ 0x0000000154442bd4 +rk17 : +DQ 0x0000000174359406 +rk18 : +DQ 0x000000003db1ecdc +rk19 : +DQ 0x000000015a546366 +rk20 : +DQ 0x00000000f1da05aa + +mask: +dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 +mask2: +dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF +mask3: +dq 0x8080808080808080, 0x8080808080808080 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + +;;; func core, ver, snum +slversion crc32_gzip_refl_by8, 01, 00, 002c diff --git a/src/spdk/isa-l/crc/crc32_gzip_refl_perf.c b/src/spdk/isa-l/crc/crc32_gzip_refl_perf.c new file mode 100644 index 000000000..ad3d86fb5 --- /dev/null
+++ b/src/spdk/isa-l/crc/crc32_gzip_refl_perf.c @@ -0,0 +1,91 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include +#include +#include "crc.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_LEN 8*1024 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN (2 * GT_L3_CACHE) +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define TEST_MEM TEST_LEN + +int main(int argc, char *argv[]) +{ + void *buf; + uint32_t crc; + struct perf start; + + printf("crc32_gzip_refl_perf:\n"); + + if (posix_memalign(&buf, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + + printf("Start timed tests\n"); + fflush(0); + + memset(buf, 0, TEST_LEN); + BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_gzip_refl(TEST_SEED, buf, TEST_LEN)); + printf("crc32_gzip_refl" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + printf("finish 0x%x\n", crc); + + printf("crc32_gzip_refl_base_perf:\n"); + printf("Start timed tests\n"); + fflush(0); + + BENCHMARK(&start, BENCHMARK_TIME, crc = + crc32_gzip_refl_base(TEST_SEED, buf, TEST_LEN)); + printf("crc32_gzip_refl_base" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + printf("finish 0x%x\n", crc); + + return 0; +} diff --git a/src/spdk/isa-l/crc/crc32_ieee_01.asm b/src/spdk/isa-l/crc/crc32_ieee_01.asm new file mode 100644 index 000000000..32495ed7f --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_ieee_01.asm @@ -0,0 +1,655 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Function API: +; UINT32 crc32_ieee_01( +; UINT32 init_crc, //initial CRC value, 32 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf + +%include "reg_sizes.asm" + +%define fetch_dist 1024 +[bits 64] +default rel + +section .text + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx + + %xdefine arg1_low32 edi +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif +align 16 +global crc32_ieee_01:ISAL_SYM_TYPE_FUNCTION +crc32_ieee_01: + + not arg1_low32 ;~init_crc + + sub rsp,VARIABLE_OFFSET + +%ifidn __OUTPUT_FORMAT__, win64 + ; save xmm6-xmm13 on the stack; they are restored at _cleanup before returning + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + + ; check if smaller than 256 + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time... + jl _less_than_256 + + + ; load the initial crc value + movd xmm10, arg1_low32 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register.
+ ; because data will be byte-reflected and will align with initial crc at correct place. + pslldq xmm10, 12 + + movdqa xmm11, [SHUF_MASK] + ; receive the initial 128B data, xor the initial crc value + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + pshufb xmm0, xmm11 + ; XOR the initial_crc value + pxor xmm0, xmm10 + pshufb xmm1, xmm11 + pshufb xmm2, xmm11 + pshufb xmm3, xmm11 + pshufb xmm4, xmm11 + pshufb xmm5, xmm11 + pshufb xmm6, xmm11 + pshufb xmm7, xmm11 + + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time. 
This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 ; buf += 128; + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm1, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm3, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm5, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm7, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 +
+ ; fold the 8 xmm registers to 1 xmm register with different constants + + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + + movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2 + ;imm value of pclmulqdq instruction will determine which constant to use + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 
16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pshufb xmm0, xmm11 + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg3, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. 
+_get_last_two_xmms: + movdqa xmm2, xmm7 + + movdqu xmm1, [arg2 - 16 + arg3] + pshufb xmm1, xmm11 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg3 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg3 bytes + pshufb xmm2, xmm0 + + ; shift xmm7 to the right by 16-arg3 bytes + pxor xmm0, [mask1] + pshufb xmm7, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10 + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0x1 + pslldq xmm0, 8 + pxor xmm7, xmm0 + + ;32b fold + movdqa xmm0, xmm7 + + pand xmm0, [mask2] + + psrldq xmm7, 12 + pclmulqdq xmm7, xmm10, 0x10 + pxor xmm7, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10 + movdqa xmm0, xmm7 + pclmulqdq xmm7, xmm10, 0x01 + pslldq xmm7, 4 + pclmulqdq xmm7, xmm10, 0x11 + + pslldq xmm7, 4 + pxor xmm7, xmm0 + pextrd eax, xmm7,1 + +_cleanup: + not eax +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp,VARIABLE_OFFSET + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + 
movdqa xmm11, [SHUF_MASK] + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 + + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov eax, arg1_low32 + test arg3, arg3 + je _cleanup + + movdqa xmm11, [SHUF_MASK] + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + cmp arg3, 4 + jl _only_less_than_4 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al +_zero_left: + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + ; shl r9, 4 + lea rax, [pshufb_shf_table + 16] + sub rax, r9 + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm7, xmm0 + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +_only_less_than_4: + cmp arg3, 3 + jl _only_less_than_3 + + ; load 3 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + mov al, [arg2+2] + mov [r11+2], al + + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + psrldq xmm7, 5 + + jmp _barrett +_only_less_than_3: + cmp arg3, 2 + jl _only_less_than_2 + + ; load 2 Bytes + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + psrldq xmm7, 6 + + jmp _barrett +_only_less_than_2: + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + psrldq xmm7, 7 + + jmp _barrett + +section .data + +; precomputed constants +align 16 + +rk1 : +DQ 0xf200aa6600000000 +rk2 : +DQ 0x17d3315d00000000 +rk3 : +DQ 0x022ffca500000000 +rk4 
: +DQ 0x9d9ee22f00000000 +rk5 : +DQ 0xf200aa6600000000 +rk6 : +DQ 0x490d678d00000000 +rk7 : +DQ 0x0000000104d101df +rk8 : +DQ 0x0000000104c11db7 +rk9 : +DQ 0x6ac7e7d700000000 +rk10 : +DQ 0xfcd922af00000000 +rk11 : +DQ 0x34e45a6300000000 +rk12 : +DQ 0x8762c1f600000000 +rk13 : +DQ 0x5395a0ea00000000 +rk14 : +DQ 0x54f2d5c700000000 +rk15 : +DQ 0xd3504ec700000000 +rk16 : +DQ 0x57a8445500000000 +rk17 : +DQ 0xc053585d00000000 +rk18 : +DQ 0x766f1b7800000000 +rk19 : +DQ 0xcd8c54b500000000 +rk20 : +DQ 0xab40b71e00000000 + + + + + + + + + +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF + +SHUF_MASK: +dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + +;;; func core, ver, snum +slversion crc32_ieee_01, 01,
06, 0011 + diff --git a/src/spdk/isa-l/crc/crc32_ieee_by4.asm b/src/spdk/isa-l/crc/crc32_ieee_by4.asm new file mode 100644 index 000000000..39bed5a95 --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_ieee_by4.asm @@ -0,0 +1,565 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Function API: +; UINT32 crc32_ieee_by4( +; UINT32 init_crc, //initial CRC value, 32 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://download.intel.com/design/intarch/papers/323102.pdf +; + +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx + + %xdefine arg1_low32 edi +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*4+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + +align 16 +global crc32_ieee_by4:ISAL_SYM_TYPE_FUNCTION +crc32_ieee_by4: + + not arg1_low32 ; invert the initial crc (the result is inverted again at _cleanup) + + sub rsp,VARIABLE_OFFSET ; reserve the 16B scratch slot at [rsp] (plus the xmm save area on win64) + +%ifidn __OUTPUT_FORMAT__, win64 + ; save xmm6 and xmm7 on the stack; they are restored at _cleanup + movdqa [rsp + XMM_SAVE + 16*0],xmm6 + movdqa [rsp + XMM_SAVE + 16*1],xmm7 +%endif + + ; check if smaller than 128B + cmp arg3, 128 + jl _less_than_128 + + + + ; load the initial crc value + movd xmm6, arg1_low32 ; initial crc + ; crc value does not need to be byte-reflected, but it needs to be + ; moved to the high part of the register. + ; because data will be byte-reflected and will align with initial + ; crc at correct place. 
+ pslldq xmm6, 12 + + + + movdqa xmm7, [SHUF_MASK] ; pshufb control that reverses the 16 bytes of a block + ; load the initial 64B of data, xor the initial crc value + movdqu xmm0, [arg2] + movdqu xmm1, [arg2+16] + movdqu xmm2, [arg2+32] + movdqu xmm3, [arg2+48] + + + + pshufb xmm0, xmm7 + ; XOR the initial_crc value + pxor xmm0, xmm6 + pshufb xmm1, xmm7 + pshufb xmm2, xmm7 + pshufb xmm3, xmm7 + + movdqa xmm6, [rk3] ; k3=2^480 mod POLY << 32 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;we subtract 128 instead of 64 to save one instruction from the loop + sub arg3, 128 + + ; at this section of the code, there is 64*x+y (0<=y<64) bytes of + ; buffer. The _fold_64_B_loop loop will fold 64B at a time until we + ; have 64+y Bytes of buffer + + + ; fold 64B at a time. This section of the code folds 4 xmm registers in parallel +_fold_64_B_loop: + + ;update the buffer pointer + add arg2, 64 + + prefetchnta [arg2+fetch_dist+0] + movdqa xmm4, xmm0 + movdqa xmm5, xmm1 + + pclmulqdq xmm0, xmm6 , 0x11 + pclmulqdq xmm1, xmm6 , 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + prefetchnta [arg2+fetch_dist+32] + movdqa xmm4, xmm2 + movdqa xmm5, xmm3 + + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm3, xmm6, 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + movdqu xmm4, [arg2] + movdqu xmm5, [arg2+16] + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + movdqu xmm4, [arg2+32] + movdqu xmm5, [arg2+48] + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + sub arg3, 64 + + ; check if there is another 64B in the buffer to be able to fold + jge _fold_64_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg2, 64 + ;at this point, the arg2 is pointing at the last y Bytes of the buffer + ; the 64B of data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3 + + + movdqa xmm6, [rk1] ;k1 + + ; fold the 4 xmm registers to 1 xmm 
register with different constants + movdqa xmm4, xmm0 + pclmulqdq xmm0, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm1, xmm4 + xorps xmm1, xmm0 + + movdqa xmm4, xmm1 + pclmulqdq xmm1, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm2, xmm4 + xorps xmm2, xmm1 + + movdqa xmm4, xmm2 + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + + + ;instead of 64, we add 48 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 64-16 + jl _final_reduction_for_128 + +; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm3 and the rest is in memory +; we can fold 16 bytes at a time if y>=16 +; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + movdqu xmm0, [arg2] + pshufb xmm0, xmm7 + pxor xmm3, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm3 register + + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg3, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset + ; the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. 
+_get_last_two_xmms: + movdqa xmm2, xmm3 + + movdqu xmm1, [arg2 - 16 + arg3] + pshufb xmm1, xmm7 + + shl arg3, 4 ; each pshufb_shf_table row is 16 bytes; select the row by remaining byte count + lea rax, [pshufb_shf_table + 15*16] + sub rax, arg3 + movdqu xmm0, [rax] + + pshufb xmm2, xmm0 + + pxor xmm0, [mask3] + + pshufb xmm3, xmm0 + + pblendvb xmm1, xmm2 ;xmm0 is implicit + + movdqa xmm2, xmm1 + + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + +_128_done: + + movdqa xmm6, [rk5] + movdqa xmm0, xmm3 + + ;64b fold + pclmulqdq xmm3, xmm6, 0x1 + pslldq xmm0, 8 + pxor xmm3, xmm0 + + ;32b fold + movdqa xmm0, xmm3 + + pand xmm0, [mask4] + + psrldq xmm3, 12 + pclmulqdq xmm3, xmm6, 0x10 + pxor xmm3, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm6, [rk7] + movdqa xmm0, xmm3 + pclmulqdq xmm3, xmm6, 0x01 + pslldq xmm3, 4 + pclmulqdq xmm3, xmm6, 0x11 + + pslldq xmm3, 4 + pxor xmm3, xmm0 + pextrd eax, xmm3,1 + +_cleanup: + not eax ; final inversion (mirrors the 'not arg1_low32' on entry) +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] +%endif + add rsp,VARIABLE_OFFSET + + + ret + + + + + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_128: + + ;check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + movdqa xmm7, [SHUF_MASK] + + ;if there is, load the constants + movdqa xmm6, [rk1] ;k1 + + movd xmm0, arg1_low32 + pslldq xmm0, 12 + movdqu xmm3, [arg2] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 + + + ;update the buffer pointer + add arg2, 16 + + ;update the counter. 
subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + mov eax, arg1_low32 + test arg3, arg3 + je _cleanup ; len == 0: eax holds the inverted init crc, _cleanup inverts it back + + movdqa xmm7, [SHUF_MASK] + + movd xmm0, arg1_low32 + pslldq xmm0, 12 + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + movd xmm0, arg1_low32 ; redundant: xmm0 already holds this value from the identical pair above + pslldq xmm0, 12 + movdqu xmm3, [arg2] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 + add arg2, 16 + sub arg3, 16 + movdqa xmm6, [rk1] ;k1 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. + + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + + + cmp arg3, 4 + jl _only_less_than_4 + + mov r9, arg3 ; keep the original length for _zero_left; arg3 is consumed by the copy below + + + cmp arg3, 8 + jl _less_than_8_left + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + mov al, [arg2] + mov [r11], al + +_zero_left: + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 + + shl r9, 4 + lea rax, [pshufb_shf_table + 15*16] + sub rax, r9 + movdqu xmm0, [rax] + pxor xmm0, [mask3] + + pshufb xmm3, xmm0 + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm3, [arg2] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 + + jmp _128_done + +_only_less_than_4: + cmp arg3, 3 + jl _only_less_than_3 + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + mov al, [arg2+2] + mov [r11+2], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 + + psrldq xmm3, 5 + + jmp _barrett +_only_less_than_3: + cmp arg3, 2 + jl _only_less_than_2 + mov al, [arg2] + mov [r11], al + + mov al, [arg2+1] + mov [r11+1], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 
+ + psrldq xmm3, 6 + + jmp _barrett +_only_less_than_2: + mov al, [arg2] + mov [r11], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 + + psrldq xmm3, 7 + + jmp _barrett +; precomputed constants (the code loads them 16 bytes at a time, e.g. movdqa xmm6, [rk1] reads rk1 and rk2) +section .data + +align 16 +rk1: +DQ 0xf200aa6600000000 +rk2: +DQ 0x17d3315d00000000 +rk3: +DQ 0xd3504ec700000000 +rk4: +DQ 0x57a8445500000000 +rk5: +DQ 0xf200aa6600000000 +rk6: +DQ 0x490d678d00000000 +rk7: +DQ 0x0000000104d101df +rk8: +DQ 0x0000000104c11db7 +mask: +dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 +mask2: +dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF +mask3: +dq 0x8080808080808080, 0x8080808080808080 +mask4: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF + align 32 +pshufb_shf_table: + + dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 + + dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 + + dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 + + dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 + + dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 + + dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 + + dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 + + dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 + + dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 + + dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 + + dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 + + dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 + + dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 + + dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 + + dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 + + +SHUF_MASK dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +;;; func core, ver, snum +slversion crc32_ieee_by4, 05, 02, 0017 diff --git a/src/spdk/isa-l/crc/crc32_ieee_perf.c b/src/spdk/isa-l/crc/crc32_ieee_perf.c new file mode 100644 index 
000000000..f6ffbbe44 --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_ieee_perf.c @@ -0,0 +1,79 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/time.h> +#include "crc.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_LEN (8*1024) +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define GT_L3_CACHE (32*1024*1024) /* some number > last level cache */ +# define TEST_LEN (2 * GT_L3_CACHE) +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define TEST_MEM TEST_LEN +// Benchmark crc32_ieee() over a zero-filled, 1024-byte-aligned TEST_LEN buffer; returns 0, or -1 if allocation fails. +int main(int argc, char *argv[]) +{ + void *buf; + uint32_t crc; + struct perf start; + + printf("crc32_ieee_perf:\n"); + + if (posix_memalign(&buf, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + + printf("Start timed tests\n"); + fflush(0); + + memset(buf, 0, TEST_LEN); + BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_ieee(TEST_SEED, buf, TEST_LEN)); + printf("crc32_ieee" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + printf("finish 0x%x\n", crc); + return 0; +} diff --git a/src/spdk/isa-l/crc/crc32_iscsi_00.asm b/src/spdk/isa-l/crc/crc32_iscsi_00.asm new file mode 100644 index 000000000..4f81e3a3b --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_iscsi_00.asm @@ -0,0 +1,671 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. 
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Function to compute iscsi CRC32 with table-based recombination +; crc done "by 3" with block sizes 1920, 960, 480, 240 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" + +default rel +; crcB3 MACRO to implement crc32 on 3 %%bSize-byte blocks: three crc32 chains run side by side (rax, rbx, r10) and are recombined through the %%td2/%%td1 lookup tables +%macro crcB3 3 +%define %%bSize %1 ; 1/3 of buffer size +%define %%td2 %2 ; table offset for crc0 (2/3 of buffer) +%define %%td1 %3 ; table offset for crc1 (1/3 of buffer) + +%IF %%bSize=640 + sub len, %%bSize*3 + js %%crcB3_end ;; jump to next level if 3*blockSize > len +%ELSE + cmp len, %%bSize*3 + jnae %%crcB3_end ;; jump to next level if 3*blockSize > len +%ENDIF + ;;;;;; Calculate CRC of 3 blocks of the buffer ;;;;;; +%%crcB3_loop: + ;; rax = crc0 = initial crc + xor rbx, rbx ;; rbx = crc1 = 0; + xor r10, r10 ;; r10 = crc2 = 0; + + cmp len, 
%%bSize*3*2 + jbe %%non_prefetch + + %assign i 0 + %rep %%bSize/8 - 1 + %if i < %%bSize*3/4 + prefetchnta [bufptmp+ %%bSize*3 +i*4] + %endif + crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 + crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2 + %assign i (i+8) + %endrep + jmp %%next %+ %1 + +%%non_prefetch: + %assign i 0 + %rep %%bSize/8 - 1 + crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 + crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2 + %assign i (i+8) + %endrep + +%%next %+ %1: + crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 +; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 (the last qword of block 2 is XORed into r9 and fed through crc32 below instead) + + ; merge in crc0 (crc_init holds the table base here; bufp is reused as the byte index) + movzx bufp_dw, al + mov r9d, [crc_init + bufp*4 + %%td2] + movzx bufp_dw, ah + shr eax, 16 + mov r11d, [crc_init + bufp*4 + %%td2] + shl r11, 8 + xor r9, r11 + + movzx bufp_dw, al + mov r11d, [crc_init + bufp*4 + %%td2] + movzx bufp_dw, ah + shl r11, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td2] + shl r11, 24 + xor r9, r11 + + ; merge in crc1 + + movzx bufp_dw, bl + mov r11d, [crc_init + bufp*4 + %%td1] + movzx bufp_dw, bh + shr ebx, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td1] + shl r11, 8 + xor r9, r11 + + movzx bufp_dw, bl + mov r11d, [crc_init + bufp*4 + %%td1] + movzx bufp_dw, bh + shl r11, 16 + xor r9, r11 + mov r11d, [crc_init + bufp*4 + %%td1] + shl r11, 24 + xor r9, r11 + + xor r9, [bufptmp+i + 2*%%bSize] + crc32 r10, r9 + mov rax, r10 + + add bufptmp, %%bSize*3 ;; move to next block + sub len, %%bSize*3 +%IF %%bSize=640 + jns %%crcB3_loop +%ENDIF + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%%crcB3_end: +%IF %%bSize=640 + add len, %%bSize*3 +%ENDIF + je do_return ;; return if remaining data is zero +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; ISCSI CRC 32 Implementation with crc32 Instruction + +;;; unsigned int crc32_iscsi_00(unsigned char * buffer, int len, unsigned int crc_init); +;;; +;;; *buf = rcx (rdi for elf64) +;;; len = rdx (rsi for elf64) +;;; crc_init = r8 (rdx for elf64) +;;; + +global crc32_iscsi_00:ISAL_SYM_TYPE_FUNCTION +crc32_iscsi_00: + +%ifidn __OUTPUT_FORMAT__, elf64 +%define bufp rdi +%define bufp_dw edi +%define bufp_w di +%define bufp_b dil +%define bufptmp rcx +%define block_0 rcx +%define block_1 r8 +%define block_2 r11 +%define len rsi +%define len_dw esi +%define len_w si +%define len_b sil +%define crc_init rdx +%define crc_init_dw edx +%else +%define bufp rcx +%define bufp_dw ecx +%define bufp_w cx +%define bufp_b cl +%define bufptmp rdi +%define block_0 rdi +%define block_1 rsi +%define block_2 r11 +%define len rdx +%define len_dw edx +%define len_w dx +%define len_b dl +%define crc_init r8 +%define crc_init_dw r8d +%endif + + + push rdi + push rbx + + mov rax, crc_init ;; rax = crc_init; + + cmp len, 8 ;; NOTE(review): compares the full 64-bit register although the prototype above declares int len - confirm callers extend it + jb less_than_8 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + mov bufptmp, bufp ;; bufptmp = buf (bufptmp is rdi, or rcx for elf64) + neg bufp + and bufp, 7 ;; calculate the unalignment + ;; amount of the address + je proc_block ;; Skip if aligned + + ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;;; + mov rbx, [bufptmp] ;; load a quadword from the buffer + add bufptmp, bufp ;; align buffer pointer for + ;; quadword processing + sub len, bufp ;; update buffer length +align_loop: + crc32 eax, bl ;; compute crc32 of 1-byte + shr rbx, 8 ;; get next byte + dec bufp + jne align_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 2) BLOCK LEVEL: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +proc_block: + cmp len, 240 + jb bit8 + + lea crc_init, [mul_table_72] ;; load table base address (crc_init itself was already copied to rax) + + crcB3 640, 0x1000, 0x0c00 ; 640*3 = 1920 (Tables 1280, 640) + crcB3 320, 0x0c00, 0x0800 ; 320*3 = 960 (Tables 640, 320) + crcB3 160, 0x0800, 0x0400 ; 160*3 = 480 (Tables 320, 160) + crcB3 80, 0x0400, 0x0000 ; 80*3 = 240 (Tables 160, 80) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) + +bit8: + shl len_b, 1 ;; shift-out MSB (bit-8) + jnc bit7 ;; jump to bit-7 if bit-8 == 0 + %assign i 0 + %rep 16 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero (ZF is still from shl; crc32 does not modify flags) + add bufptmp, 128 ;; buf +=128; (next 128 bytes) + +bit7: + shl len_b, 1 ;; shift-out MSB (bit-7) + jnc bit6 ;; jump to bit-6 if bit-7 == 0 + %assign i 0 + %rep 8 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + add bufptmp, 64 ;; buf +=64; (next 64 bytes) +bit6: + shl len_b, 1 ;; shift-out MSB (bit-6) + jnc bit5 ;; jump to bit-5 if bit-6 == 0 + %assign i 0 + %rep 4 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + add bufptmp, 32 ;; buf +=32; (next 32 bytes) +bit5: + shl len_b, 1 ;; shift-out MSB (bit-5) + jnc bit4 ;; jump to bit-4 if bit-5 == 0 + %assign i 0 + %rep 2 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return ;; return if remaining data is zero + add bufptmp, 16 ;; buf +=16; (next 16 bytes) +bit4: + shl len_b, 1 ;; shift-out MSB (bit-4) + jnc bit3 ;; jump to bit-3 if bit-4 == 0 + crc32 rax, qword [bufptmp] ;; compute crc32 of 8-byte data 
+ je do_return ;; return if remaining data is zero + add bufptmp, 8 ;; buf +=8; (next 8 bytes) +bit3: + mov rbx, qword [bufptmp] ;; load 8 bytes from the buffer (a full qword is read although fewer than 8 bytes remain) + shl len_b, 1 ;; shift-out MSB (bit-3) + jnc bit2 ;; jump to bit-2 if bit-3 == 0 + crc32 eax, ebx ;; compute crc32 of 4-byte data + je do_return ;; return if remaining data is zero + shr rbx, 32 ;; get next 3 bytes +bit2: + shl len_b, 1 ;; shift-out MSB (bit-2) + jnc bit1 ;; jump to bit-1 if bit-2 == 0 + crc32 eax, bx ;; compute crc32 of 2-byte data + je do_return ;; return if remaining data is zero + shr rbx, 16 ;; next byte +bit1: + test len_b,len_b + je do_return + crc32 eax, bl ;; compute crc32 of 1-byte data +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +do_return: + + pop rbx + pop rdi + ret + +less_than_8: + test len,4 + jz less_than_4 + crc32 eax, dword[bufp] + add bufp,4 +less_than_4: + test len,2 + jz less_than_2 + crc32 eax, word[bufp] + add bufp,2 +less_than_2: + test len,1 + jz do_return + crc32 rax, byte[bufp] + pop rbx + pop bufp ;; NOTE(review): bufp is rdi only for elf64; otherwise this pops the saved rdi into rcx (rdi is not written on this path) + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; global mul_table_72, mul_table_152, mul_table_312, mul_table_632, mul_table_1272 + +section .data +align 8 +mul_table_72: +DD 0x00000000,0x39d3b296,0x73a7652c,0x4a74d7ba +DD 0xe74eca58,0xde9d78ce,0x94e9af74,0xad3a1de2 +DD 0xcb71e241,0xf2a250d7,0xb8d6876d,0x810535fb +DD 0x2c3f2819,0x15ec9a8f,0x5f984d35,0x664bffa3 +DD 0x930fb273,0xaadc00e5,0xe0a8d75f,0xd97b65c9 +DD 0x7441782b,0x4d92cabd,0x07e61d07,0x3e35af91 +DD 0x587e5032,0x61ade2a4,0x2bd9351e,0x120a8788 +DD 0xbf309a6a,0x86e328fc,0xcc97ff46,0xf5444dd0 +DD 0x23f31217,0x1a20a081,0x5054773b,0x6987c5ad +DD 
0xc4bdd84f,0xfd6e6ad9,0xb71abd63,0x8ec90ff5 +DD 0xe882f056,0xd15142c0,0x9b25957a,0xa2f627ec +DD 0x0fcc3a0e,0x361f8898,0x7c6b5f22,0x45b8edb4 +DD 0xb0fca064,0x892f12f2,0xc35bc548,0xfa8877de +DD 0x57b26a3c,0x6e61d8aa,0x24150f10,0x1dc6bd86 +DD 0x7b8d4225,0x425ef0b3,0x082a2709,0x31f9959f +DD 0x9cc3887d,0xa5103aeb,0xef64ed51,0xd6b75fc7 +DD 0x47e6242e,0x7e3596b8,0x34414102,0x0d92f394 +DD 0xa0a8ee76,0x997b5ce0,0xd30f8b5a,0xeadc39cc +DD 0x8c97c66f,0xb54474f9,0xff30a343,0xc6e311d5 +DD 0x6bd90c37,0x520abea1,0x187e691b,0x21addb8d +DD 0xd4e9965d,0xed3a24cb,0xa74ef371,0x9e9d41e7 +DD 0x33a75c05,0x0a74ee93,0x40003929,0x79d38bbf +DD 0x1f98741c,0x264bc68a,0x6c3f1130,0x55eca3a6 +DD 0xf8d6be44,0xc1050cd2,0x8b71db68,0xb2a269fe +DD 0x64153639,0x5dc684af,0x17b25315,0x2e61e183 +DD 0x835bfc61,0xba884ef7,0xf0fc994d,0xc92f2bdb +DD 0xaf64d478,0x96b766ee,0xdcc3b154,0xe51003c2 +DD 0x482a1e20,0x71f9acb6,0x3b8d7b0c,0x025ec99a +DD 0xf71a844a,0xcec936dc,0x84bde166,0xbd6e53f0 +DD 0x10544e12,0x2987fc84,0x63f32b3e,0x5a2099a8 +DD 0x3c6b660b,0x05b8d49d,0x4fcc0327,0x761fb1b1 +DD 0xdb25ac53,0xe2f61ec5,0xa882c97f,0x91517be9 +DD 0x8fcc485c,0xb61ffaca,0xfc6b2d70,0xc5b89fe6 +DD 0x68828204,0x51513092,0x1b25e728,0x22f655be +DD 0x44bdaa1d,0x7d6e188b,0x371acf31,0x0ec97da7 +DD 0xa3f36045,0x9a20d2d3,0xd0540569,0xe987b7ff +DD 0x1cc3fa2f,0x251048b9,0x6f649f03,0x56b72d95 +DD 0xfb8d3077,0xc25e82e1,0x882a555b,0xb1f9e7cd +DD 0xd7b2186e,0xee61aaf8,0xa4157d42,0x9dc6cfd4 +DD 0x30fcd236,0x092f60a0,0x435bb71a,0x7a88058c +DD 0xac3f5a4b,0x95ece8dd,0xdf983f67,0xe64b8df1 +DD 0x4b719013,0x72a22285,0x38d6f53f,0x010547a9 +DD 0x674eb80a,0x5e9d0a9c,0x14e9dd26,0x2d3a6fb0 +DD 0x80007252,0xb9d3c0c4,0xf3a7177e,0xca74a5e8 +DD 0x3f30e838,0x06e35aae,0x4c978d14,0x75443f82 +DD 0xd87e2260,0xe1ad90f6,0xabd9474c,0x920af5da +DD 0xf4410a79,0xcd92b8ef,0x87e66f55,0xbe35ddc3 +DD 0x130fc021,0x2adc72b7,0x60a8a50d,0x597b179b +DD 0xc82a6c72,0xf1f9dee4,0xbb8d095e,0x825ebbc8 +DD 0x2f64a62a,0x16b714bc,0x5cc3c306,0x65107190 +DD 
0x035b8e33,0x3a883ca5,0x70fceb1f,0x492f5989 +DD 0xe415446b,0xddc6f6fd,0x97b22147,0xae6193d1 +DD 0x5b25de01,0x62f66c97,0x2882bb2d,0x115109bb +DD 0xbc6b1459,0x85b8a6cf,0xcfcc7175,0xf61fc3e3 +DD 0x90543c40,0xa9878ed6,0xe3f3596c,0xda20ebfa +DD 0x771af618,0x4ec9448e,0x04bd9334,0x3d6e21a2 +DD 0xebd97e65,0xd20accf3,0x987e1b49,0xa1ada9df +DD 0x0c97b43d,0x354406ab,0x7f30d111,0x46e36387 +DD 0x20a89c24,0x197b2eb2,0x530ff908,0x6adc4b9e +DD 0xc7e6567c,0xfe35e4ea,0xb4413350,0x8d9281c6 +DD 0x78d6cc16,0x41057e80,0x0b71a93a,0x32a21bac +DD 0x9f98064e,0xa64bb4d8,0xec3f6362,0xd5ecd1f4 +DD 0xb3a72e57,0x8a749cc1,0xc0004b7b,0xf9d3f9ed +DD 0x54e9e40f,0x6d3a5699,0x274e8123,0x1e9d33b5 + +mul_table_152: +DD 0x00000000,0x878a92a7,0x0af953bf,0x8d73c118 +DD 0x15f2a77e,0x927835d9,0x1f0bf4c1,0x98816666 +DD 0x2be54efc,0xac6fdc5b,0x211c1d43,0xa6968fe4 +DD 0x3e17e982,0xb99d7b25,0x34eeba3d,0xb364289a +DD 0x57ca9df8,0xd0400f5f,0x5d33ce47,0xdab95ce0 +DD 0x42383a86,0xc5b2a821,0x48c16939,0xcf4bfb9e +DD 0x7c2fd304,0xfba541a3,0x76d680bb,0xf15c121c +DD 0x69dd747a,0xee57e6dd,0x632427c5,0xe4aeb562 +DD 0xaf953bf0,0x281fa957,0xa56c684f,0x22e6fae8 +DD 0xba679c8e,0x3ded0e29,0xb09ecf31,0x37145d96 +DD 0x8470750c,0x03fae7ab,0x8e8926b3,0x0903b414 +DD 0x9182d272,0x160840d5,0x9b7b81cd,0x1cf1136a +DD 0xf85fa608,0x7fd534af,0xf2a6f5b7,0x752c6710 +DD 0xedad0176,0x6a2793d1,0xe75452c9,0x60dec06e +DD 0xd3bae8f4,0x54307a53,0xd943bb4b,0x5ec929ec +DD 0xc6484f8a,0x41c2dd2d,0xccb11c35,0x4b3b8e92 +DD 0x5ac60111,0xdd4c93b6,0x503f52ae,0xd7b5c009 +DD 0x4f34a66f,0xc8be34c8,0x45cdf5d0,0xc2476777 +DD 0x71234fed,0xf6a9dd4a,0x7bda1c52,0xfc508ef5 +DD 0x64d1e893,0xe35b7a34,0x6e28bb2c,0xe9a2298b +DD 0x0d0c9ce9,0x8a860e4e,0x07f5cf56,0x807f5df1 +DD 0x18fe3b97,0x9f74a930,0x12076828,0x958dfa8f +DD 0x26e9d215,0xa16340b2,0x2c1081aa,0xab9a130d +DD 0x331b756b,0xb491e7cc,0x39e226d4,0xbe68b473 +DD 0xf5533ae1,0x72d9a846,0xffaa695e,0x7820fbf9 +DD 0xe0a19d9f,0x672b0f38,0xea58ce20,0x6dd25c87 +DD 0xdeb6741d,0x593ce6ba,0xd44f27a2,0x53c5b505 +DD 
0xcb44d363,0x4cce41c4,0xc1bd80dc,0x4637127b +DD 0xa299a719,0x251335be,0xa860f4a6,0x2fea6601 +DD 0xb76b0067,0x30e192c0,0xbd9253d8,0x3a18c17f +DD 0x897ce9e5,0x0ef67b42,0x8385ba5a,0x040f28fd +DD 0x9c8e4e9b,0x1b04dc3c,0x96771d24,0x11fd8f83 +DD 0xb58c0222,0x32069085,0xbf75519d,0x38ffc33a +DD 0xa07ea55c,0x27f437fb,0xaa87f6e3,0x2d0d6444 +DD 0x9e694cde,0x19e3de79,0x94901f61,0x131a8dc6 +DD 0x8b9beba0,0x0c117907,0x8162b81f,0x06e82ab8 +DD 0xe2469fda,0x65cc0d7d,0xe8bfcc65,0x6f355ec2 +DD 0xf7b438a4,0x703eaa03,0xfd4d6b1b,0x7ac7f9bc +DD 0xc9a3d126,0x4e294381,0xc35a8299,0x44d0103e +DD 0xdc517658,0x5bdbe4ff,0xd6a825e7,0x5122b740 +DD 0x1a1939d2,0x9d93ab75,0x10e06a6d,0x976af8ca +DD 0x0feb9eac,0x88610c0b,0x0512cd13,0x82985fb4 +DD 0x31fc772e,0xb676e589,0x3b052491,0xbc8fb636 +DD 0x240ed050,0xa38442f7,0x2ef783ef,0xa97d1148 +DD 0x4dd3a42a,0xca59368d,0x472af795,0xc0a06532 +DD 0x58210354,0xdfab91f3,0x52d850eb,0xd552c24c +DD 0x6636ead6,0xe1bc7871,0x6ccfb969,0xeb452bce +DD 0x73c44da8,0xf44edf0f,0x793d1e17,0xfeb78cb0 +DD 0xef4a0333,0x68c09194,0xe5b3508c,0x6239c22b +DD 0xfab8a44d,0x7d3236ea,0xf041f7f2,0x77cb6555 +DD 0xc4af4dcf,0x4325df68,0xce561e70,0x49dc8cd7 +DD 0xd15deab1,0x56d77816,0xdba4b90e,0x5c2e2ba9 +DD 0xb8809ecb,0x3f0a0c6c,0xb279cd74,0x35f35fd3 +DD 0xad7239b5,0x2af8ab12,0xa78b6a0a,0x2001f8ad +DD 0x9365d037,0x14ef4290,0x999c8388,0x1e16112f +DD 0x86977749,0x011de5ee,0x8c6e24f6,0x0be4b651 +DD 0x40df38c3,0xc755aa64,0x4a266b7c,0xcdacf9db +DD 0x552d9fbd,0xd2a70d1a,0x5fd4cc02,0xd85e5ea5 +DD 0x6b3a763f,0xecb0e498,0x61c32580,0xe649b727 +DD 0x7ec8d141,0xf94243e6,0x743182fe,0xf3bb1059 +DD 0x1715a53b,0x909f379c,0x1decf684,0x9a666423 +DD 0x02e70245,0x856d90e2,0x081e51fa,0x8f94c35d +DD 0x3cf0ebc7,0xbb7a7960,0x3609b878,0xb1832adf +DD 0x29024cb9,0xae88de1e,0x23fb1f06,0xa4718da1 + +mul_table_312: +DD 0x00000000,0xbac2fd7b,0x70698c07,0xcaab717c +DD 0xe0d3180e,0x5a11e575,0x90ba9409,0x2a786972 +DD 0xc44a46ed,0x7e88bb96,0xb423caea,0x0ee13791 +DD 0x24995ee3,0x9e5ba398,0x54f0d2e4,0xee322f9f +DD 
0x8d78fb2b,0x37ba0650,0xfd11772c,0x47d38a57 +DD 0x6dabe325,0xd7691e5e,0x1dc26f22,0xa7009259 +DD 0x4932bdc6,0xf3f040bd,0x395b31c1,0x8399ccba +DD 0xa9e1a5c8,0x132358b3,0xd98829cf,0x634ad4b4 +DD 0x1f1d80a7,0xa5df7ddc,0x6f740ca0,0xd5b6f1db +DD 0xffce98a9,0x450c65d2,0x8fa714ae,0x3565e9d5 +DD 0xdb57c64a,0x61953b31,0xab3e4a4d,0x11fcb736 +DD 0x3b84de44,0x8146233f,0x4bed5243,0xf12faf38 +DD 0x92657b8c,0x28a786f7,0xe20cf78b,0x58ce0af0 +DD 0x72b66382,0xc8749ef9,0x02dfef85,0xb81d12fe +DD 0x562f3d61,0xecedc01a,0x2646b166,0x9c844c1d +DD 0xb6fc256f,0x0c3ed814,0xc695a968,0x7c575413 +DD 0x3e3b014e,0x84f9fc35,0x4e528d49,0xf4907032 +DD 0xdee81940,0x642ae43b,0xae819547,0x1443683c +DD 0xfa7147a3,0x40b3bad8,0x8a18cba4,0x30da36df +DD 0x1aa25fad,0xa060a2d6,0x6acbd3aa,0xd0092ed1 +DD 0xb343fa65,0x0981071e,0xc32a7662,0x79e88b19 +DD 0x5390e26b,0xe9521f10,0x23f96e6c,0x993b9317 +DD 0x7709bc88,0xcdcb41f3,0x0760308f,0xbda2cdf4 +DD 0x97daa486,0x2d1859fd,0xe7b32881,0x5d71d5fa +DD 0x212681e9,0x9be47c92,0x514f0dee,0xeb8df095 +DD 0xc1f599e7,0x7b37649c,0xb19c15e0,0x0b5ee89b +DD 0xe56cc704,0x5fae3a7f,0x95054b03,0x2fc7b678 +DD 0x05bfdf0a,0xbf7d2271,0x75d6530d,0xcf14ae76 +DD 0xac5e7ac2,0x169c87b9,0xdc37f6c5,0x66f50bbe +DD 0x4c8d62cc,0xf64f9fb7,0x3ce4eecb,0x862613b0 +DD 0x68143c2f,0xd2d6c154,0x187db028,0xa2bf4d53 +DD 0x88c72421,0x3205d95a,0xf8aea826,0x426c555d +DD 0x7c76029c,0xc6b4ffe7,0x0c1f8e9b,0xb6dd73e0 +DD 0x9ca51a92,0x2667e7e9,0xeccc9695,0x560e6bee +DD 0xb83c4471,0x02feb90a,0xc855c876,0x7297350d +DD 0x58ef5c7f,0xe22da104,0x2886d078,0x92442d03 +DD 0xf10ef9b7,0x4bcc04cc,0x816775b0,0x3ba588cb +DD 0x11dde1b9,0xab1f1cc2,0x61b46dbe,0xdb7690c5 +DD 0x3544bf5a,0x8f864221,0x452d335d,0xffefce26 +DD 0xd597a754,0x6f555a2f,0xa5fe2b53,0x1f3cd628 +DD 0x636b823b,0xd9a97f40,0x13020e3c,0xa9c0f347 +DD 0x83b89a35,0x397a674e,0xf3d11632,0x4913eb49 +DD 0xa721c4d6,0x1de339ad,0xd74848d1,0x6d8ab5aa +DD 0x47f2dcd8,0xfd3021a3,0x379b50df,0x8d59ada4 +DD 0xee137910,0x54d1846b,0x9e7af517,0x24b8086c +DD 
0x0ec0611e,0xb4029c65,0x7ea9ed19,0xc46b1062 +DD 0x2a593ffd,0x909bc286,0x5a30b3fa,0xe0f24e81 +DD 0xca8a27f3,0x7048da88,0xbae3abf4,0x0021568f +DD 0x424d03d2,0xf88ffea9,0x32248fd5,0x88e672ae +DD 0xa29e1bdc,0x185ce6a7,0xd2f797db,0x68356aa0 +DD 0x8607453f,0x3cc5b844,0xf66ec938,0x4cac3443 +DD 0x66d45d31,0xdc16a04a,0x16bdd136,0xac7f2c4d +DD 0xcf35f8f9,0x75f70582,0xbf5c74fe,0x059e8985 +DD 0x2fe6e0f7,0x95241d8c,0x5f8f6cf0,0xe54d918b +DD 0x0b7fbe14,0xb1bd436f,0x7b163213,0xc1d4cf68 +DD 0xebaca61a,0x516e5b61,0x9bc52a1d,0x2107d766 +DD 0x5d508375,0xe7927e0e,0x2d390f72,0x97fbf209 +DD 0xbd839b7b,0x07416600,0xcdea177c,0x7728ea07 +DD 0x991ac598,0x23d838e3,0xe973499f,0x53b1b4e4 +DD 0x79c9dd96,0xc30b20ed,0x09a05191,0xb362acea +DD 0xd028785e,0x6aea8525,0xa041f459,0x1a830922 +DD 0x30fb6050,0x8a399d2b,0x4092ec57,0xfa50112c +DD 0x14623eb3,0xaea0c3c8,0x640bb2b4,0xdec94fcf +DD 0xf4b126bd,0x4e73dbc6,0x84d8aaba,0x3e1a57c1 + +mul_table_632: +DD 0x00000000,0x6b749fb2,0xd6e93f64,0xbd9da0d6 +DD 0xa83e0839,0xc34a978b,0x7ed7375d,0x15a3a8ef +DD 0x55906683,0x3ee4f931,0x837959e7,0xe80dc655 +DD 0xfdae6eba,0x96daf108,0x2b4751de,0x4033ce6c +DD 0xab20cd06,0xc05452b4,0x7dc9f262,0x16bd6dd0 +DD 0x031ec53f,0x686a5a8d,0xd5f7fa5b,0xbe8365e9 +DD 0xfeb0ab85,0x95c43437,0x285994e1,0x432d0b53 +DD 0x568ea3bc,0x3dfa3c0e,0x80679cd8,0xeb13036a +DD 0x53adecfd,0x38d9734f,0x8544d399,0xee304c2b +DD 0xfb93e4c4,0x90e77b76,0x2d7adba0,0x460e4412 +DD 0x063d8a7e,0x6d4915cc,0xd0d4b51a,0xbba02aa8 +DD 0xae038247,0xc5771df5,0x78eabd23,0x139e2291 +DD 0xf88d21fb,0x93f9be49,0x2e641e9f,0x4510812d +DD 0x50b329c2,0x3bc7b670,0x865a16a6,0xed2e8914 +DD 0xad1d4778,0xc669d8ca,0x7bf4781c,0x1080e7ae +DD 0x05234f41,0x6e57d0f3,0xd3ca7025,0xb8beef97 +DD 0xa75bd9fa,0xcc2f4648,0x71b2e69e,0x1ac6792c +DD 0x0f65d1c3,0x64114e71,0xd98ceea7,0xb2f87115 +DD 0xf2cbbf79,0x99bf20cb,0x2422801d,0x4f561faf +DD 0x5af5b740,0x318128f2,0x8c1c8824,0xe7681796 +DD 0x0c7b14fc,0x670f8b4e,0xda922b98,0xb1e6b42a +DD 0xa4451cc5,0xcf318377,0x72ac23a1,0x19d8bc13 +DD 
0x59eb727f,0x329fedcd,0x8f024d1b,0xe476d2a9 +DD 0xf1d57a46,0x9aa1e5f4,0x273c4522,0x4c48da90 +DD 0xf4f63507,0x9f82aab5,0x221f0a63,0x496b95d1 +DD 0x5cc83d3e,0x37bca28c,0x8a21025a,0xe1559de8 +DD 0xa1665384,0xca12cc36,0x778f6ce0,0x1cfbf352 +DD 0x09585bbd,0x622cc40f,0xdfb164d9,0xb4c5fb6b +DD 0x5fd6f801,0x34a267b3,0x893fc765,0xe24b58d7 +DD 0xf7e8f038,0x9c9c6f8a,0x2101cf5c,0x4a7550ee +DD 0x0a469e82,0x61320130,0xdcafa1e6,0xb7db3e54 +DD 0xa27896bb,0xc90c0909,0x7491a9df,0x1fe5366d +DD 0x4b5bc505,0x202f5ab7,0x9db2fa61,0xf6c665d3 +DD 0xe365cd3c,0x8811528e,0x358cf258,0x5ef86dea +DD 0x1ecba386,0x75bf3c34,0xc8229ce2,0xa3560350 +DD 0xb6f5abbf,0xdd81340d,0x601c94db,0x0b680b69 +DD 0xe07b0803,0x8b0f97b1,0x36923767,0x5de6a8d5 +DD 0x4845003a,0x23319f88,0x9eac3f5e,0xf5d8a0ec +DD 0xb5eb6e80,0xde9ff132,0x630251e4,0x0876ce56 +DD 0x1dd566b9,0x76a1f90b,0xcb3c59dd,0xa048c66f +DD 0x18f629f8,0x7382b64a,0xce1f169c,0xa56b892e +DD 0xb0c821c1,0xdbbcbe73,0x66211ea5,0x0d558117 +DD 0x4d664f7b,0x2612d0c9,0x9b8f701f,0xf0fbefad +DD 0xe5584742,0x8e2cd8f0,0x33b17826,0x58c5e794 +DD 0xb3d6e4fe,0xd8a27b4c,0x653fdb9a,0x0e4b4428 +DD 0x1be8ecc7,0x709c7375,0xcd01d3a3,0xa6754c11 +DD 0xe646827d,0x8d321dcf,0x30afbd19,0x5bdb22ab +DD 0x4e788a44,0x250c15f6,0x9891b520,0xf3e52a92 +DD 0xec001cff,0x8774834d,0x3ae9239b,0x519dbc29 +DD 0x443e14c6,0x2f4a8b74,0x92d72ba2,0xf9a3b410 +DD 0xb9907a7c,0xd2e4e5ce,0x6f794518,0x040ddaaa +DD 0x11ae7245,0x7adaedf7,0xc7474d21,0xac33d293 +DD 0x4720d1f9,0x2c544e4b,0x91c9ee9d,0xfabd712f +DD 0xef1ed9c0,0x846a4672,0x39f7e6a4,0x52837916 +DD 0x12b0b77a,0x79c428c8,0xc459881e,0xaf2d17ac +DD 0xba8ebf43,0xd1fa20f1,0x6c678027,0x07131f95 +DD 0xbfadf002,0xd4d96fb0,0x6944cf66,0x023050d4 +DD 0x1793f83b,0x7ce76789,0xc17ac75f,0xaa0e58ed +DD 0xea3d9681,0x81490933,0x3cd4a9e5,0x57a03657 +DD 0x42039eb8,0x2977010a,0x94eaa1dc,0xff9e3e6e +DD 0x148d3d04,0x7ff9a2b6,0xc2640260,0xa9109dd2 +DD 0xbcb3353d,0xd7c7aa8f,0x6a5a0a59,0x012e95eb +DD 0x411d5b87,0x2a69c435,0x97f464e3,0xfc80fb51 +DD 
0xe92353be,0x8257cc0c,0x3fca6cda,0x54bef368 + +mul_table_1272: +DD 0x00000000,0xdd66cbbb,0xbf21e187,0x62472a3c +DD 0x7bafb5ff,0xa6c97e44,0xc48e5478,0x19e89fc3 +DD 0xf75f6bfe,0x2a39a045,0x487e8a79,0x951841c2 +DD 0x8cf0de01,0x519615ba,0x33d13f86,0xeeb7f43d +DD 0xeb52a10d,0x36346ab6,0x5473408a,0x89158b31 +DD 0x90fd14f2,0x4d9bdf49,0x2fdcf575,0xf2ba3ece +DD 0x1c0dcaf3,0xc16b0148,0xa32c2b74,0x7e4ae0cf +DD 0x67a27f0c,0xbac4b4b7,0xd8839e8b,0x05e55530 +DD 0xd34934eb,0x0e2fff50,0x6c68d56c,0xb10e1ed7 +DD 0xa8e68114,0x75804aaf,0x17c76093,0xcaa1ab28 +DD 0x24165f15,0xf97094ae,0x9b37be92,0x46517529 +DD 0x5fb9eaea,0x82df2151,0xe0980b6d,0x3dfec0d6 +DD 0x381b95e6,0xe57d5e5d,0x873a7461,0x5a5cbfda +DD 0x43b42019,0x9ed2eba2,0xfc95c19e,0x21f30a25 +DD 0xcf44fe18,0x122235a3,0x70651f9f,0xad03d424 +DD 0xb4eb4be7,0x698d805c,0x0bcaaa60,0xd6ac61db +DD 0xa37e1f27,0x7e18d49c,0x1c5ffea0,0xc139351b +DD 0xd8d1aad8,0x05b76163,0x67f04b5f,0xba9680e4 +DD 0x542174d9,0x8947bf62,0xeb00955e,0x36665ee5 +DD 0x2f8ec126,0xf2e80a9d,0x90af20a1,0x4dc9eb1a +DD 0x482cbe2a,0x954a7591,0xf70d5fad,0x2a6b9416 +DD 0x33830bd5,0xeee5c06e,0x8ca2ea52,0x51c421e9 +DD 0xbf73d5d4,0x62151e6f,0x00523453,0xdd34ffe8 +DD 0xc4dc602b,0x19baab90,0x7bfd81ac,0xa69b4a17 +DD 0x70372bcc,0xad51e077,0xcf16ca4b,0x127001f0 +DD 0x0b989e33,0xd6fe5588,0xb4b97fb4,0x69dfb40f +DD 0x87684032,0x5a0e8b89,0x3849a1b5,0xe52f6a0e +DD 0xfcc7f5cd,0x21a13e76,0x43e6144a,0x9e80dff1 +DD 0x9b658ac1,0x4603417a,0x24446b46,0xf922a0fd +DD 0xe0ca3f3e,0x3dacf485,0x5febdeb9,0x828d1502 +DD 0x6c3ae13f,0xb15c2a84,0xd31b00b8,0x0e7dcb03 +DD 0x179554c0,0xcaf39f7b,0xa8b4b547,0x75d27efc +DD 0x431048bf,0x9e768304,0xfc31a938,0x21576283 +DD 0x38bffd40,0xe5d936fb,0x879e1cc7,0x5af8d77c +DD 0xb44f2341,0x6929e8fa,0x0b6ec2c6,0xd608097d +DD 0xcfe096be,0x12865d05,0x70c17739,0xada7bc82 +DD 0xa842e9b2,0x75242209,0x17630835,0xca05c38e +DD 0xd3ed5c4d,0x0e8b97f6,0x6cccbdca,0xb1aa7671 +DD 0x5f1d824c,0x827b49f7,0xe03c63cb,0x3d5aa870 +DD 0x24b237b3,0xf9d4fc08,0x9b93d634,0x46f51d8f +DD 
0x90597c54,0x4d3fb7ef,0x2f789dd3,0xf21e5668 +DD 0xebf6c9ab,0x36900210,0x54d7282c,0x89b1e397 +DD 0x670617aa,0xba60dc11,0xd827f62d,0x05413d96 +DD 0x1ca9a255,0xc1cf69ee,0xa38843d2,0x7eee8869 +DD 0x7b0bdd59,0xa66d16e2,0xc42a3cde,0x194cf765 +DD 0x00a468a6,0xddc2a31d,0xbf858921,0x62e3429a +DD 0x8c54b6a7,0x51327d1c,0x33755720,0xee139c9b +DD 0xf7fb0358,0x2a9dc8e3,0x48dae2df,0x95bc2964 +DD 0xe06e5798,0x3d089c23,0x5f4fb61f,0x82297da4 +DD 0x9bc1e267,0x46a729dc,0x24e003e0,0xf986c85b +DD 0x17313c66,0xca57f7dd,0xa810dde1,0x7576165a +DD 0x6c9e8999,0xb1f84222,0xd3bf681e,0x0ed9a3a5 +DD 0x0b3cf695,0xd65a3d2e,0xb41d1712,0x697bdca9 +DD 0x7093436a,0xadf588d1,0xcfb2a2ed,0x12d46956 +DD 0xfc639d6b,0x210556d0,0x43427cec,0x9e24b757 +DD 0x87cc2894,0x5aaae32f,0x38edc913,0xe58b02a8 +DD 0x33276373,0xee41a8c8,0x8c0682f4,0x5160494f +DD 0x4888d68c,0x95ee1d37,0xf7a9370b,0x2acffcb0 +DD 0xc478088d,0x191ec336,0x7b59e90a,0xa63f22b1 +DD 0xbfd7bd72,0x62b176c9,0x00f65cf5,0xdd90974e +DD 0xd875c27e,0x051309c5,0x675423f9,0xba32e842 +DD 0xa3da7781,0x7ebcbc3a,0x1cfb9606,0xc19d5dbd +DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc +DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643 + +;;; func core, ver, snum +slversion crc32_iscsi_00, 00, 04, 0014 + diff --git a/src/spdk/isa-l/crc/crc32_iscsi_01.asm b/src/spdk/isa-l/crc/crc32_iscsi_01.asm new file mode 100644 index 000000000..2a815177a --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_iscsi_01.asm @@ -0,0 +1,590 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction + +%include "reg_sizes.asm" + +default rel +%define CONCAT(a,b,c) a %+ b %+ c + +; Define threshold where buffers are considered "small" and routed to more +; efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so +; SMALL_SIZE can be no larger than 256. 
+%define SMALL_SIZE 200 + +%if (SMALL_SIZE > 256) +%error SMALL_ SIZE must be <= 256 +% error ; needed because '%error' actually generates only a warning +%endif + +;;; unsigned int crc32_iscsi_01(unsigned char * buffer, int len, unsigned int crc_init); +;;; +;;; *buf = rcx (elf64: rdi) +;;; len = rdx (elf64: rsi) +;;; crc_init = r8 (elf64: rdx, moved to r8 on entry) + +global crc32_iscsi_01:ISAL_SYM_TYPE_FUNCTION +crc32_iscsi_01: + +%ifidn __OUTPUT_FORMAT__, elf64 +%define bufp rdi +%define bufp_dw edi +%define bufp_w di +%define bufp_b dil +%define bufptmp rcx +%define block_0 rcx +%define block_1 rdx +%define block_2 r11 +%define len rsi +%define len_dw esi +%define len_w si +%define len_b sil +%define crc_init_arg rdx +%else +%define bufp rcx +%define bufp_dw ecx +%define bufp_w cx +%define bufp_b cl +%define bufptmp rdi +%define block_0 rdi +%define block_1 rsi +%define block_2 r11 +%define len rdx +%define len_dw edx +%define len_w dx +%define len_b dl +%endif + +%define tmp rbx +%define crc_init r8 +%define crc_init_dw r8d +%define crc1 r9 +%define crc2 r10 + + push rbx + push rdi + push rsi + + ;; Move crc_init for Linux to a different reg +%ifidn __OUTPUT_FORMAT__, elf64 + mov crc_init, crc_init_arg +%endif + + ;; If len is less than 8 we need to jump to special code to avoid + ;; reading beyond the end of the buffer + cmp len, 8 + jb less_than_8 + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + mov bufptmp, bufp ;; bufptmp = *buf + neg bufp + and bufp, 7 ;; calculate the unalignment amount of + ;; the address + je proc_block ;; Skip if aligned + + ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;; + mov tmp, [bufptmp] ;; load a quadword from the buffer + add bufptmp, bufp ;; align buffer pointer for quadword + ;; processing + sub len, bufp ;; update buffer length +align_loop: + crc32
crc_init_dw, bl ;; compute crc32 of 1-byte + shr tmp, 8 ;; get next byte + dec bufp + jne align_loop + +proc_block: + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; 2) PROCESS BLOCKS: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;; compute num of bytes to be processed + mov tmp, len ;; save num bytes in tmp + + cmp len, 128*24 + jae full_block + +continue_block: + cmp len, SMALL_SIZE + jb small + + ;; len < 128*24 + mov rax, 2731 ;; 2731 = ceil(2^16 / 24) + mul len_dw + shr rax, 16 + + ;; eax contains floor(bytes / 24) = num 24-byte chunks to do + + ;; process rax 24-byte chunks (128 >= rax >= 0) + + ;; compute end address of each block + ;; block_0 -> block 0 (base addr + RAX * 8) + ;; block_1 -> block 1 (base addr + RAX * 16) + ;; block_2 -> block 2 (base addr + RAX * 24) + lea block_0, [bufptmp + rax * 8] + lea block_1, [block_0 + rax * 8] + lea block_2, [block_1 + rax * 8] + + xor crc1,crc1 + xor crc2,crc2 + + ;; branch into array + lea bufp, [jump_table] + movzx len, word [bufp + rax * 2] ;; len is offset from crc_array + lea bufp, [bufp + len + crc_array - jump_table] + jmp bufp + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; 2a) PROCESS FULL BLOCKS: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +full_block: + mov rax, 128 + lea block_1, [block_0 + 128*8*2] + lea block_2, [block_0 + 128*8*3] + add block_0, 128*8*1 + + xor crc1,crc1 + xor crc2,crc2 + +; ;; branch into array +; jmp CONCAT(crc_,128,) + ; Fall through into top of crc array (crc_128) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; 3) CRC Array: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +crc_array: + cmp len, 128*24*2 + jbe non_prefetch + +%assign i 128 +%rep 128-1 + +CONCAT(_crc_,i,:) + crc32 crc_init, qword [block_0 - i*8] + crc32 crc1, qword [block_1 - i*8] + crc32 crc2, qword [block_2 - i*8] + + %if i > 128*8 / 32 ; prefetch next 3KB data + prefetchnta [block_2 + 128*32 - i*32] + %endif + +%assign i (i-1) +%endrep + jmp next_ + +non_prefetch: +%assign i 128 +%rep 128-1 + +CONCAT(crc_,i,:) + crc32 crc_init, qword [block_0 - i*8] + crc32 crc1, qword [block_1 - i*8] + crc32 crc2, qword [block_2 - i*8] +%assign i (i-1) +%endrep + +next_: +CONCAT(crc_,i,:) + crc32 crc_init, qword [block_0 - i*8] + crc32 crc1, qword [block_1 - i*8] +; SKIP ;crc32 crc2, [block_2 - i*8] ; Don't do this one yet + + mov block_0, block_2 + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; 4) Combine three results: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + lea bufp, [K_table - 16] ; first entry is for idx 1 + shl rax, 3 ; rax *= 8 + sub tmp, rax ; tmp -= rax*8 + shl rax, 1 + sub tmp, rax ; tmp -= rax*16 (total tmp -= rax*24) + add bufp, rax + + movdqa xmm0, [bufp] ; 2 consts: K1:K2 + + movq xmm1, crc_init ; CRC for block 1 + pclmulqdq xmm1, xmm0, 0x00 ; Multiply by K2 + + movq xmm2, crc1 ; CRC for block 2 + pclmulqdq xmm2, xmm0, 0x10 ; Multiply by K1 + + pxor xmm1, xmm2 + movq rax, xmm1 + xor rax, [block_2 - i*8] + mov crc_init, crc2 + crc32 crc_init, rax + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; 5) Check for end: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +CONCAT(crc_,0,:) + mov len, tmp + cmp tmp, 128*24 + jae full_block + cmp tmp, 24 + jae continue_block + +fewer_than_24: + ;; now fewer 
than 24 bytes remain + cmp tmp, 16 + jae do_16 + cmp tmp, 8 + jae do_8 + + ;; 0 <= tmp <= 7 + shl ebx, 29 ; size now in bits 31:29 + jz do_return +check_4: + mov bufp, [bufptmp] + shl ebx, 1 ; shift out into carry MSB (orig size & 4) + jnc check_2 + crc32 crc_init_dw, bufp_dw + jz do_return + shr bufp, 32 ; shift data down by 4 bytes +check_2: + shl ebx, 1 ; shift out into carry MSB (orig size & 2) + jnc check_1 + crc32 crc_init_dw, bufp_w + jz do_return + shr bufp, 16 ; shift data down by 2 bytes +check_1: + crc32 crc_init_dw, bufp_b + +do_return: + mov rax, crc_init + pop rsi + pop rdi + pop rbx + ret + +do_8: + crc32 crc_init, qword [bufptmp] + add bufptmp, 8 + shl ebx, 29 ; size (0...7) in bits 31:29 + jnz check_4 + mov rax, crc_init + pop rsi + pop rdi + pop rbx + ret + +do_16: + crc32 crc_init, qword [bufptmp] + crc32 crc_init, qword [bufptmp+8] + add bufptmp, 16 + shl ebx, 29 ; size (0...7) in bits 31:29 + jnz check_4 + mov rax, crc_init + pop rsi + pop rdi + pop rbx + ret + + + + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; Handle the case of fewer than 8 bytes, unaligned. 
In this case + ;; we can't read 8 bytes, as this might go beyond the end of the buffer + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +less_than_8: + test len,4 + jz less_than_4 + crc32 crc_init_dw, dword[bufp] + add bufp,4 +less_than_4: + test len,2 + jz less_than_2 + crc32 crc_init_dw, word[bufp] + add bufp,2 +less_than_2: + test len,1 + jz do_return + crc32 crc_init_dw, byte[bufp] + mov rax, crc_init + pop rsi + pop rdi + pop rbx + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) + +small: + mov rax, crc_init + +bit8: + shl len_b, 1 ;; shift-out MSB (bit-8) + jnc bit7 ;; jump to bit-7 if bit-8 == 0 + %assign i 0 + %rep 16 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return2 ;; return if remaining data is zero + add bufptmp, 128 ;; buf +=128; (next 128 bytes) + +bit7: + shl len_b, 1 ;; shift-out MSB (bit-7) + jnc bit6 ;; jump to bit-6 if bit-7 == 0 + %assign i 0 + %rep 8 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return2 ;; return if remaining data is zero + add bufptmp, 64 ;; buf +=64; (next 64 bytes) +bit6: + shl len_b, 1 ;; shift-out MSB (bit-6) + jnc bit5 ;; jump to bit-5 if bit-6 == 0 + %assign i 0 + %rep 4 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return2 ;; return if remaining data is zero + add bufptmp, 32 ;; buf +=32; (next 32 bytes) +bit5: + shl len_b, 1 ;; shift-out MSB (bit-5) + jnc bit4 ;; jump to bit-4 if bit-5 == 0 + %assign i 0 + %rep 2 + crc32 rax, qword [bufptmp+i] ;; compute crc32 of 8-byte data + %assign i (i+8) + %endrep + je do_return2 ;; return if remaining data is zero + add bufptmp, 16 ;; buf +=16; (next 16 bytes) +bit4: + shl len_b, 1 ;; shift-out MSB (bit-4) + jnc bit3 ;; jump to bit-3 if bit-4 == 0 + crc32 rax, qword [bufptmp] ;; compute crc32 of
8-byte data + je do_return2 ;; return if remaining data is zero + add bufptmp, 8 ;; buf +=8; (next 8 bytes) +bit3: + mov rbx, qword [bufptmp] ;; load a 8-bytes from the buffer: + shl len_b, 1 ;; shift-out MSB (bit-3) + jnc bit2 ;; jump to bit-2 if bit-3 == 0 + crc32 eax, ebx ;; compute crc32 of 4-byte data + je do_return2 ;; return if remaining data is zero + shr rbx, 32 ;; get next 3 bytes +bit2: + shl len_b, 1 ;; shift-out MSB (bit-2) + jnc bit1 ;; jump to bit-1 if bit-2 == 0 + crc32 eax, bx ;; compute crc32 of 2-byte data + je do_return2 ;; return if remaining data is zero + shr rbx, 16 ;; next byte +bit1: + test len_b,len_b + je do_return2 + crc32 eax, bl ;; compute crc32 of 1-byte data +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +do_return2: + pop rsi + pop rdi + pop rbx + ret + + + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; jump table ;; Table is 129 entries x 2 bytes each + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +align 4 +jump_table: +%assign i 0 +%rep 129 + dw CONCAT(crc_,i,) - crc_array +%assign i (i+1) +%endrep + + + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; PCLMULQDQ tables + ;; Table is 128 entries x 2 quad words each + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +section .data +align 64 +K_table: + dq 0x14cd00bd6, 0x105ec76f0 + dq 0x0ba4fc28e, 0x14cd00bd6 + dq 0x1d82c63da, 0x0f20c0dfe + dq 0x09e4addf8, 0x0ba4fc28e + dq 0x039d3b296, 0x1384aa63a + dq 0x102f9b8a2, 0x1d82c63da + dq 0x14237f5e6, 0x01c291d04 + dq 0x00d3b6092, 0x09e4addf8 + dq 0x0c96cfdc0, 0x0740eef02 + dq 0x18266e456, 
0x039d3b296 + dq 0x0daece73e, 0x0083a6eec + dq 0x0ab7aff2a, 0x102f9b8a2 + dq 0x1248ea574, 0x1c1733996 + dq 0x083348832, 0x14237f5e6 + dq 0x12c743124, 0x02ad91c30 + dq 0x0b9e02b86, 0x00d3b6092 + dq 0x018b33a4e, 0x06992cea2 + dq 0x1b331e26a, 0x0c96cfdc0 + dq 0x17d35ba46, 0x07e908048 + dq 0x1bf2e8b8a, 0x18266e456 + dq 0x1a3e0968a, 0x11ed1f9d8 + dq 0x0ce7f39f4, 0x0daece73e + dq 0x061d82e56, 0x0f1d0f55e + dq 0x0d270f1a2, 0x0ab7aff2a + dq 0x1c3f5f66c, 0x0a87ab8a8 + dq 0x12ed0daac, 0x1248ea574 + dq 0x065863b64, 0x08462d800 + dq 0x11eef4f8e, 0x083348832 + dq 0x1ee54f54c, 0x071d111a8 + dq 0x0b3e32c28, 0x12c743124 + dq 0x0064f7f26, 0x0ffd852c6 + dq 0x0dd7e3b0c, 0x0b9e02b86 + dq 0x0f285651c, 0x0dcb17aa4 + dq 0x010746f3c, 0x018b33a4e + dq 0x1c24afea4, 0x0f37c5aee + dq 0x0271d9844, 0x1b331e26a + dq 0x08e766a0c, 0x06051d5a2 + dq 0x093a5f730, 0x17d35ba46 + dq 0x06cb08e5c, 0x11d5ca20e + dq 0x06b749fb2, 0x1bf2e8b8a + dq 0x1167f94f2, 0x021f3d99c + dq 0x0cec3662e, 0x1a3e0968a + dq 0x19329634a, 0x08f158014 + dq 0x0e6fc4e6a, 0x0ce7f39f4 + dq 0x08227bb8a, 0x1a5e82106 + dq 0x0b0cd4768, 0x061d82e56 + dq 0x13c2b89c4, 0x188815ab2 + dq 0x0d7a4825c, 0x0d270f1a2 + dq 0x10f5ff2ba, 0x105405f3e + dq 0x00167d312, 0x1c3f5f66c + dq 0x0f6076544, 0x0e9adf796 + dq 0x026f6a60a, 0x12ed0daac + dq 0x1a2adb74e, 0x096638b34 + dq 0x19d34af3a, 0x065863b64 + dq 0x049c3cc9c, 0x1e50585a0 + dq 0x068bce87a, 0x11eef4f8e + dq 0x1524fa6c6, 0x19f1c69dc + dq 0x16cba8aca, 0x1ee54f54c + dq 0x042d98888, 0x12913343e + dq 0x1329d9f7e, 0x0b3e32c28 + dq 0x1b1c69528, 0x088f25a3a + dq 0x02178513a, 0x0064f7f26 + dq 0x0e0ac139e, 0x04e36f0b0 + dq 0x0170076fa, 0x0dd7e3b0c + dq 0x141a1a2e2, 0x0bd6f81f8 + dq 0x16ad828b4, 0x0f285651c + dq 0x041d17b64, 0x19425cbba + dq 0x1fae1cc66, 0x010746f3c + dq 0x1a75b4b00, 0x18db37e8a + dq 0x0f872e54c, 0x1c24afea4 + dq 0x01e41e9fc, 0x04c144932 + dq 0x086d8e4d2, 0x0271d9844 + dq 0x160f7af7a, 0x052148f02 + dq 0x05bb8f1bc, 0x08e766a0c + dq 0x0a90fd27a, 0x0a3c6f37a + dq 0x0b3af077a, 0x093a5f730 + dq 
0x04984d782, 0x1d22c238e + dq 0x0ca6ef3ac, 0x06cb08e5c + dq 0x0234e0b26, 0x063ded06a + dq 0x1d88abd4a, 0x06b749fb2 + dq 0x04597456a, 0x04d56973c + dq 0x0e9e28eb4, 0x1167f94f2 + dq 0x07b3ff57a, 0x19385bf2e + dq 0x0c9c8b782, 0x0cec3662e + dq 0x13a9cba9e, 0x0e417f38a + dq 0x093e106a4, 0x19329634a + dq 0x167001a9c, 0x14e727980 + dq 0x1ddffc5d4, 0x0e6fc4e6a + dq 0x00df04680, 0x0d104b8fc + dq 0x02342001e, 0x08227bb8a + dq 0x00a2a8d7e, 0x05b397730 + dq 0x168763fa6, 0x0b0cd4768 + dq 0x1ed5a407a, 0x0e78eb416 + dq 0x0d2c3ed1a, 0x13c2b89c4 + dq 0x0995a5724, 0x1641378f0 + dq 0x19b1afbc4, 0x0d7a4825c + dq 0x109ffedc0, 0x08d96551c + dq 0x0f2271e60, 0x10f5ff2ba + dq 0x00b0bf8ca, 0x00bf80dd2 + dq 0x123888b7a, 0x00167d312 + dq 0x1e888f7dc, 0x18dcddd1c + dq 0x002ee03b2, 0x0f6076544 + dq 0x183e8d8fe, 0x06a45d2b2 + dq 0x133d7a042, 0x026f6a60a + dq 0x116b0f50c, 0x1dd3e10e8 + dq 0x05fabe670, 0x1a2adb74e + dq 0x130004488, 0x0de87806c + dq 0x000bcf5f6, 0x19d34af3a + dq 0x18f0c7078, 0x014338754 + dq 0x017f27698, 0x049c3cc9c + dq 0x058ca5f00, 0x15e3e77ee + dq 0x1af900c24, 0x068bce87a + dq 0x0b5cfca28, 0x0dd07448e + dq 0x0ded288f8, 0x1524fa6c6 + dq 0x059f229bc, 0x1d8048348 + dq 0x06d390dec, 0x16cba8aca + dq 0x037170390, 0x0a3e3e02c + dq 0x06353c1cc, 0x042d98888 + dq 0x0c4584f5c, 0x0d73c7bea + dq 0x1f16a3418, 0x1329d9f7e + dq 0x0531377e2, 0x185137662 + dq 0x1d8d9ca7c, 0x1b1c69528 + dq 0x0b25b29f2, 0x18a08b5bc + dq 0x19fb2a8b0, 0x02178513a + dq 0x1a08fe6ac, 0x1da758ae0 + dq 0x045cddf4e, 0x0e0ac139e + dq 0x1a91647f2, 0x169cf9eb0 + dq 0x1a0f717c4, 0x0170076fa + +;;; func core, ver, snum +slversion crc32_iscsi_01, 01, 04, 0015 + diff --git a/src/spdk/isa-l/crc/crc32_iscsi_perf.c b/src/spdk/isa-l/crc/crc32_iscsi_perf.c new file mode 100644 index 000000000..d768cdfa6 --- /dev/null +++ b/src/spdk/isa-l/crc/crc32_iscsi_perf.c @@ -0,0 +1,79 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/time.h> +#include "crc.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_LEN 8*1024 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN (2 * GT_L3_CACHE) +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define TEST_MEM TEST_LEN + +int main(int argc, char *argv[]) +{ + void *buf; + uint32_t crc; + struct perf start; + + printf("crc32_iscsi_perf:\n"); + + if (posix_memalign(&buf, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + + printf("Start timed tests\n"); + fflush(0); + + memset(buf, 0, TEST_LEN); + BENCHMARK(&start, BENCHMARK_TIME, crc = crc32_iscsi(buf, TEST_LEN, TEST_SEED)); + printf("crc32_iscsi" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + printf("finish 0x%x\n", crc); + return 0; +} diff --git a/src/spdk/isa-l/crc/crc64_base.c b/src/spdk/isa-l/crc/crc64_base.c new file mode 100644 index 000000000..7cf5a69cf --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_base.c @@ -0,0 +1,912 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "crc64.h" + +static const uint64_t crc64_ecma_refl_table[256] = { + 0x0000000000000000ULL, 0xb32e4cbe03a75f6fULL, + 0xf4843657a840a05bULL, 0x47aa7ae9abe7ff34ULL, + 0x7bd0c384ff8f5e33ULL, 0xc8fe8f3afc28015cULL, + 0x8f54f5d357cffe68ULL, 0x3c7ab96d5468a107ULL, + 0xf7a18709ff1ebc66ULL, 0x448fcbb7fcb9e309ULL, + 0x0325b15e575e1c3dULL, 0xb00bfde054f94352ULL, + 0x8c71448d0091e255ULL, 0x3f5f08330336bd3aULL, + 0x78f572daa8d1420eULL, 0xcbdb3e64ab761d61ULL, + 0x7d9ba13851336649ULL, 0xceb5ed8652943926ULL, + 0x891f976ff973c612ULL, 0x3a31dbd1fad4997dULL, + 0x064b62bcaebc387aULL, 0xb5652e02ad1b6715ULL, + 0xf2cf54eb06fc9821ULL, 0x41e11855055bc74eULL, + 0x8a3a2631ae2dda2fULL, 0x39146a8fad8a8540ULL, + 0x7ebe1066066d7a74ULL, 0xcd905cd805ca251bULL, + 0xf1eae5b551a2841cULL, 0x42c4a90b5205db73ULL, + 0x056ed3e2f9e22447ULL, 0xb6409f5cfa457b28ULL, + 0xfb374270a266cc92ULL, 0x48190ecea1c193fdULL, + 0x0fb374270a266cc9ULL, 0xbc9d3899098133a6ULL, + 0x80e781f45de992a1ULL, 0x33c9cd4a5e4ecdceULL, + 0x7463b7a3f5a932faULL, 0xc74dfb1df60e6d95ULL, + 0x0c96c5795d7870f4ULL, 0xbfb889c75edf2f9bULL, + 0xf812f32ef538d0afULL, 0x4b3cbf90f69f8fc0ULL, + 
0x774606fda2f72ec7ULL, 0xc4684a43a15071a8ULL, + 0x83c230aa0ab78e9cULL, 0x30ec7c140910d1f3ULL, + 0x86ace348f355aadbULL, 0x3582aff6f0f2f5b4ULL, + 0x7228d51f5b150a80ULL, 0xc10699a158b255efULL, + 0xfd7c20cc0cdaf4e8ULL, 0x4e526c720f7dab87ULL, + 0x09f8169ba49a54b3ULL, 0xbad65a25a73d0bdcULL, + 0x710d64410c4b16bdULL, 0xc22328ff0fec49d2ULL, + 0x85895216a40bb6e6ULL, 0x36a71ea8a7ace989ULL, + 0x0adda7c5f3c4488eULL, 0xb9f3eb7bf06317e1ULL, + 0xfe5991925b84e8d5ULL, 0x4d77dd2c5823b7baULL, + 0x64b62bcaebc387a1ULL, 0xd7986774e864d8ceULL, + 0x90321d9d438327faULL, 0x231c512340247895ULL, + 0x1f66e84e144cd992ULL, 0xac48a4f017eb86fdULL, + 0xebe2de19bc0c79c9ULL, 0x58cc92a7bfab26a6ULL, + 0x9317acc314dd3bc7ULL, 0x2039e07d177a64a8ULL, + 0x67939a94bc9d9b9cULL, 0xd4bdd62abf3ac4f3ULL, + 0xe8c76f47eb5265f4ULL, 0x5be923f9e8f53a9bULL, + 0x1c4359104312c5afULL, 0xaf6d15ae40b59ac0ULL, + 0x192d8af2baf0e1e8ULL, 0xaa03c64cb957be87ULL, + 0xeda9bca512b041b3ULL, 0x5e87f01b11171edcULL, + 0x62fd4976457fbfdbULL, 0xd1d305c846d8e0b4ULL, + 0x96797f21ed3f1f80ULL, 0x2557339fee9840efULL, + 0xee8c0dfb45ee5d8eULL, 0x5da24145464902e1ULL, + 0x1a083bacedaefdd5ULL, 0xa9267712ee09a2baULL, + 0x955cce7fba6103bdULL, 0x267282c1b9c65cd2ULL, + 0x61d8f8281221a3e6ULL, 0xd2f6b4961186fc89ULL, + 0x9f8169ba49a54b33ULL, 0x2caf25044a02145cULL, + 0x6b055fede1e5eb68ULL, 0xd82b1353e242b407ULL, + 0xe451aa3eb62a1500ULL, 0x577fe680b58d4a6fULL, + 0x10d59c691e6ab55bULL, 0xa3fbd0d71dcdea34ULL, + 0x6820eeb3b6bbf755ULL, 0xdb0ea20db51ca83aULL, + 0x9ca4d8e41efb570eULL, 0x2f8a945a1d5c0861ULL, + 0x13f02d374934a966ULL, 0xa0de61894a93f609ULL, + 0xe7741b60e174093dULL, 0x545a57dee2d35652ULL, + 0xe21ac88218962d7aULL, 0x5134843c1b317215ULL, + 0x169efed5b0d68d21ULL, 0xa5b0b26bb371d24eULL, + 0x99ca0b06e7197349ULL, 0x2ae447b8e4be2c26ULL, + 0x6d4e3d514f59d312ULL, 0xde6071ef4cfe8c7dULL, + 0x15bb4f8be788911cULL, 0xa6950335e42fce73ULL, + 0xe13f79dc4fc83147ULL, 0x521135624c6f6e28ULL, + 0x6e6b8c0f1807cf2fULL, 0xdd45c0b11ba09040ULL, + 0x9aefba58b0476f74ULL, 
0x29c1f6e6b3e0301bULL, + 0xc96c5795d7870f42ULL, 0x7a421b2bd420502dULL, + 0x3de861c27fc7af19ULL, 0x8ec62d7c7c60f076ULL, + 0xb2bc941128085171ULL, 0x0192d8af2baf0e1eULL, + 0x4638a2468048f12aULL, 0xf516eef883efae45ULL, + 0x3ecdd09c2899b324ULL, 0x8de39c222b3eec4bULL, + 0xca49e6cb80d9137fULL, 0x7967aa75837e4c10ULL, + 0x451d1318d716ed17ULL, 0xf6335fa6d4b1b278ULL, + 0xb199254f7f564d4cULL, 0x02b769f17cf11223ULL, + 0xb4f7f6ad86b4690bULL, 0x07d9ba1385133664ULL, + 0x4073c0fa2ef4c950ULL, 0xf35d8c442d53963fULL, + 0xcf273529793b3738ULL, 0x7c0979977a9c6857ULL, + 0x3ba3037ed17b9763ULL, 0x888d4fc0d2dcc80cULL, + 0x435671a479aad56dULL, 0xf0783d1a7a0d8a02ULL, + 0xb7d247f3d1ea7536ULL, 0x04fc0b4dd24d2a59ULL, + 0x3886b22086258b5eULL, 0x8ba8fe9e8582d431ULL, + 0xcc0284772e652b05ULL, 0x7f2cc8c92dc2746aULL, + 0x325b15e575e1c3d0ULL, 0x8175595b76469cbfULL, + 0xc6df23b2dda1638bULL, 0x75f16f0cde063ce4ULL, + 0x498bd6618a6e9de3ULL, 0xfaa59adf89c9c28cULL, + 0xbd0fe036222e3db8ULL, 0x0e21ac88218962d7ULL, + 0xc5fa92ec8aff7fb6ULL, 0x76d4de52895820d9ULL, + 0x317ea4bb22bfdfedULL, 0x8250e80521188082ULL, + 0xbe2a516875702185ULL, 0x0d041dd676d77eeaULL, + 0x4aae673fdd3081deULL, 0xf9802b81de97deb1ULL, + 0x4fc0b4dd24d2a599ULL, 0xfceef8632775faf6ULL, + 0xbb44828a8c9205c2ULL, 0x086ace348f355aadULL, + 0x34107759db5dfbaaULL, 0x873e3be7d8faa4c5ULL, + 0xc094410e731d5bf1ULL, 0x73ba0db070ba049eULL, + 0xb86133d4dbcc19ffULL, 0x0b4f7f6ad86b4690ULL, + 0x4ce50583738cb9a4ULL, 0xffcb493d702be6cbULL, + 0xc3b1f050244347ccULL, 0x709fbcee27e418a3ULL, + 0x3735c6078c03e797ULL, 0x841b8ab98fa4b8f8ULL, + 0xadda7c5f3c4488e3ULL, 0x1ef430e13fe3d78cULL, + 0x595e4a08940428b8ULL, 0xea7006b697a377d7ULL, + 0xd60abfdbc3cbd6d0ULL, 0x6524f365c06c89bfULL, + 0x228e898c6b8b768bULL, 0x91a0c532682c29e4ULL, + 0x5a7bfb56c35a3485ULL, 0xe955b7e8c0fd6beaULL, + 0xaeffcd016b1a94deULL, 0x1dd181bf68bdcbb1ULL, + 0x21ab38d23cd56ab6ULL, 0x9285746c3f7235d9ULL, + 0xd52f0e859495caedULL, 0x6601423b97329582ULL, + 0xd041dd676d77eeaaULL, 0x636f91d96ed0b1c5ULL, + 
0x24c5eb30c5374ef1ULL, 0x97eba78ec690119eULL, + 0xab911ee392f8b099ULL, 0x18bf525d915feff6ULL, + 0x5f1528b43ab810c2ULL, 0xec3b640a391f4fadULL, + 0x27e05a6e926952ccULL, 0x94ce16d091ce0da3ULL, + 0xd3646c393a29f297ULL, 0x604a2087398eadf8ULL, + 0x5c3099ea6de60cffULL, 0xef1ed5546e415390ULL, + 0xa8b4afbdc5a6aca4ULL, 0x1b9ae303c601f3cbULL, + 0x56ed3e2f9e224471ULL, 0xe5c372919d851b1eULL, + 0xa26908783662e42aULL, 0x114744c635c5bb45ULL, + 0x2d3dfdab61ad1a42ULL, 0x9e13b115620a452dULL, + 0xd9b9cbfcc9edba19ULL, 0x6a978742ca4ae576ULL, + 0xa14cb926613cf817ULL, 0x1262f598629ba778ULL, + 0x55c88f71c97c584cULL, 0xe6e6c3cfcadb0723ULL, + 0xda9c7aa29eb3a624ULL, 0x69b2361c9d14f94bULL, + 0x2e184cf536f3067fULL, 0x9d36004b35545910ULL, + 0x2b769f17cf112238ULL, 0x9858d3a9ccb67d57ULL, + 0xdff2a94067518263ULL, 0x6cdce5fe64f6dd0cULL, + 0x50a65c93309e7c0bULL, 0xe388102d33392364ULL, + 0xa4226ac498dedc50ULL, 0x170c267a9b79833fULL, + 0xdcd7181e300f9e5eULL, 0x6ff954a033a8c131ULL, + 0x28532e49984f3e05ULL, 0x9b7d62f79be8616aULL, + 0xa707db9acf80c06dULL, 0x14299724cc279f02ULL, + 0x5383edcd67c06036ULL, 0xe0ada17364673f59ULL +}; + +static const uint64_t crc64_ecma_norm_table[256] = { + 0x0000000000000000ULL, 0x42f0e1eba9ea3693ULL, + 0x85e1c3d753d46d26ULL, 0xc711223cfa3e5bb5ULL, + 0x493366450e42ecdfULL, 0x0bc387aea7a8da4cULL, + 0xccd2a5925d9681f9ULL, 0x8e224479f47cb76aULL, + 0x9266cc8a1c85d9beULL, 0xd0962d61b56fef2dULL, + 0x17870f5d4f51b498ULL, 0x5577eeb6e6bb820bULL, + 0xdb55aacf12c73561ULL, 0x99a54b24bb2d03f2ULL, + 0x5eb4691841135847ULL, 0x1c4488f3e8f96ed4ULL, + 0x663d78ff90e185efULL, 0x24cd9914390bb37cULL, + 0xe3dcbb28c335e8c9ULL, 0xa12c5ac36adfde5aULL, + 0x2f0e1eba9ea36930ULL, 0x6dfeff5137495fa3ULL, + 0xaaefdd6dcd770416ULL, 0xe81f3c86649d3285ULL, + 0xf45bb4758c645c51ULL, 0xb6ab559e258e6ac2ULL, + 0x71ba77a2dfb03177ULL, 0x334a9649765a07e4ULL, + 0xbd68d2308226b08eULL, 0xff9833db2bcc861dULL, + 0x388911e7d1f2dda8ULL, 0x7a79f00c7818eb3bULL, + 0xcc7af1ff21c30bdeULL, 0x8e8a101488293d4dULL, + 
0x499b3228721766f8ULL, 0x0b6bd3c3dbfd506bULL, + 0x854997ba2f81e701ULL, 0xc7b97651866bd192ULL, + 0x00a8546d7c558a27ULL, 0x4258b586d5bfbcb4ULL, + 0x5e1c3d753d46d260ULL, 0x1cecdc9e94ace4f3ULL, + 0xdbfdfea26e92bf46ULL, 0x990d1f49c77889d5ULL, + 0x172f5b3033043ebfULL, 0x55dfbadb9aee082cULL, + 0x92ce98e760d05399ULL, 0xd03e790cc93a650aULL, + 0xaa478900b1228e31ULL, 0xe8b768eb18c8b8a2ULL, + 0x2fa64ad7e2f6e317ULL, 0x6d56ab3c4b1cd584ULL, + 0xe374ef45bf6062eeULL, 0xa1840eae168a547dULL, + 0x66952c92ecb40fc8ULL, 0x2465cd79455e395bULL, + 0x3821458aada7578fULL, 0x7ad1a461044d611cULL, + 0xbdc0865dfe733aa9ULL, 0xff3067b657990c3aULL, + 0x711223cfa3e5bb50ULL, 0x33e2c2240a0f8dc3ULL, + 0xf4f3e018f031d676ULL, 0xb60301f359dbe0e5ULL, + 0xda050215ea6c212fULL, 0x98f5e3fe438617bcULL, + 0x5fe4c1c2b9b84c09ULL, 0x1d14202910527a9aULL, + 0x93366450e42ecdf0ULL, 0xd1c685bb4dc4fb63ULL, + 0x16d7a787b7faa0d6ULL, 0x5427466c1e109645ULL, + 0x4863ce9ff6e9f891ULL, 0x0a932f745f03ce02ULL, + 0xcd820d48a53d95b7ULL, 0x8f72eca30cd7a324ULL, + 0x0150a8daf8ab144eULL, 0x43a04931514122ddULL, + 0x84b16b0dab7f7968ULL, 0xc6418ae602954ffbULL, + 0xbc387aea7a8da4c0ULL, 0xfec89b01d3679253ULL, + 0x39d9b93d2959c9e6ULL, 0x7b2958d680b3ff75ULL, + 0xf50b1caf74cf481fULL, 0xb7fbfd44dd257e8cULL, + 0x70eadf78271b2539ULL, 0x321a3e938ef113aaULL, + 0x2e5eb66066087d7eULL, 0x6cae578bcfe24bedULL, + 0xabbf75b735dc1058ULL, 0xe94f945c9c3626cbULL, + 0x676dd025684a91a1ULL, 0x259d31cec1a0a732ULL, + 0xe28c13f23b9efc87ULL, 0xa07cf2199274ca14ULL, + 0x167ff3eacbaf2af1ULL, 0x548f120162451c62ULL, + 0x939e303d987b47d7ULL, 0xd16ed1d631917144ULL, + 0x5f4c95afc5edc62eULL, 0x1dbc74446c07f0bdULL, + 0xdaad56789639ab08ULL, 0x985db7933fd39d9bULL, + 0x84193f60d72af34fULL, 0xc6e9de8b7ec0c5dcULL, + 0x01f8fcb784fe9e69ULL, 0x43081d5c2d14a8faULL, + 0xcd2a5925d9681f90ULL, 0x8fdab8ce70822903ULL, + 0x48cb9af28abc72b6ULL, 0x0a3b7b1923564425ULL, + 0x70428b155b4eaf1eULL, 0x32b26afef2a4998dULL, + 0xf5a348c2089ac238ULL, 0xb753a929a170f4abULL, + 0x3971ed50550c43c1ULL, 
0x7b810cbbfce67552ULL, + 0xbc902e8706d82ee7ULL, 0xfe60cf6caf321874ULL, + 0xe224479f47cb76a0ULL, 0xa0d4a674ee214033ULL, + 0x67c58448141f1b86ULL, 0x253565a3bdf52d15ULL, + 0xab1721da49899a7fULL, 0xe9e7c031e063acecULL, + 0x2ef6e20d1a5df759ULL, 0x6c0603e6b3b7c1caULL, + 0xf6fae5c07d3274cdULL, 0xb40a042bd4d8425eULL, + 0x731b26172ee619ebULL, 0x31ebc7fc870c2f78ULL, + 0xbfc9838573709812ULL, 0xfd39626eda9aae81ULL, + 0x3a28405220a4f534ULL, 0x78d8a1b9894ec3a7ULL, + 0x649c294a61b7ad73ULL, 0x266cc8a1c85d9be0ULL, + 0xe17dea9d3263c055ULL, 0xa38d0b769b89f6c6ULL, + 0x2daf4f0f6ff541acULL, 0x6f5faee4c61f773fULL, + 0xa84e8cd83c212c8aULL, 0xeabe6d3395cb1a19ULL, + 0x90c79d3fedd3f122ULL, 0xd2377cd44439c7b1ULL, + 0x15265ee8be079c04ULL, 0x57d6bf0317edaa97ULL, + 0xd9f4fb7ae3911dfdULL, 0x9b041a914a7b2b6eULL, + 0x5c1538adb04570dbULL, 0x1ee5d94619af4648ULL, + 0x02a151b5f156289cULL, 0x4051b05e58bc1e0fULL, + 0x87409262a28245baULL, 0xc5b073890b687329ULL, + 0x4b9237f0ff14c443ULL, 0x0962d61b56fef2d0ULL, + 0xce73f427acc0a965ULL, 0x8c8315cc052a9ff6ULL, + 0x3a80143f5cf17f13ULL, 0x7870f5d4f51b4980ULL, + 0xbf61d7e80f251235ULL, 0xfd913603a6cf24a6ULL, + 0x73b3727a52b393ccULL, 0x31439391fb59a55fULL, + 0xf652b1ad0167feeaULL, 0xb4a25046a88dc879ULL, + 0xa8e6d8b54074a6adULL, 0xea16395ee99e903eULL, + 0x2d071b6213a0cb8bULL, 0x6ff7fa89ba4afd18ULL, + 0xe1d5bef04e364a72ULL, 0xa3255f1be7dc7ce1ULL, + 0x64347d271de22754ULL, 0x26c49cccb40811c7ULL, + 0x5cbd6cc0cc10fafcULL, 0x1e4d8d2b65facc6fULL, + 0xd95caf179fc497daULL, 0x9bac4efc362ea149ULL, + 0x158e0a85c2521623ULL, 0x577eeb6e6bb820b0ULL, + 0x906fc95291867b05ULL, 0xd29f28b9386c4d96ULL, + 0xcedba04ad0952342ULL, 0x8c2b41a1797f15d1ULL, + 0x4b3a639d83414e64ULL, 0x09ca82762aab78f7ULL, + 0x87e8c60fded7cf9dULL, 0xc51827e4773df90eULL, + 0x020905d88d03a2bbULL, 0x40f9e43324e99428ULL, + 0x2cffe7d5975e55e2ULL, 0x6e0f063e3eb46371ULL, + 0xa91e2402c48a38c4ULL, 0xebeec5e96d600e57ULL, + 0x65cc8190991cb93dULL, 0x273c607b30f68faeULL, + 0xe02d4247cac8d41bULL, 0xa2dda3ac6322e288ULL, + 
0xbe992b5f8bdb8c5cULL, 0xfc69cab42231bacfULL, + 0x3b78e888d80fe17aULL, 0x7988096371e5d7e9ULL, + 0xf7aa4d1a85996083ULL, 0xb55aacf12c735610ULL, + 0x724b8ecdd64d0da5ULL, 0x30bb6f267fa73b36ULL, + 0x4ac29f2a07bfd00dULL, 0x08327ec1ae55e69eULL, + 0xcf235cfd546bbd2bULL, 0x8dd3bd16fd818bb8ULL, + 0x03f1f96f09fd3cd2ULL, 0x41011884a0170a41ULL, + 0x86103ab85a2951f4ULL, 0xc4e0db53f3c36767ULL, + 0xd8a453a01b3a09b3ULL, 0x9a54b24bb2d03f20ULL, + 0x5d45907748ee6495ULL, 0x1fb5719ce1045206ULL, + 0x919735e51578e56cULL, 0xd367d40ebc92d3ffULL, + 0x1476f63246ac884aULL, 0x568617d9ef46bed9ULL, + 0xe085162ab69d5e3cULL, 0xa275f7c11f7768afULL, + 0x6564d5fde549331aULL, 0x279434164ca30589ULL, + 0xa9b6706fb8dfb2e3ULL, 0xeb46918411358470ULL, + 0x2c57b3b8eb0bdfc5ULL, 0x6ea7525342e1e956ULL, + 0x72e3daa0aa188782ULL, 0x30133b4b03f2b111ULL, + 0xf7021977f9cceaa4ULL, 0xb5f2f89c5026dc37ULL, + 0x3bd0bce5a45a6b5dULL, 0x79205d0e0db05dceULL, + 0xbe317f32f78e067bULL, 0xfcc19ed95e6430e8ULL, + 0x86b86ed5267cdbd3ULL, 0xc4488f3e8f96ed40ULL, + 0x0359ad0275a8b6f5ULL, 0x41a94ce9dc428066ULL, + 0xcf8b0890283e370cULL, 0x8d7be97b81d4019fULL, + 0x4a6acb477bea5a2aULL, 0x089a2aacd2006cb9ULL, + 0x14dea25f3af9026dULL, 0x562e43b4931334feULL, + 0x913f6188692d6f4bULL, 0xd3cf8063c0c759d8ULL, + 0x5dedc41a34bbeeb2ULL, 0x1f1d25f19d51d821ULL, + 0xd80c07cd676f8394ULL, 0x9afce626ce85b507ULL +}; + +static const uint64_t crc64_iso_refl_table[256] = { + 0x0000000000000000ULL, 0x01b0000000000000ULL, + 0x0360000000000000ULL, 0x02d0000000000000ULL, + 0x06c0000000000000ULL, 0x0770000000000000ULL, + 0x05a0000000000000ULL, 0x0410000000000000ULL, + 0x0d80000000000000ULL, 0x0c30000000000000ULL, + 0x0ee0000000000000ULL, 0x0f50000000000000ULL, + 0x0b40000000000000ULL, 0x0af0000000000000ULL, + 0x0820000000000000ULL, 0x0990000000000000ULL, + 0x1b00000000000000ULL, 0x1ab0000000000000ULL, + 0x1860000000000000ULL, 0x19d0000000000000ULL, + 0x1dc0000000000000ULL, 0x1c70000000000000ULL, + 0x1ea0000000000000ULL, 0x1f10000000000000ULL, + 
0x1680000000000000ULL, 0x1730000000000000ULL, + 0x15e0000000000000ULL, 0x1450000000000000ULL, + 0x1040000000000000ULL, 0x11f0000000000000ULL, + 0x1320000000000000ULL, 0x1290000000000000ULL, + 0x3600000000000000ULL, 0x37b0000000000000ULL, + 0x3560000000000000ULL, 0x34d0000000000000ULL, + 0x30c0000000000000ULL, 0x3170000000000000ULL, + 0x33a0000000000000ULL, 0x3210000000000000ULL, + 0x3b80000000000000ULL, 0x3a30000000000000ULL, + 0x38e0000000000000ULL, 0x3950000000000000ULL, + 0x3d40000000000000ULL, 0x3cf0000000000000ULL, + 0x3e20000000000000ULL, 0x3f90000000000000ULL, + 0x2d00000000000000ULL, 0x2cb0000000000000ULL, + 0x2e60000000000000ULL, 0x2fd0000000000000ULL, + 0x2bc0000000000000ULL, 0x2a70000000000000ULL, + 0x28a0000000000000ULL, 0x2910000000000000ULL, + 0x2080000000000000ULL, 0x2130000000000000ULL, + 0x23e0000000000000ULL, 0x2250000000000000ULL, + 0x2640000000000000ULL, 0x27f0000000000000ULL, + 0x2520000000000000ULL, 0x2490000000000000ULL, + 0x6c00000000000000ULL, 0x6db0000000000000ULL, + 0x6f60000000000000ULL, 0x6ed0000000000000ULL, + 0x6ac0000000000000ULL, 0x6b70000000000000ULL, + 0x69a0000000000000ULL, 0x6810000000000000ULL, + 0x6180000000000000ULL, 0x6030000000000000ULL, + 0x62e0000000000000ULL, 0x6350000000000000ULL, + 0x6740000000000000ULL, 0x66f0000000000000ULL, + 0x6420000000000000ULL, 0x6590000000000000ULL, + 0x7700000000000000ULL, 0x76b0000000000000ULL, + 0x7460000000000000ULL, 0x75d0000000000000ULL, + 0x71c0000000000000ULL, 0x7070000000000000ULL, + 0x72a0000000000000ULL, 0x7310000000000000ULL, + 0x7a80000000000000ULL, 0x7b30000000000000ULL, + 0x79e0000000000000ULL, 0x7850000000000000ULL, + 0x7c40000000000000ULL, 0x7df0000000000000ULL, + 0x7f20000000000000ULL, 0x7e90000000000000ULL, + 0x5a00000000000000ULL, 0x5bb0000000000000ULL, + 0x5960000000000000ULL, 0x58d0000000000000ULL, + 0x5cc0000000000000ULL, 0x5d70000000000000ULL, + 0x5fa0000000000000ULL, 0x5e10000000000000ULL, + 0x5780000000000000ULL, 0x5630000000000000ULL, + 0x54e0000000000000ULL, 
0x5550000000000000ULL, + 0x5140000000000000ULL, 0x50f0000000000000ULL, + 0x5220000000000000ULL, 0x5390000000000000ULL, + 0x4100000000000000ULL, 0x40b0000000000000ULL, + 0x4260000000000000ULL, 0x43d0000000000000ULL, + 0x47c0000000000000ULL, 0x4670000000000000ULL, + 0x44a0000000000000ULL, 0x4510000000000000ULL, + 0x4c80000000000000ULL, 0x4d30000000000000ULL, + 0x4fe0000000000000ULL, 0x4e50000000000000ULL, + 0x4a40000000000000ULL, 0x4bf0000000000000ULL, + 0x4920000000000000ULL, 0x4890000000000000ULL, + 0xd800000000000000ULL, 0xd9b0000000000000ULL, + 0xdb60000000000000ULL, 0xdad0000000000000ULL, + 0xdec0000000000000ULL, 0xdf70000000000000ULL, + 0xdda0000000000000ULL, 0xdc10000000000000ULL, + 0xd580000000000000ULL, 0xd430000000000000ULL, + 0xd6e0000000000000ULL, 0xd750000000000000ULL, + 0xd340000000000000ULL, 0xd2f0000000000000ULL, + 0xd020000000000000ULL, 0xd190000000000000ULL, + 0xc300000000000000ULL, 0xc2b0000000000000ULL, + 0xc060000000000000ULL, 0xc1d0000000000000ULL, + 0xc5c0000000000000ULL, 0xc470000000000000ULL, + 0xc6a0000000000000ULL, 0xc710000000000000ULL, + 0xce80000000000000ULL, 0xcf30000000000000ULL, + 0xcde0000000000000ULL, 0xcc50000000000000ULL, + 0xc840000000000000ULL, 0xc9f0000000000000ULL, + 0xcb20000000000000ULL, 0xca90000000000000ULL, + 0xee00000000000000ULL, 0xefb0000000000000ULL, + 0xed60000000000000ULL, 0xecd0000000000000ULL, + 0xe8c0000000000000ULL, 0xe970000000000000ULL, + 0xeba0000000000000ULL, 0xea10000000000000ULL, + 0xe380000000000000ULL, 0xe230000000000000ULL, + 0xe0e0000000000000ULL, 0xe150000000000000ULL, + 0xe540000000000000ULL, 0xe4f0000000000000ULL, + 0xe620000000000000ULL, 0xe790000000000000ULL, + 0xf500000000000000ULL, 0xf4b0000000000000ULL, + 0xf660000000000000ULL, 0xf7d0000000000000ULL, + 0xf3c0000000000000ULL, 0xf270000000000000ULL, + 0xf0a0000000000000ULL, 0xf110000000000000ULL, + 0xf880000000000000ULL, 0xf930000000000000ULL, + 0xfbe0000000000000ULL, 0xfa50000000000000ULL, + 0xfe40000000000000ULL, 0xfff0000000000000ULL, + 
0xfd20000000000000ULL, 0xfc90000000000000ULL, + 0xb400000000000000ULL, 0xb5b0000000000000ULL, + 0xb760000000000000ULL, 0xb6d0000000000000ULL, + 0xb2c0000000000000ULL, 0xb370000000000000ULL, + 0xb1a0000000000000ULL, 0xb010000000000000ULL, + 0xb980000000000000ULL, 0xb830000000000000ULL, + 0xbae0000000000000ULL, 0xbb50000000000000ULL, + 0xbf40000000000000ULL, 0xbef0000000000000ULL, + 0xbc20000000000000ULL, 0xbd90000000000000ULL, + 0xaf00000000000000ULL, 0xaeb0000000000000ULL, + 0xac60000000000000ULL, 0xadd0000000000000ULL, + 0xa9c0000000000000ULL, 0xa870000000000000ULL, + 0xaaa0000000000000ULL, 0xab10000000000000ULL, + 0xa280000000000000ULL, 0xa330000000000000ULL, + 0xa1e0000000000000ULL, 0xa050000000000000ULL, + 0xa440000000000000ULL, 0xa5f0000000000000ULL, + 0xa720000000000000ULL, 0xa690000000000000ULL, + 0x8200000000000000ULL, 0x83b0000000000000ULL, + 0x8160000000000000ULL, 0x80d0000000000000ULL, + 0x84c0000000000000ULL, 0x8570000000000000ULL, + 0x87a0000000000000ULL, 0x8610000000000000ULL, + 0x8f80000000000000ULL, 0x8e30000000000000ULL, + 0x8ce0000000000000ULL, 0x8d50000000000000ULL, + 0x8940000000000000ULL, 0x88f0000000000000ULL, + 0x8a20000000000000ULL, 0x8b90000000000000ULL, + 0x9900000000000000ULL, 0x98b0000000000000ULL, + 0x9a60000000000000ULL, 0x9bd0000000000000ULL, + 0x9fc0000000000000ULL, 0x9e70000000000000ULL, + 0x9ca0000000000000ULL, 0x9d10000000000000ULL, + 0x9480000000000000ULL, 0x9530000000000000ULL, + 0x97e0000000000000ULL, 0x9650000000000000ULL, + 0x9240000000000000ULL, 0x93f0000000000000ULL, + 0x9120000000000000ULL, 0x9090000000000000ULL +}; + +static const uint64_t crc64_iso_norm_table[256] = { + 0x0000000000000000ULL, 0x000000000000001bULL, + 0x0000000000000036ULL, 0x000000000000002dULL, + 0x000000000000006cULL, 0x0000000000000077ULL, + 0x000000000000005aULL, 0x0000000000000041ULL, + 0x00000000000000d8ULL, 0x00000000000000c3ULL, + 0x00000000000000eeULL, 0x00000000000000f5ULL, + 0x00000000000000b4ULL, 0x00000000000000afULL, + 
0x0000000000000082ULL, 0x0000000000000099ULL, + 0x00000000000001b0ULL, 0x00000000000001abULL, + 0x0000000000000186ULL, 0x000000000000019dULL, + 0x00000000000001dcULL, 0x00000000000001c7ULL, + 0x00000000000001eaULL, 0x00000000000001f1ULL, + 0x0000000000000168ULL, 0x0000000000000173ULL, + 0x000000000000015eULL, 0x0000000000000145ULL, + 0x0000000000000104ULL, 0x000000000000011fULL, + 0x0000000000000132ULL, 0x0000000000000129ULL, + 0x0000000000000360ULL, 0x000000000000037bULL, + 0x0000000000000356ULL, 0x000000000000034dULL, + 0x000000000000030cULL, 0x0000000000000317ULL, + 0x000000000000033aULL, 0x0000000000000321ULL, + 0x00000000000003b8ULL, 0x00000000000003a3ULL, + 0x000000000000038eULL, 0x0000000000000395ULL, + 0x00000000000003d4ULL, 0x00000000000003cfULL, + 0x00000000000003e2ULL, 0x00000000000003f9ULL, + 0x00000000000002d0ULL, 0x00000000000002cbULL, + 0x00000000000002e6ULL, 0x00000000000002fdULL, + 0x00000000000002bcULL, 0x00000000000002a7ULL, + 0x000000000000028aULL, 0x0000000000000291ULL, + 0x0000000000000208ULL, 0x0000000000000213ULL, + 0x000000000000023eULL, 0x0000000000000225ULL, + 0x0000000000000264ULL, 0x000000000000027fULL, + 0x0000000000000252ULL, 0x0000000000000249ULL, + 0x00000000000006c0ULL, 0x00000000000006dbULL, + 0x00000000000006f6ULL, 0x00000000000006edULL, + 0x00000000000006acULL, 0x00000000000006b7ULL, + 0x000000000000069aULL, 0x0000000000000681ULL, + 0x0000000000000618ULL, 0x0000000000000603ULL, + 0x000000000000062eULL, 0x0000000000000635ULL, + 0x0000000000000674ULL, 0x000000000000066fULL, + 0x0000000000000642ULL, 0x0000000000000659ULL, + 0x0000000000000770ULL, 0x000000000000076bULL, + 0x0000000000000746ULL, 0x000000000000075dULL, + 0x000000000000071cULL, 0x0000000000000707ULL, + 0x000000000000072aULL, 0x0000000000000731ULL, + 0x00000000000007a8ULL, 0x00000000000007b3ULL, + 0x000000000000079eULL, 0x0000000000000785ULL, + 0x00000000000007c4ULL, 0x00000000000007dfULL, + 0x00000000000007f2ULL, 0x00000000000007e9ULL, + 0x00000000000005a0ULL, 
0x00000000000005bbULL, + 0x0000000000000596ULL, 0x000000000000058dULL, + 0x00000000000005ccULL, 0x00000000000005d7ULL, + 0x00000000000005faULL, 0x00000000000005e1ULL, + 0x0000000000000578ULL, 0x0000000000000563ULL, + 0x000000000000054eULL, 0x0000000000000555ULL, + 0x0000000000000514ULL, 0x000000000000050fULL, + 0x0000000000000522ULL, 0x0000000000000539ULL, + 0x0000000000000410ULL, 0x000000000000040bULL, + 0x0000000000000426ULL, 0x000000000000043dULL, + 0x000000000000047cULL, 0x0000000000000467ULL, + 0x000000000000044aULL, 0x0000000000000451ULL, + 0x00000000000004c8ULL, 0x00000000000004d3ULL, + 0x00000000000004feULL, 0x00000000000004e5ULL, + 0x00000000000004a4ULL, 0x00000000000004bfULL, + 0x0000000000000492ULL, 0x0000000000000489ULL, + 0x0000000000000d80ULL, 0x0000000000000d9bULL, + 0x0000000000000db6ULL, 0x0000000000000dadULL, + 0x0000000000000decULL, 0x0000000000000df7ULL, + 0x0000000000000ddaULL, 0x0000000000000dc1ULL, + 0x0000000000000d58ULL, 0x0000000000000d43ULL, + 0x0000000000000d6eULL, 0x0000000000000d75ULL, + 0x0000000000000d34ULL, 0x0000000000000d2fULL, + 0x0000000000000d02ULL, 0x0000000000000d19ULL, + 0x0000000000000c30ULL, 0x0000000000000c2bULL, + 0x0000000000000c06ULL, 0x0000000000000c1dULL, + 0x0000000000000c5cULL, 0x0000000000000c47ULL, + 0x0000000000000c6aULL, 0x0000000000000c71ULL, + 0x0000000000000ce8ULL, 0x0000000000000cf3ULL, + 0x0000000000000cdeULL, 0x0000000000000cc5ULL, + 0x0000000000000c84ULL, 0x0000000000000c9fULL, + 0x0000000000000cb2ULL, 0x0000000000000ca9ULL, + 0x0000000000000ee0ULL, 0x0000000000000efbULL, + 0x0000000000000ed6ULL, 0x0000000000000ecdULL, + 0x0000000000000e8cULL, 0x0000000000000e97ULL, + 0x0000000000000ebaULL, 0x0000000000000ea1ULL, + 0x0000000000000e38ULL, 0x0000000000000e23ULL, + 0x0000000000000e0eULL, 0x0000000000000e15ULL, + 0x0000000000000e54ULL, 0x0000000000000e4fULL, + 0x0000000000000e62ULL, 0x0000000000000e79ULL, + 0x0000000000000f50ULL, 0x0000000000000f4bULL, + 0x0000000000000f66ULL, 0x0000000000000f7dULL, + 
0x0000000000000f3cULL, 0x0000000000000f27ULL, + 0x0000000000000f0aULL, 0x0000000000000f11ULL, + 0x0000000000000f88ULL, 0x0000000000000f93ULL, + 0x0000000000000fbeULL, 0x0000000000000fa5ULL, + 0x0000000000000fe4ULL, 0x0000000000000fffULL, + 0x0000000000000fd2ULL, 0x0000000000000fc9ULL, + 0x0000000000000b40ULL, 0x0000000000000b5bULL, + 0x0000000000000b76ULL, 0x0000000000000b6dULL, + 0x0000000000000b2cULL, 0x0000000000000b37ULL, + 0x0000000000000b1aULL, 0x0000000000000b01ULL, + 0x0000000000000b98ULL, 0x0000000000000b83ULL, + 0x0000000000000baeULL, 0x0000000000000bb5ULL, + 0x0000000000000bf4ULL, 0x0000000000000befULL, + 0x0000000000000bc2ULL, 0x0000000000000bd9ULL, + 0x0000000000000af0ULL, 0x0000000000000aebULL, + 0x0000000000000ac6ULL, 0x0000000000000addULL, + 0x0000000000000a9cULL, 0x0000000000000a87ULL, + 0x0000000000000aaaULL, 0x0000000000000ab1ULL, + 0x0000000000000a28ULL, 0x0000000000000a33ULL, + 0x0000000000000a1eULL, 0x0000000000000a05ULL, + 0x0000000000000a44ULL, 0x0000000000000a5fULL, + 0x0000000000000a72ULL, 0x0000000000000a69ULL, + 0x0000000000000820ULL, 0x000000000000083bULL, + 0x0000000000000816ULL, 0x000000000000080dULL, + 0x000000000000084cULL, 0x0000000000000857ULL, + 0x000000000000087aULL, 0x0000000000000861ULL, + 0x00000000000008f8ULL, 0x00000000000008e3ULL, + 0x00000000000008ceULL, 0x00000000000008d5ULL, + 0x0000000000000894ULL, 0x000000000000088fULL, + 0x00000000000008a2ULL, 0x00000000000008b9ULL, + 0x0000000000000990ULL, 0x000000000000098bULL, + 0x00000000000009a6ULL, 0x00000000000009bdULL, + 0x00000000000009fcULL, 0x00000000000009e7ULL, + 0x00000000000009caULL, 0x00000000000009d1ULL, + 0x0000000000000948ULL, 0x0000000000000953ULL, + 0x000000000000097eULL, 0x0000000000000965ULL, + 0x0000000000000924ULL, 0x000000000000093fULL, + 0x0000000000000912ULL, 0x0000000000000909ULL +}; + +static const uint64_t crc64_jones_refl_table[256] = { + 0x0000000000000000ULL, 0x7ad870c830358979ULL, + 0xf5b0e190606b12f2ULL, 0x8f689158505e9b8bULL, + 
0xc038e5739841b68fULL, 0xbae095bba8743ff6ULL, + 0x358804e3f82aa47dULL, 0x4f50742bc81f2d04ULL, + 0xab28ecb46814fe75ULL, 0xd1f09c7c5821770cULL, + 0x5e980d24087fec87ULL, 0x24407dec384a65feULL, + 0x6b1009c7f05548faULL, 0x11c8790fc060c183ULL, + 0x9ea0e857903e5a08ULL, 0xe478989fa00bd371ULL, + 0x7d08ff3b88be6f81ULL, 0x07d08ff3b88be6f8ULL, + 0x88b81eabe8d57d73ULL, 0xf2606e63d8e0f40aULL, + 0xbd301a4810ffd90eULL, 0xc7e86a8020ca5077ULL, + 0x4880fbd87094cbfcULL, 0x32588b1040a14285ULL, + 0xd620138fe0aa91f4ULL, 0xacf86347d09f188dULL, + 0x2390f21f80c18306ULL, 0x594882d7b0f40a7fULL, + 0x1618f6fc78eb277bULL, 0x6cc0863448deae02ULL, + 0xe3a8176c18803589ULL, 0x997067a428b5bcf0ULL, + 0xfa11fe77117cdf02ULL, 0x80c98ebf2149567bULL, + 0x0fa11fe77117cdf0ULL, 0x75796f2f41224489ULL, + 0x3a291b04893d698dULL, 0x40f16bccb908e0f4ULL, + 0xcf99fa94e9567b7fULL, 0xb5418a5cd963f206ULL, + 0x513912c379682177ULL, 0x2be1620b495da80eULL, + 0xa489f35319033385ULL, 0xde51839b2936bafcULL, + 0x9101f7b0e12997f8ULL, 0xebd98778d11c1e81ULL, + 0x64b116208142850aULL, 0x1e6966e8b1770c73ULL, + 0x8719014c99c2b083ULL, 0xfdc17184a9f739faULL, + 0x72a9e0dcf9a9a271ULL, 0x08719014c99c2b08ULL, + 0x4721e43f0183060cULL, 0x3df994f731b68f75ULL, + 0xb29105af61e814feULL, 0xc849756751dd9d87ULL, + 0x2c31edf8f1d64ef6ULL, 0x56e99d30c1e3c78fULL, + 0xd9810c6891bd5c04ULL, 0xa3597ca0a188d57dULL, + 0xec09088b6997f879ULL, 0x96d1784359a27100ULL, + 0x19b9e91b09fcea8bULL, 0x636199d339c963f2ULL, + 0xdf7adabd7a6e2d6fULL, 0xa5a2aa754a5ba416ULL, + 0x2aca3b2d1a053f9dULL, 0x50124be52a30b6e4ULL, + 0x1f423fcee22f9be0ULL, 0x659a4f06d21a1299ULL, + 0xeaf2de5e82448912ULL, 0x902aae96b271006bULL, + 0x74523609127ad31aULL, 0x0e8a46c1224f5a63ULL, + 0x81e2d7997211c1e8ULL, 0xfb3aa75142244891ULL, + 0xb46ad37a8a3b6595ULL, 0xceb2a3b2ba0eececULL, + 0x41da32eaea507767ULL, 0x3b024222da65fe1eULL, + 0xa2722586f2d042eeULL, 0xd8aa554ec2e5cb97ULL, + 0x57c2c41692bb501cULL, 0x2d1ab4dea28ed965ULL, + 0x624ac0f56a91f461ULL, 0x1892b03d5aa47d18ULL, + 0x97fa21650afae693ULL, 
0xed2251ad3acf6feaULL, + 0x095ac9329ac4bc9bULL, 0x7382b9faaaf135e2ULL, + 0xfcea28a2faafae69ULL, 0x8632586aca9a2710ULL, + 0xc9622c4102850a14ULL, 0xb3ba5c8932b0836dULL, + 0x3cd2cdd162ee18e6ULL, 0x460abd1952db919fULL, + 0x256b24ca6b12f26dULL, 0x5fb354025b277b14ULL, + 0xd0dbc55a0b79e09fULL, 0xaa03b5923b4c69e6ULL, + 0xe553c1b9f35344e2ULL, 0x9f8bb171c366cd9bULL, + 0x10e3202993385610ULL, 0x6a3b50e1a30ddf69ULL, + 0x8e43c87e03060c18ULL, 0xf49bb8b633338561ULL, + 0x7bf329ee636d1eeaULL, 0x012b592653589793ULL, + 0x4e7b2d0d9b47ba97ULL, 0x34a35dc5ab7233eeULL, + 0xbbcbcc9dfb2ca865ULL, 0xc113bc55cb19211cULL, + 0x5863dbf1e3ac9decULL, 0x22bbab39d3991495ULL, + 0xadd33a6183c78f1eULL, 0xd70b4aa9b3f20667ULL, + 0x985b3e827bed2b63ULL, 0xe2834e4a4bd8a21aULL, + 0x6debdf121b863991ULL, 0x1733afda2bb3b0e8ULL, + 0xf34b37458bb86399ULL, 0x8993478dbb8deae0ULL, + 0x06fbd6d5ebd3716bULL, 0x7c23a61ddbe6f812ULL, + 0x3373d23613f9d516ULL, 0x49aba2fe23cc5c6fULL, + 0xc6c333a67392c7e4ULL, 0xbc1b436e43a74e9dULL, + 0x95ac9329ac4bc9b5ULL, 0xef74e3e19c7e40ccULL, + 0x601c72b9cc20db47ULL, 0x1ac40271fc15523eULL, + 0x5594765a340a7f3aULL, 0x2f4c0692043ff643ULL, + 0xa02497ca54616dc8ULL, 0xdafce7026454e4b1ULL, + 0x3e847f9dc45f37c0ULL, 0x445c0f55f46abeb9ULL, + 0xcb349e0da4342532ULL, 0xb1eceec59401ac4bULL, + 0xfebc9aee5c1e814fULL, 0x8464ea266c2b0836ULL, + 0x0b0c7b7e3c7593bdULL, 0x71d40bb60c401ac4ULL, + 0xe8a46c1224f5a634ULL, 0x927c1cda14c02f4dULL, + 0x1d148d82449eb4c6ULL, 0x67ccfd4a74ab3dbfULL, + 0x289c8961bcb410bbULL, 0x5244f9a98c8199c2ULL, + 0xdd2c68f1dcdf0249ULL, 0xa7f41839ecea8b30ULL, + 0x438c80a64ce15841ULL, 0x3954f06e7cd4d138ULL, + 0xb63c61362c8a4ab3ULL, 0xcce411fe1cbfc3caULL, + 0x83b465d5d4a0eeceULL, 0xf96c151de49567b7ULL, + 0x76048445b4cbfc3cULL, 0x0cdcf48d84fe7545ULL, + 0x6fbd6d5ebd3716b7ULL, 0x15651d968d029fceULL, + 0x9a0d8ccedd5c0445ULL, 0xe0d5fc06ed698d3cULL, + 0xaf85882d2576a038ULL, 0xd55df8e515432941ULL, + 0x5a3569bd451db2caULL, 0x20ed197575283bb3ULL, + 0xc49581ead523e8c2ULL, 0xbe4df122e51661bbULL, + 
0x3125607ab548fa30ULL, 0x4bfd10b2857d7349ULL, + 0x04ad64994d625e4dULL, 0x7e7514517d57d734ULL, + 0xf11d85092d094cbfULL, 0x8bc5f5c11d3cc5c6ULL, + 0x12b5926535897936ULL, 0x686de2ad05bcf04fULL, + 0xe70573f555e26bc4ULL, 0x9ddd033d65d7e2bdULL, + 0xd28d7716adc8cfb9ULL, 0xa85507de9dfd46c0ULL, + 0x273d9686cda3dd4bULL, 0x5de5e64efd965432ULL, + 0xb99d7ed15d9d8743ULL, 0xc3450e196da80e3aULL, + 0x4c2d9f413df695b1ULL, 0x36f5ef890dc31cc8ULL, + 0x79a59ba2c5dc31ccULL, 0x037deb6af5e9b8b5ULL, + 0x8c157a32a5b7233eULL, 0xf6cd0afa9582aa47ULL, + 0x4ad64994d625e4daULL, 0x300e395ce6106da3ULL, + 0xbf66a804b64ef628ULL, 0xc5bed8cc867b7f51ULL, + 0x8aeeace74e645255ULL, 0xf036dc2f7e51db2cULL, + 0x7f5e4d772e0f40a7ULL, 0x05863dbf1e3ac9deULL, + 0xe1fea520be311aafULL, 0x9b26d5e88e0493d6ULL, + 0x144e44b0de5a085dULL, 0x6e963478ee6f8124ULL, + 0x21c640532670ac20ULL, 0x5b1e309b16452559ULL, + 0xd476a1c3461bbed2ULL, 0xaeaed10b762e37abULL, + 0x37deb6af5e9b8b5bULL, 0x4d06c6676eae0222ULL, + 0xc26e573f3ef099a9ULL, 0xb8b627f70ec510d0ULL, + 0xf7e653dcc6da3dd4ULL, 0x8d3e2314f6efb4adULL, + 0x0256b24ca6b12f26ULL, 0x788ec2849684a65fULL, + 0x9cf65a1b368f752eULL, 0xe62e2ad306bafc57ULL, + 0x6946bb8b56e467dcULL, 0x139ecb4366d1eea5ULL, + 0x5ccebf68aecec3a1ULL, 0x2616cfa09efb4ad8ULL, + 0xa97e5ef8cea5d153ULL, 0xd3a62e30fe90582aULL, + 0xb0c7b7e3c7593bd8ULL, 0xca1fc72bf76cb2a1ULL, + 0x45775673a732292aULL, 0x3faf26bb9707a053ULL, + 0x70ff52905f188d57ULL, 0x0a2722586f2d042eULL, + 0x854fb3003f739fa5ULL, 0xff97c3c80f4616dcULL, + 0x1bef5b57af4dc5adULL, 0x61372b9f9f784cd4ULL, + 0xee5fbac7cf26d75fULL, 0x9487ca0fff135e26ULL, + 0xdbd7be24370c7322ULL, 0xa10fceec0739fa5bULL, + 0x2e675fb4576761d0ULL, 0x54bf2f7c6752e8a9ULL, + 0xcdcf48d84fe75459ULL, 0xb71738107fd2dd20ULL, + 0x387fa9482f8c46abULL, 0x42a7d9801fb9cfd2ULL, + 0x0df7adabd7a6e2d6ULL, 0x772fdd63e7936bafULL, + 0xf8474c3bb7cdf024ULL, 0x829f3cf387f8795dULL, + 0x66e7a46c27f3aa2cULL, 0x1c3fd4a417c62355ULL, + 0x935745fc4798b8deULL, 0xe98f353477ad31a7ULL, + 0xa6df411fbfb21ca3ULL, 
0xdc0731d78f8795daULL, + 0x536fa08fdfd90e51ULL, 0x29b7d047efec8728ULL +}; + +static const uint64_t crc64_jones_norm_table[256] = { + 0x0000000000000000ULL, 0xad93d23594c935a9ULL, + 0xf6b4765ebd5b5efbULL, 0x5b27a46b29926b52ULL, + 0x40fb3e88ee7f885fULL, 0xed68ecbd7ab6bdf6ULL, + 0xb64f48d65324d6a4ULL, 0x1bdc9ae3c7ede30dULL, + 0x81f67d11dcff10beULL, 0x2c65af2448362517ULL, + 0x77420b4f61a44e45ULL, 0xdad1d97af56d7becULL, + 0xc10d4399328098e1ULL, 0x6c9e91aca649ad48ULL, + 0x37b935c78fdbc61aULL, 0x9a2ae7f21b12f3b3ULL, + 0xae7f28162d3714d5ULL, 0x03ecfa23b9fe217cULL, + 0x58cb5e48906c4a2eULL, 0xf5588c7d04a57f87ULL, + 0xee84169ec3489c8aULL, 0x4317c4ab5781a923ULL, + 0x183060c07e13c271ULL, 0xb5a3b2f5eadaf7d8ULL, + 0x2f895507f1c8046bULL, 0x821a8732650131c2ULL, + 0xd93d23594c935a90ULL, 0x74aef16cd85a6f39ULL, + 0x6f726b8f1fb78c34ULL, 0xc2e1b9ba8b7eb99dULL, + 0x99c61dd1a2ecd2cfULL, 0x3455cfe43625e766ULL, + 0xf16d8219cea71c03ULL, 0x5cfe502c5a6e29aaULL, + 0x07d9f44773fc42f8ULL, 0xaa4a2672e7357751ULL, + 0xb196bc9120d8945cULL, 0x1c056ea4b411a1f5ULL, + 0x4722cacf9d83caa7ULL, 0xeab118fa094aff0eULL, + 0x709bff0812580cbdULL, 0xdd082d3d86913914ULL, + 0x862f8956af035246ULL, 0x2bbc5b633bca67efULL, + 0x3060c180fc2784e2ULL, 0x9df313b568eeb14bULL, + 0xc6d4b7de417cda19ULL, 0x6b4765ebd5b5efb0ULL, + 0x5f12aa0fe39008d6ULL, 0xf281783a77593d7fULL, + 0xa9a6dc515ecb562dULL, 0x04350e64ca026384ULL, + 0x1fe994870def8089ULL, 0xb27a46b29926b520ULL, + 0xe95de2d9b0b4de72ULL, 0x44ce30ec247debdbULL, + 0xdee4d71e3f6f1868ULL, 0x7377052baba62dc1ULL, + 0x2850a14082344693ULL, 0x85c3737516fd733aULL, + 0x9e1fe996d1109037ULL, 0x338c3ba345d9a59eULL, + 0x68ab9fc86c4bceccULL, 0xc5384dfdf882fb65ULL, + 0x4f48d60609870dafULL, 0xe2db04339d4e3806ULL, + 0xb9fca058b4dc5354ULL, 0x146f726d201566fdULL, + 0x0fb3e88ee7f885f0ULL, 0xa2203abb7331b059ULL, + 0xf9079ed05aa3db0bULL, 0x54944ce5ce6aeea2ULL, + 0xcebeab17d5781d11ULL, 0x632d792241b128b8ULL, + 0x380add49682343eaULL, 0x95990f7cfcea7643ULL, + 0x8e45959f3b07954eULL, 
0x23d647aaafcea0e7ULL, + 0x78f1e3c1865ccbb5ULL, 0xd56231f41295fe1cULL, + 0xe137fe1024b0197aULL, 0x4ca42c25b0792cd3ULL, + 0x1783884e99eb4781ULL, 0xba105a7b0d227228ULL, + 0xa1ccc098cacf9125ULL, 0x0c5f12ad5e06a48cULL, + 0x5778b6c67794cfdeULL, 0xfaeb64f3e35dfa77ULL, + 0x60c18301f84f09c4ULL, 0xcd5251346c863c6dULL, + 0x9675f55f4514573fULL, 0x3be6276ad1dd6296ULL, + 0x203abd891630819bULL, 0x8da96fbc82f9b432ULL, + 0xd68ecbd7ab6bdf60ULL, 0x7b1d19e23fa2eac9ULL, + 0xbe25541fc72011acULL, 0x13b6862a53e92405ULL, + 0x489122417a7b4f57ULL, 0xe502f074eeb27afeULL, + 0xfede6a97295f99f3ULL, 0x534db8a2bd96ac5aULL, + 0x086a1cc99404c708ULL, 0xa5f9cefc00cdf2a1ULL, + 0x3fd3290e1bdf0112ULL, 0x9240fb3b8f1634bbULL, + 0xc9675f50a6845fe9ULL, 0x64f48d65324d6a40ULL, + 0x7f281786f5a0894dULL, 0xd2bbc5b36169bce4ULL, + 0x899c61d848fbd7b6ULL, 0x240fb3eddc32e21fULL, + 0x105a7c09ea170579ULL, 0xbdc9ae3c7ede30d0ULL, + 0xe6ee0a57574c5b82ULL, 0x4b7dd862c3856e2bULL, + 0x50a1428104688d26ULL, 0xfd3290b490a1b88fULL, + 0xa61534dfb933d3ddULL, 0x0b86e6ea2dfae674ULL, + 0x91ac011836e815c7ULL, 0x3c3fd32da221206eULL, + 0x671877468bb34b3cULL, 0xca8ba5731f7a7e95ULL, + 0xd1573f90d8979d98ULL, 0x7cc4eda54c5ea831ULL, + 0x27e349ce65ccc363ULL, 0x8a709bfbf105f6caULL, + 0x9e91ac0c130e1b5eULL, 0x33027e3987c72ef7ULL, + 0x6825da52ae5545a5ULL, 0xc5b608673a9c700cULL, + 0xde6a9284fd719301ULL, 0x73f940b169b8a6a8ULL, + 0x28dee4da402acdfaULL, 0x854d36efd4e3f853ULL, + 0x1f67d11dcff10be0ULL, 0xb2f403285b383e49ULL, + 0xe9d3a74372aa551bULL, 0x44407576e66360b2ULL, + 0x5f9cef95218e83bfULL, 0xf20f3da0b547b616ULL, + 0xa92899cb9cd5dd44ULL, 0x04bb4bfe081ce8edULL, + 0x30ee841a3e390f8bULL, 0x9d7d562faaf03a22ULL, + 0xc65af24483625170ULL, 0x6bc9207117ab64d9ULL, + 0x7015ba92d04687d4ULL, 0xdd8668a7448fb27dULL, + 0x86a1cccc6d1dd92fULL, 0x2b321ef9f9d4ec86ULL, + 0xb118f90be2c61f35ULL, 0x1c8b2b3e760f2a9cULL, + 0x47ac8f555f9d41ceULL, 0xea3f5d60cb547467ULL, + 0xf1e3c7830cb9976aULL, 0x5c7015b69870a2c3ULL, + 0x0757b1ddb1e2c991ULL, 0xaac463e8252bfc38ULL, + 
0x6ffc2e15dda9075dULL, 0xc26ffc20496032f4ULL, + 0x9948584b60f259a6ULL, 0x34db8a7ef43b6c0fULL, + 0x2f07109d33d68f02ULL, 0x8294c2a8a71fbaabULL, + 0xd9b366c38e8dd1f9ULL, 0x7420b4f61a44e450ULL, + 0xee0a5304015617e3ULL, 0x43998131959f224aULL, + 0x18be255abc0d4918ULL, 0xb52df76f28c47cb1ULL, + 0xaef16d8cef299fbcULL, 0x0362bfb97be0aa15ULL, + 0x58451bd25272c147ULL, 0xf5d6c9e7c6bbf4eeULL, + 0xc1830603f09e1388ULL, 0x6c10d43664572621ULL, + 0x3737705d4dc54d73ULL, 0x9aa4a268d90c78daULL, + 0x8178388b1ee19bd7ULL, 0x2cebeabe8a28ae7eULL, + 0x77cc4ed5a3bac52cULL, 0xda5f9ce03773f085ULL, + 0x40757b122c610336ULL, 0xede6a927b8a8369fULL, + 0xb6c10d4c913a5dcdULL, 0x1b52df7905f36864ULL, + 0x008e459ac21e8b69ULL, 0xad1d97af56d7bec0ULL, + 0xf63a33c47f45d592ULL, 0x5ba9e1f1eb8ce03bULL, + 0xd1d97a0a1a8916f1ULL, 0x7c4aa83f8e402358ULL, + 0x276d0c54a7d2480aULL, 0x8afede61331b7da3ULL, + 0x91224482f4f69eaeULL, 0x3cb196b7603fab07ULL, + 0x679632dc49adc055ULL, 0xca05e0e9dd64f5fcULL, + 0x502f071bc676064fULL, 0xfdbcd52e52bf33e6ULL, + 0xa69b71457b2d58b4ULL, 0x0b08a370efe46d1dULL, + 0x10d4399328098e10ULL, 0xbd47eba6bcc0bbb9ULL, + 0xe6604fcd9552d0ebULL, 0x4bf39df8019be542ULL, + 0x7fa6521c37be0224ULL, 0xd2358029a377378dULL, + 0x891224428ae55cdfULL, 0x2481f6771e2c6976ULL, + 0x3f5d6c94d9c18a7bULL, 0x92cebea14d08bfd2ULL, + 0xc9e91aca649ad480ULL, 0x647ac8fff053e129ULL, + 0xfe502f0deb41129aULL, 0x53c3fd387f882733ULL, + 0x08e45953561a4c61ULL, 0xa5778b66c2d379c8ULL, + 0xbeab1185053e9ac5ULL, 0x1338c3b091f7af6cULL, + 0x481f67dbb865c43eULL, 0xe58cb5ee2cacf197ULL, + 0x20b4f813d42e0af2ULL, 0x8d272a2640e73f5bULL, + 0xd6008e4d69755409ULL, 0x7b935c78fdbc61a0ULL, + 0x604fc69b3a5182adULL, 0xcddc14aeae98b704ULL, + 0x96fbb0c5870adc56ULL, 0x3b6862f013c3e9ffULL, + 0xa142850208d11a4cULL, 0x0cd157379c182fe5ULL, + 0x57f6f35cb58a44b7ULL, 0xfa6521692143711eULL, + 0xe1b9bb8ae6ae9213ULL, 0x4c2a69bf7267a7baULL, + 0x170dcdd45bf5cce8ULL, 0xba9e1fe1cf3cf941ULL, + 0x8ecbd005f9191e27ULL, 0x235802306dd02b8eULL, + 0x787fa65b444240dcULL, 
0xd5ec746ed08b7575ULL,
+	0xce30ee8d17669678ULL, 0x63a33cb883afa3d1ULL,
+	0x388498d3aa3dc883ULL, 0x95174ae63ef4fd2aULL,
+	0x0f3dad1425e60e99ULL, 0xa2ae7f21b12f3b30ULL,
+	0xf989db4a98bd5062ULL, 0x541a097f0c7465cbULL,
+	0x4fc6939ccb9986c6ULL, 0xe25541a95f50b36fULL,
+	0xb972e5c276c2d83dULL, 0x14e137f7e20bed94ULL
+};
+
+uint64_t crc64_ecma_refl_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+	uint64_t pos, state = ~seed;	/* work on the bit-inverted seed */
+
+	for (pos = 0; pos < len; pos++) {	/* one table lookup per input byte */
+		const uint8_t idx = (uint8_t) (state ^ buf[pos]);	/* low byte ^ data */
+		state = (state >> 8) ^ crc64_ecma_refl_table[idx];
+	}
+
+	return ~state;		/* invert again before returning */
+}
+
+uint64_t crc64_ecma_norm_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+	uint64_t pos, state = ~seed;	/* work on the bit-inverted seed */
+
+	for (pos = 0; pos < len; pos++) {	/* one table lookup per input byte */
+		const uint8_t idx = (uint8_t) ((state >> 56) ^ buf[pos]);	/* top byte ^ data */
+		state = (state << 8) ^ crc64_ecma_norm_table[idx];
+	}
+
+	return ~state;		/* invert again before returning */
+}
+
+uint64_t crc64_iso_refl_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+	uint64_t pos, state = ~seed;	/* work on the bit-inverted seed */
+
+	for (pos = 0; pos < len; pos++) {	/* one table lookup per input byte */
+		const uint8_t idx = (uint8_t) (state ^ buf[pos]);	/* low byte ^ data */
+		state = (state >> 8) ^ crc64_iso_refl_table[idx];
+	}
+
+	return ~state;		/* invert again before returning */
+}
+
+uint64_t crc64_iso_norm_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+	uint64_t pos, state = ~seed;	/* work on the bit-inverted seed */
+
+	for (pos = 0; pos < len; pos++) {	/* one table lookup per input byte */
+		const uint8_t idx = (uint8_t) ((state >> 56) ^ buf[pos]);	/* top byte ^ data */
+		state = (state << 8) ^ crc64_iso_norm_table[idx];
+	}
+
+	return ~state;		/* invert again before returning */
+}
+
+uint64_t crc64_jones_refl_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+	uint64_t pos, state = ~seed;	/* work on the bit-inverted seed */
+
+	for (pos = 0; pos < len; pos++) {	/* one table lookup per input byte */
+		const uint8_t idx = (uint8_t) (state ^ buf[pos]);	/* low byte ^ data */
+		state = (state >> 8) ^ crc64_jones_refl_table[idx];
+	}
+
+	return ~state;		/* invert again before returning */
+}
+
+uint64_t crc64_jones_norm_base(uint64_t seed, const uint8_t * buf, uint64_t len)
+{
+	uint64_t pos, state = ~seed;	/* work on the bit-inverted seed */
+
+	for (pos = 0; pos < len; pos++) {	/* one table lookup per input byte */
+		const uint8_t idx = (uint8_t) ((state >> 56) ^ buf[pos]);	/* top byte ^ data */
+		state = (state << 8) ^ crc64_jones_norm_table[idx];
+	}
+
+	return ~state;		/* invert again before returning */
+}
+
+struct slver {			/* initializers below are given in this field order */
+	unsigned short snum;
+	unsigned char ver;
+	unsigned char core;
+};
+
+struct slver crc64_ecma_refl_base_slver_0000001c;	/* name suffix repeats snum */
+struct slver crc64_ecma_refl_base_slver = { 0x001c, 0x00, 0x00 };
+
+struct slver crc64_ecma_norm_base_slver_00000019;
+struct slver crc64_ecma_norm_base_slver = { 0x0019, 0x00, 0x00 };
+
+struct slver crc64_iso_refl_base_slver_00000022;
+struct slver crc64_iso_refl_base_slver = { 0x0022, 0x00, 0x00 };
+
+struct slver crc64_iso_norm_base_slver_0000001f;
+struct slver crc64_iso_norm_base_slver = { 0x001f, 0x00, 0x00 };
+
+struct slver crc64_jones_refl_base_slver_00000028;
+struct slver crc64_jones_refl_base_slver = { 0x0028, 0x00, 0x00 };
+
+struct slver crc64_jones_norm_base_slver_00000025;
+struct slver crc64_jones_norm_base_slver = { 0x0025, 0x00, 0x00 };
diff --git a/src/spdk/isa-l/crc/crc64_ecma_norm_by16_10.asm b/src/spdk/isa-l/crc/crc64_ecma_norm_by16_10.asm
new file mode 100644
index 000000000..8b09a89c4
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_ecma_norm_by16_10.asm
@@ -0,0 +1,61 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define FUNCTION_NAME crc64_ecma_norm_by16_10 ; NOTE(review): presumably the symbol name used by the included implementation - confirm in crc64_iso_norm_by16_10.asm +%define USE_CONSTS ; NOTE(review): presumably tells the included file to take its constants from INCLUDE_CONSTS below - confirm +%macro INCLUDE_CONSTS 0 ; rk1..rk20 carry the same values as the rk1..rk20 table in crc64_ecma_norm_by8.asm +rk_1: dq 0x7f52691a60ddc70d +rk_2: dq 0x7036b0389f6a0c82 +rk1: dq 0x05f5c3c7eb52fab6 +rk2: dq 0x4eb938a7d257740e +rk3: dq 0x05cf79dea9ac37d6 +rk4: dq 0x001067e571d7d5c2 +rk5: dq 0x05f5c3c7eb52fab6 ; same value as rk1 +rk6: dq 0x0000000000000000 +rk7: dq 0x578d29d06cc4f872 +rk8: dq 0x42f0e1eba9ea3693 ; the ECMA-182 generator polynomial +rk9: dq 0xe464f4df5fb60ac1 +rk10: dq 0xb649c5b35a759cf2 +rk11: dq 0x9af04e1eff82d0dd +rk12: dq 0x6e82e609297f8fe8 +rk13: dq 0x097c516e98bd2e73 +rk14: dq 0x0b76477b31e22e7b +rk15: dq 0x5f6843ca540df020 +rk16: dq 0xddf4b6981205b83f +rk17: dq 0x54819d8713758b2c +rk18: dq 0x4a6b90073eb0af5a +rk19: dq 0x571bee0a227ef92b +rk20: dq 0x44bef2a201b5200c +rk_1b: dq 0x05f5c3c7eb52fab6 ; same value as rk1 +rk_2b: dq 0x4eb938a7d257740e ; same value as rk2 + dq 0x0000000000000000 + dq 0x0000000000000000 +%endm + +%include "crc64_iso_norm_by16_10.asm" diff --git a/src/spdk/isa-l/crc/crc64_ecma_norm_by8.asm b/src/spdk/isa-l/crc/crc64_ecma_norm_by8.asm new file mode 100644 index 000000000..6770e34e8 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_ecma_norm_by8.asm @@ -0,0 +1,583 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Function API: +; uint64_t crc64_ecma_norm_by8( +; uint64_t init_crc, //initial CRC value, 64 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; uint64_t len //buffer length in bytes (64-bit data) +; ); +; +; sample yasm command line: yasm -f x64 -f elf64 -X gnu -g dwarf2 crc64_ecma_norm_by8 +%include "reg_sizes.asm" + +%define fetch_dist 1024 ; byte offset ahead of the buffer pointer used by the prefetchnta instructions + +[bits 64] +default rel + +section .text + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 ; offset from rsp of the xmm6-xmm13 save area + %define VARIABLE_OFFSET 16*10+8 ; the +8 keeps rsp 16-byte aligned after the caller's return-address push, as the movdqa stack accesses need +%else + %define VARIABLE_OFFSET 16*2+8 ; the +8 keeps rsp 16-byte aligned after the caller's return-address push, as the movdqa stack accesses need +%endif +align 16 +global crc64_ecma_norm_by8:ISAL_SYM_TYPE_FUNCTION +crc64_ecma_norm_by8: + + not arg1 ;~init_crc + + sub rsp,VARIABLE_OFFSET + +%ifidn __OUTPUT_FORMAT__, win64 + ; save xmm6-xmm13 on the stack (win64 only); they are restored in _cleanup + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + + ; check if smaller than 256 + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time... + jl _less_than_256 + + + ; load the initial crc value + movq xmm10, arg1 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register. + ; because data will be byte-reflected and will align with initial crc at correct place.
+ pslldq xmm10, 8 + + movdqa xmm11, [SHUF_MASK] + ; load the first 128B of data; the initial crc is xored into the first 16B below + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + pshufb xmm0, xmm11 + ; XOR the initial_crc value + pxor xmm0, xmm10 + pshufb xmm1, xmm11 + pshufb xmm2, xmm11 + pshufb xmm3, xmm11 + pshufb xmm4, xmm11 + pshufb xmm5, xmm11 + pshufb xmm6, xmm11 + pshufb xmm7, xmm11 + + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time.
This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 ; buf += 128; + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm1, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm3, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm5, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm7, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0,
xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + + + ; fold the 8 xmm registers to 1 xmm register with different constants + + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + + movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2 + + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce.
16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pshufb xmm0, xmm11 + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg3, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted.
+_get_last_two_xmms: + movdqa xmm2, xmm7 + + movdqu xmm1, [arg2 - 16 + arg3] + pshufb xmm1, xmm11 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg3 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg3 bytes + pshufb xmm2, xmm0 + + ; shift xmm7 to the right by 16-arg3 bytes + pxor xmm0, [mask1] + pshufb xmm7, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10 + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0x01 ; H*L + pslldq xmm0, 8 + pxor xmm7, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10 + movdqa xmm0, xmm7 + + movdqa xmm1, xmm7 + pand xmm1, [mask3] + pclmulqdq xmm7, xmm10, 0x01 + pxor xmm7, xmm1 + + pclmulqdq xmm7, xmm10, 0x11 + pxor xmm7, xmm0 + pextrq rax, xmm7, 0 + +_cleanup: + not rax +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp, VARIABLE_OFFSET + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + movdqa xmm11, [SHUF_MASK] + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1
and rk2 in xmm10 + + movq xmm0, arg1 ; get the initial crc value + pslldq xmm0, 8 ; align it to its correct place + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 + + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop +align 16 +_less_than_32: + ; mov the (already inverted) initial crc to the return value; _cleanup inverts it back, so a zero-length buffer returns init_crc unchanged. + mov rax, arg1 + test arg3, arg3 + je _cleanup + + movdqa xmm11, [SHUF_MASK] + + movq xmm0, arg1 ; get the initial crc value + pslldq xmm0, 8 ; align it to its correct place + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+ pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al +_zero_left: + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + ; shl r9, 4 + lea rax, [pshufb_shf_table + 16] + sub rax, r9 + + cmp r9, 8 + jl _end_1to7 + +_end_8to15: + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm7, xmm0 + jmp _128_done + +_end_1to7: + ; Right shift (8-length) bytes in XMM + add rax, 8 + movdqu xmm0, [rax] + pshufb xmm7,xmm0 + + jmp _barrett +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +section .data + +; precomputed constants +align 16 + +rk1 : +DQ 0x5f5c3c7eb52fab6 +rk2 : +DQ 0x4eb938a7d257740e +rk3 : +DQ 0x5cf79dea9ac37d6 +rk4 : +DQ 0x001067e571d7d5c2 +rk5 : +DQ 0x5f5c3c7eb52fab6 +rk6 : +DQ 0x0000000000000000 +rk7 : +DQ 0x578d29d06cc4f872 +rk8 : +DQ 0x42f0e1eba9ea3693 +rk9 : +DQ 0xe464f4df5fb60ac1 +rk10 : +DQ 0xb649c5b35a759cf2 +rk11 : +DQ 0x9af04e1eff82d0dd +rk12 : +DQ 0x6e82e609297f8fe8 +rk13 : +DQ 0x97c516e98bd2e73 +rk14 : +DQ 0xb76477b31e22e7b +rk15 : +DQ 0x5f6843ca540df020 +rk16 : +DQ 0xddf4b6981205b83f +rk17 : +DQ 0x54819d8713758b2c +rk18 : +DQ 0x4a6b90073eb0af5a +rk19 : +DQ 0x571bee0a227ef92b +rk20 : +DQ 0x44bef2a201b5200c + + +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF +mask3: +dq 0x0000000000000000,
0xFFFFFFFFFFFFFFFF + +SHUF_MASK: +dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x0f0e0d0c0b0a0908 +dq 0x8080808080808080, 0x0f0e0d0c0b0a0908 +dq 0x8080808080808080, 0x8080808080808080 + +;;; func core, ver, snum +slversion crc64_ecma_norm_by8, 01, 00, 001a diff --git a/src/spdk/isa-l/crc/crc64_ecma_refl_by16_10.asm b/src/spdk/isa-l/crc/crc64_ecma_refl_by16_10.asm new file mode 100644 index 000000000..a48d0b203 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_ecma_refl_by16_10.asm @@ -0,0 +1,61 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define FUNCTION_NAME crc64_ecma_refl_by16_10 ; NOTE(review): presumably the symbol name used by the included implementation - confirm in crc64_iso_refl_by16_10.asm +%define USE_CONSTS ; NOTE(review): presumably tells the included file to take its constants from INCLUDE_CONSTS below - confirm +%macro INCLUDE_CONSTS 0 ; constant table for the ECMA reflected variant; this file supplies only the name and these values +rk_1: dq 0xf31fd9271e228b79 +rk_2: dq 0x8260adf2381ad81c +rk1: dq 0xdabe95afc7875f40 +rk2: dq 0xe05dd497ca393ae4 +rk3: dq 0xd7d86b2af73de740 +rk4: dq 0x8757d71d4fcc1000 +rk5: dq 0xdabe95afc7875f40 ; same value as rk1 +rk6: dq 0x0000000000000000 +rk7: dq 0x9c3e466c172963d5 +rk8: dq 0x92d8af2baf0e1e84 +rk9: dq 0x947874de595052cb +rk10: dq 0x9e735cb59b4724da +rk11: dq 0xe4ce2cd55fea0037 +rk12: dq 0x2fe3fd2920ce82ec +rk13: dq 0x0e31d519421a63a5 +rk14: dq 0x2e30203212cac325 +rk15: dq 0x081f6054a7842df4 +rk16: dq 0x6ae3efbb9dd441f3 +rk17: dq 0x69a35d91c3730254 +rk18: dq 0xb5ea1af9c013aca4 +rk19: dq 0x3be653a30fe1af51 +rk20: dq 0x60095b008a9efa44 +rk_1b: dq 0xdabe95afc7875f40 ; same value as rk1 +rk_2b: dq 0xe05dd497ca393ae4 ; same value as rk2 + dq 0x0000000000000000 + dq 0x0000000000000000 +%endm + +%include "crc64_iso_refl_by16_10.asm" diff --git a/src/spdk/isa-l/crc/crc64_ecma_refl_by8.asm b/src/spdk/isa-l/crc/crc64_ecma_refl_by8.asm new file mode 100644 index 000000000..e6518f424 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_ecma_refl_by8.asm @@ -0,0 +1,548 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution.
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Function API: +; uint64_t crc64_ecma_refl_by8( +; uint64_t init_crc, //initial CRC value, 64 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; uint64_t len //buffer length in bytes (64-bit data) +; ); +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; sample yasm command line: +; yasm -f x64 -f elf64 -X gnu -g dwarf2 crc64_ecma_refl_by8 +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + + +align 16 
+global crc64_ecma_refl_by8:ISAL_SYM_TYPE_FUNCTION +crc64_ecma_refl_by8: + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 + sub rsp, VARIABLE_OFFSET + +%ifidn __OUTPUT_FORMAT__, win64 + ; push the xmm registers into the stack to maintain + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + ; check if smaller than 256B + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time... + jl _less_than_256 + + + ; load the initial crc value + movq xmm10, arg1 ; initial crc + ; receive the initial 128B data, xor the initial crc value + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + ; XOR the initial_crc value + pxor xmm0, xmm10 + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time. 
This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm1, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm3, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm5, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm7, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + + + ; fold the 8 xmm registers to 1 xmm register with different constants + ; xmm0 to xmm7 + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq 
xmm0, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + ;xmm1 to xmm7 + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + ; xmm6 to xmm7 + movdqa xmm10, [rk1] + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + add arg3, 16 + je _128_done + ; here we are getting data that is less than 16 bytes. 
+ ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. +_get_last_two_xmms: + + + movdqa xmm2, xmm7 + movdqu xmm1, [arg2 - 16 + arg3] + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table] + add rax, arg3 + movdqu xmm0, [rax] + + + pshufb xmm7, xmm0 + pxor xmm0, [mask3] + pshufb xmm2, xmm0 + + pblendvb xmm2, xmm1 ;xmm0 is implicit + ;;;;;;;;;; + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0 + psrldq xmm0, 8 + pxor xmm7, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm1, xmm7 + movdqa xmm10, [rk7] + + pclmulqdq xmm7, xmm10, 0 + movdqa xmm2, xmm7 + pclmulqdq xmm7, xmm10, 0x10 + pslldq xmm2, 8 + pxor xmm7, xmm2 + pxor xmm7, xmm1 + pextrq rax, xmm7, 1 + +_cleanup: + ; return c ^ 0xffffffff, ffffffffL; + not rax + + +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp, VARIABLE_OFFSET + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + + ; if there is, load the constants + movdqa xmm10, 
[rk1] ; rk1 and rk2 in xmm10 + + movq xmm0, arg1 ; get the initial crc value + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov rax, arg1 + test arg3, arg3 + je _cleanup + + movq xmm0, arg1 ; get the initial crc value + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. + + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + +_zero_left: + movdqa xmm7, [rsp] + pxor xmm7, xmm0 ; xor the initial crc value + + lea rax,[pshufb_shf_table] + + cmp r9, 8 + jl _end_1to7 + +_end_8to15: + movdqu xmm0, [rax + r9] + pshufb xmm7,xmm0 + jmp _128_done + +_end_1to7: + ; Left shift (8-length) bytes in XMM + movdqu xmm0, [rax + r9 + 8] + pshufb xmm7,xmm0 + + jmp _barrett + +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + 
+section .data + +; precomputed constants +align 16 +; rk7 = floor(2^128/Q) +; rk8 = Q +rk1 : +DQ 0xdabe95afc7875f40 +rk2 : +DQ 0xe05dd497ca393ae4 +rk3 : +DQ 0xd7d86b2af73de740 +rk4 : +DQ 0x8757d71d4fcc1000 +rk5 : +DQ 0xdabe95afc7875f40 +rk6 : +DQ 0x0000000000000000 +rk7 : +DQ 0x9c3e466c172963d5 +rk8 : +DQ 0x92d8af2baf0e1e84 +rk9 : +DQ 0x947874de595052cb +rk10 : +DQ 0x9e735cb59b4724da +rk11 : +DQ 0xe4ce2cd55fea0037 +rk12 : +DQ 0x2fe3fd2920ce82ec +rk13 : +DQ 0xe31d519421a63a5 +rk14 : +DQ 0x2e30203212cac325 +rk15 : +DQ 0x81f6054a7842df4 +rk16 : +DQ 0x6ae3efbb9dd441f3 +rk17 : +DQ 0x69a35d91c3730254 +rk18 : +DQ 0xb5ea1af9c013aca4 +rk19 : +DQ 0x3be653a30fe1af51 +rk20 : +DQ 0x60095b008a9efa44 + + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + + +mask: +dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2: +dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF +mask3: +dq 0x8080808080808080, 0x8080808080808080 + +;;; func core, ver, snum +slversion crc64_ecma_refl_by8, 01, 00, 001d diff --git a/src/spdk/isa-l/crc/crc64_example.c b/src/spdk/isa-l/crc/crc64_example.c new file mode 100644 index 000000000..64763a1b0 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_example.c @@ -0,0 +1,68 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+#include
+#include
+#include
+#include "crc64.h"
+
+#define BUF_SIZE 8192		/* bytes requested from fread() per iteration */
+#define INIT_SEED 0x12345678	/* value passed as the running CRC for the first chunk */
+/* Read the file named by argv[1] in BUF_SIZE chunks; print its length and crc64_ecma_refl value. */
+int main(int argc, char *argv[])
+{
+	uint8_t inbuf[BUF_SIZE];
+	uint64_t avail_in, total_in = 0;
+	uint64_t crc64_checksum;
+	FILE *in;
+
+	if (argc != 2) {
+		fprintf(stderr, "Usage: crc64_example infile\n");
+		exit(1);	/* a usage error must not report success to the caller */
+	}
+	in = fopen(argv[1], "rb");
+	if (!in) {
+		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
+		exit(1);	/* an unreadable input must not report success either */
+	}
+
+	printf("crc64_example -- crc64_ecma_refl:\n");
+	fflush(0);
+
+	crc64_checksum = INIT_SEED;
+	while ((avail_in = fread(inbuf, 1, BUF_SIZE, in))) {
+		// crc update mode: the previous result is fed back in as the seed for the next chunk
+		crc64_checksum = crc64_ecma_refl(crc64_checksum, inbuf, avail_in);
+		total_in += avail_in;
+	}
+
+	fclose(in);
+	printf("total length is %llu, checksum is 0x%llx\n",	/* casts keep the varargs matched to %ll */
+	       (unsigned long long)total_in, (unsigned long long)crc64_checksum);
+	return 0;
+}
diff --git a/src/spdk/isa-l/crc/crc64_funcs_perf.c b/src/spdk/isa-l/crc/crc64_funcs_perf.c
new file mode 100644
index 000000000..4ad1cc199
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_funcs_perf.c
@@ -0,0 +1,103 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include "crc64.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN     (8*1024)	/* parenthesized so the macro expands safely inside any expression */
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
+# define TEST_LEN     (2 * GT_L3_CACHE)
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define TEST_MEM TEST_LEN
+
+typedef uint64_t(*crc64_func_t) (uint64_t, const uint8_t *, uint64_t);
+
+typedef struct func_case {
+	const char *note;		/* name printed in the benchmark output */
+	crc64_func_t crc64_func_call;	/* implementation that main() times */
+	crc64_func_t crc64_ref_call;	/* *_base counterpart; not called by main() below */
+} func_case_t;
+
+func_case_t test_funcs[] = {
+	{"crc64_ecma_norm", crc64_ecma_norm, crc64_ecma_norm_base},
+	{"crc64_ecma_refl", crc64_ecma_refl, crc64_ecma_refl_base},
+	{"crc64_iso_norm", crc64_iso_norm, crc64_iso_norm_base},
+	{"crc64_iso_refl", crc64_iso_refl, crc64_iso_refl_base},
+	{"crc64_jones_norm", crc64_jones_norm, crc64_jones_norm_base},
+	{"crc64_jones_refl", crc64_jones_refl, crc64_jones_refl_base}
+};
+/* Run BENCHMARK() on every test_funcs[] entry over one TEST_LEN-byte buffer and print each result. */
+int main(int argc, char *argv[])
+{
+	size_t j;		/* unsigned, to match the sizeof-based loop bound */
+	void *buf;
+	uint64_t crc;
+	struct perf start;
+	func_case_t *test_func;
+
+	if (posix_memalign(&buf, 1024, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	memset(buf, (char)TEST_SEED, TEST_LEN);
+
+	for (j = 0; j < sizeof(test_funcs) / sizeof(test_funcs[0]); j++) {
+		test_func = &test_funcs[j];
+		printf("%s_perf:\n", test_func->note);
+
+		printf("Start timed tests\n");
+		fflush(0);
+
+		BENCHMARK(&start, BENCHMARK_TIME, crc =
+			  test_func->crc64_func_call(TEST_SEED, buf, TEST_LEN));
+		printf("%s" TEST_TYPE_STR ": ", test_func->note);
+		perf_print(start, (long long)TEST_LEN);
+
+		printf("finish 0x%llx\n", (unsigned long long)crc);	/* cast keeps the vararg matched to %llx */
+	}
+	free(buf);		/* release the posix_memalign() buffer before exiting */
+	return 0;
+}
diff --git a/src/spdk/isa-l/crc/crc64_funcs_test.c b/src/spdk/isa-l/crc/crc64_funcs_test.c
new file mode 100644
index 000000000..7e4ee2b37
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_funcs_test.c
@@ -0,0 +1,315 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include
+#include
+#include
+#include
+#include "crc64.h"
+#include "types.h"
+#include "crc64_ref.h"
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define MAX_BUF 4096
+#define TEST_SIZE 32
+
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+typedef uint64_t(*crc64_func_t) (uint64_t, const uint8_t *, uint64_t);
+
+typedef struct func_case {
+	const char *note;		/* name printed in the test log */
+	crc64_func_t crc64_func_call;	/* implementation under test */
+	crc64_func_t crc64_base_call;	/* *_base implementation, also checked against the reference */
+	crc64_func_t crc64_ref_call;	/* *_ref implementation whose result is the expected value */
+} func_case_t;
+
+func_case_t test_funcs[] = {
+	{"crc64_ecma_norm", crc64_ecma_norm, crc64_ecma_norm_base, crc64_ecma_norm_ref},
+	{"crc64_ecma_refl", crc64_ecma_refl, crc64_ecma_refl_base, crc64_ecma_refl_ref},
+	{"crc64_iso_norm", crc64_iso_norm, crc64_iso_norm_base, crc64_iso_norm_ref},
+	{"crc64_iso_refl", crc64_iso_refl, crc64_iso_refl_base, crc64_iso_refl_ref},
+	{"crc64_jones_norm", crc64_jones_norm, crc64_jones_norm_base,
+	 crc64_jones_norm_ref},
+	{"crc64_jones_refl", crc64_jones_refl, crc64_jones_refl_base, crc64_jones_refl_ref}
+};
+
+// Generates pseudo-random data
+
+void rand_buffer(unsigned char *buf, long buffer_size)
+{
+	long i;
+	for (i = 0; i < buffer_size; i++)
+		buf[i] = (unsigned char)rand();	// explicit narrowing: only the low byte is stored
+}
+
+// Test cases
+int zeros_test(func_case_t * test_func);
+int simple_pattern_test(func_case_t * test_func);
+int seeds_sizes_test(func_case_t * test_func);
+int eob_test(func_case_t * test_func);
+int update_test(func_case_t * test_func);
+
+// Widen a CRC to unsigned long long so it matches the %llx conversions used in the printf calls below
+static inline unsigned long long ull(uint64_t v) { return v; }
+
+// verbose is set from the argument count in main(); buf_alloc is the MAX_BUF * TEST_SIZE byte test buffer
+int verbose = 0;
+void *buf_alloc = NULL;
+
+int main(int argc, char *argv[])
+{
+	int fail = 0, fail_case;
+	int i, ret;
+	func_case_t *test_func;
+
+	verbose = argc - 1;
+
+	// Align to 32B boundary
+	ret = posix_memalign(&buf_alloc, TEST_SIZE, MAX_BUF * TEST_SIZE);
+	if (ret) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	srand(TEST_SEED);
+	printf("CRC64 Tests\n");
+
+	for (i = 0; i < (int)(sizeof(test_funcs) / sizeof(test_funcs[0])); i++) {
+		fail_case = 0;
+		test_func = &test_funcs[i];
+
+		printf("Test %s\t", test_func->note);
+		fail_case += zeros_test(test_func);
+		fail_case += simple_pattern_test(test_func);
+		fail_case += seeds_sizes_test(test_func);
+		fail_case += eob_test(test_func);
+		fail_case += update_test(test_func);
+		printf(" done: %s\n", fail_case ? "Fail" : "Pass");
+
+		if (fail_case) {
+			printf("\n%s Failed %d tests\n", test_func->note, fail_case);
+			fail++;
+		}
+	}
+
+	printf("CRC64 Tests all done: %s\n", fail ? "Fail" : "Pass");
+	free(buf_alloc);	// release the posix_memalign() buffer before exiting
+	return fail;
+}
+
+// Test of all zeros
+int zeros_test(func_case_t * test_func)
+{
+	uint64_t crc_ref, crc_base, crc;
+	int fail = 0;
+	unsigned char *buf = NULL;
+
+	buf = (unsigned char *)buf_alloc;
+	memset(buf, 0, MAX_BUF * 10);
+	crc_ref = test_func->crc64_ref_call(TEST_SEED, buf, MAX_BUF * 10);
+	crc_base = test_func->crc64_base_call(TEST_SEED, buf, MAX_BUF * 10);
+	crc = test_func->crc64_func_call(TEST_SEED, buf, MAX_BUF * 10);
+
+	if ((crc_base != crc_ref) || (crc != crc_ref)) {
+		fail++;
+		printf("\n%11s%-18s %-18s %s\n", "", "ref", "base", "opt");	// one label per printed value, in print order
+		printf("%11s%-18s %-18s %s\n", "", "------", "------", "------");
+		printf("crc zero = 0x%16llx 0x%16llx 0x%16llx \n", ull(crc_ref), ull(crc_base), ull(crc));
+	} else
+		printf(".");
+
+	return fail;
+}
+
+// Another simple test pattern
+int simple_pattern_test(func_case_t * test_func)
+{
+	uint64_t crc_ref, crc_base, crc;
+	int fail = 0;
+	unsigned char *buf = NULL;
+
+	buf = (unsigned char *)buf_alloc;
+	memset(buf, 0x8a, MAX_BUF);
+	crc_ref = test_func->crc64_ref_call(TEST_SEED, buf, MAX_BUF);
+	crc_base = test_func->crc64_base_call(TEST_SEED, buf, MAX_BUF);
+	crc = test_func->crc64_func_call(TEST_SEED, buf, MAX_BUF);
+
+	if ((crc_base != crc_ref) || (crc != crc_ref))
+		fail++;
+	if (verbose)
+		printf("crc all 8a = 0x%16llx 0x%16llx 0x%16llx\n", ull(crc_ref), ull(crc_base), ull(crc));
+	else
+		printf(".");
+
+	return fail;
+}
+// Random data in MAX_BUF blocks, then every length from MAX_BUF down to 0, then 20 rounds with fresh seeds
+int seeds_sizes_test(func_case_t * test_func)
+{
+	uint64_t crc_ref, crc_base, crc;
+	int fail = 0;
+	int i;
+	uint64_t r, s;
+	unsigned char *buf = NULL;
+
+	// Do a few random tests
+	buf = (unsigned char *)buf_alloc;	//reset buf
+	r = rand();
+	rand_buffer(buf, MAX_BUF * TEST_SIZE);
+
+	for (i = 0; i < TEST_SIZE; i++) {
+		crc_ref = test_func->crc64_ref_call(r, buf, MAX_BUF);
+		crc_base = test_func->crc64_base_call(r, buf, MAX_BUF);
+		crc = test_func->crc64_func_call(r, buf, MAX_BUF);
+
+		if ((crc_base != crc_ref) || (crc != crc_ref))
+			fail++;
+		if (verbose)
+			printf("crc rand%3d = 0x%16llx 0x%16llx 0x%16llx\n", i, ull(crc_ref),
+			       ull(crc_base), ull(crc));
+		else if (i % (TEST_SIZE / 8) == 0)
+			printf(".");
+		buf += MAX_BUF;
+	}
+
+	// Do a few random sizes
+	buf = (unsigned char *)buf_alloc;	//reset buf
+	r = rand();
+
+	for (i = MAX_BUF; i >= 0; i--) {
+		crc_ref = test_func->crc64_ref_call(r, buf, i);
+		crc_base = test_func->crc64_base_call(r, buf, i);
+		crc = test_func->crc64_func_call(r, buf, i);
+
+		if ((crc_base != crc_ref) || (crc != crc_ref)) {
+			fail++;
+			printf("fail random size%i 0x%16llx 0x%16llx 0x%16llx\n", i, ull(crc_ref),
+			       ull(crc_base), ull(crc));
+		} else if (i % (MAX_BUF / 8) == 0)
+			printf(".");
+	}
+
+	// Try different seeds
+	for (s = 0; s < 20; s++) {
+		buf = (unsigned char *)buf_alloc;	//reset buf
+
+		r = rand();	// just to get a new seed
+		rand_buffer(buf, MAX_BUF * TEST_SIZE);	// new pseudo-rand data
+
+		if (verbose)
+			printf("seed = 0x%llx\n", ull(r));
+
+		for (i = 0; i < TEST_SIZE; i++) {
+			crc_ref = test_func->crc64_ref_call(r, buf, MAX_BUF);
+			crc_base = test_func->crc64_base_call(r, buf, MAX_BUF);
+			crc = test_func->crc64_func_call(r, buf, MAX_BUF);
+
+			if ((crc_base != crc_ref) || (crc != crc_ref))
+				fail++;
+			if (verbose)
+				printf("crc rand%3d = 0x%16llx 0x%16llx 0x%16llx\n", i, ull(crc_ref),
+				       ull(crc_base), ull(crc));
+			else if (i % (TEST_SIZE * 20 / 8) == 0)
+				printf(".");
+			buf += MAX_BUF;
+		}
+	}
+
+	return fail;
+}
+
+// Run tests at end of buffer
+int eob_test(func_case_t * test_func)
+{
+	uint64_t crc_ref, crc_base, crc;
+	int fail = 0;
+	int i;
+	unsigned char *buf = NULL;
+
+	// Null test
+	if (0 != test_func->crc64_func_call(0, NULL, 0)) {
+		fail++;
+		printf("crc null test fail\n");
+	}
+
+	buf = (unsigned char *)buf_alloc;	//reset buf
+	buf = buf + ((MAX_BUF - 1) * TEST_SIZE);	//Line up TEST_SIZE from end
+	for (i = 0; i <= TEST_SIZE; i++) {
+		crc_ref = test_func->crc64_ref_call(TEST_SEED, buf + i, TEST_SIZE - i);
+		crc_base = test_func->crc64_base_call(TEST_SEED, buf + i, TEST_SIZE - i);
+		crc = test_func->crc64_func_call(TEST_SEED, buf + i, TEST_SIZE - i);
+
+		if ((crc_base != crc_ref) || (crc != crc_ref))
+			fail++;
+		if (verbose)
+			printf("crc eob rand%3d = 0x%16llx 0x%16llx 0x%16llx\n", i, ull(crc_ref),
+			       ull(crc_base), ull(crc));
+		else if (i % (TEST_SIZE / 8) == 0)
+			printf(".");
+	}
+
+	return fail;
+}
+// Chain TEST_SIZE updates of MAX_BUF bytes each and compare with one-shot ref/base CRCs of the whole buffer
+int update_test(func_case_t * test_func)
+{
+	uint64_t crc_ref, crc_base, crc;
+	int fail = 0;
+	int i;
+	uint64_t r;
+	unsigned char *buf = NULL;
+
+	buf = (unsigned char *)buf_alloc;	//reset buf
+	r = rand();
+	// Process the whole buf with reference func single call.
+	crc_ref = test_func->crc64_ref_call(r, buf, MAX_BUF * TEST_SIZE);
+	crc_base = test_func->crc64_base_call(r, buf, MAX_BUF * TEST_SIZE);
+	// Process buf with update method.
+	for (i = 0; i < TEST_SIZE; i++) {
+		crc = test_func->crc64_func_call(r, buf, MAX_BUF);
+		// Update crc seeds and buf pointer.
+		r = crc;
+		buf += MAX_BUF;
+	}
+
+	if ((crc_base != crc_ref) || (crc != crc_ref))
+		fail++;
+	if (verbose)
+		printf("crc rand%3d = 0x%16llx 0x%16llx 0x%16llx\n", i, ull(crc_ref), ull(crc_base), ull(crc));
+	else
+		printf(".");
+
+	return fail;
+}
diff --git a/src/spdk/isa-l/crc/crc64_iso_norm_by16_10.asm b/src/spdk/isa-l/crc/crc64_iso_norm_by16_10.asm
new file mode 100644
index 000000000..2030ad71e
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_iso_norm_by16_10.asm
@@ -0,0 +1,524 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Function API: +; uint64_t crc64_iso_norm_by16_10( +; uint64_t init_crc, //initial CRC value, 64 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; uint64_t len //buffer length in bytes (64-bit data) +; ); +; +%include "reg_sizes.asm" + +%if (AS_FEATURE_LEVEL) >= 10 + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*12+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + +%ifndef FUNCTION_NAME +%define FUNCTION_NAME crc64_iso_norm_by16_10 +%endif + +align 16 +global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION +FUNCTION_NAME: + not arg1 + sub rsp, VARIABLE_OFFSET + +%ifidn __OUTPUT_FORMAT__, win64 + ; push the xmm registers into the stack to maintain + vmovdqa [rsp + XMM_SAVE + 16*0], xmm6 + vmovdqa [rsp + XMM_SAVE + 16*1], xmm7 + vmovdqa [rsp + XMM_SAVE + 16*2], xmm8 + vmovdqa [rsp + XMM_SAVE + 16*3], xmm9 + vmovdqa [rsp + XMM_SAVE + 16*4], xmm10 + vmovdqa [rsp + XMM_SAVE + 16*5], xmm11 + vmovdqa [rsp + XMM_SAVE + 16*6], xmm12 + vmovdqa [rsp + XMM_SAVE + 16*7], xmm13 + vmovdqa [rsp + XMM_SAVE + 16*8], xmm14 + vmovdqa [rsp + XMM_SAVE + 16*9], xmm15 +%endif + vbroadcasti32x4 zmm18, [SHUF_MASK] + cmp arg3, 256 + jl _less_than_256 + + ; load the initial crc value + vmovq xmm10, arg1 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register. + ; because data will be byte-reflected and will align with initial crc at correct place. 
+ vpslldq xmm10, 8 + + ; receive the initial 128B data, xor the initial crc value + vmovdqu8 zmm0, [arg2+16*0] + vmovdqu8 zmm4, [arg2+16*4] + vpshufb zmm0, zmm0, zmm18 + vpshufb zmm4, zmm4, zmm18 + vpxorq zmm0, zmm10 + vbroadcasti32x4 zmm10, [rk3] ;zmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + sub arg3, 256 + cmp arg3, 256 + jl _fold_128_B_loop + + vmovdqu8 zmm7, [arg2+16*8] + vmovdqu8 zmm8, [arg2+16*12] + vpshufb zmm7, zmm7, zmm18 + vpshufb zmm8, zmm8, zmm18 + vbroadcasti32x4 zmm16, [rk_1] ;zmm16 has rk-1 and rk-2 + sub arg3, 256 + +_fold_256_B_loop: + add arg2, 256 + vmovdqu8 zmm3, [arg2+16*0] + vpshufb zmm3, zmm3, zmm18 + vpclmulqdq zmm1, zmm0, zmm16, 0x00 + vpclmulqdq zmm2, zmm0, zmm16, 0x11 + vpxorq zmm0, zmm1, zmm2 + vpxorq zmm0, zmm0, zmm3 + + vmovdqu8 zmm9, [arg2+16*4] + vpshufb zmm9, zmm9, zmm18 + vpclmulqdq zmm5, zmm4, zmm16, 0x00 + vpclmulqdq zmm6, zmm4, zmm16, 0x11 + vpxorq zmm4, zmm5, zmm6 + vpxorq zmm4, zmm4, zmm9 + + vmovdqu8 zmm11, [arg2+16*8] + vpshufb zmm11, zmm11, zmm18 + vpclmulqdq zmm12, zmm7, zmm16, 0x00 + vpclmulqdq zmm13, zmm7, zmm16, 0x11 + vpxorq zmm7, zmm12, zmm13 + vpxorq zmm7, zmm7, zmm11 + + vmovdqu8 zmm17, [arg2+16*12] + vpshufb zmm17, zmm17, zmm18 + vpclmulqdq zmm14, zmm8, zmm16, 0x00 + vpclmulqdq zmm15, zmm8, zmm16, 0x11 + vpxorq zmm8, zmm14, zmm15 + vpxorq zmm8, zmm8, zmm17 + + sub arg3, 256 + jge _fold_256_B_loop + + ;; Fold 256 into 128 + add arg2, 256 + vpclmulqdq zmm1, zmm0, zmm10, 0x00 + vpclmulqdq zmm2, zmm0, zmm10, 0x11 + vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC + + vpclmulqdq zmm5, zmm4, zmm10, 0x00 + vpclmulqdq zmm6, zmm4, zmm10, 0x11 + vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC + + vmovdqa32 zmm0, zmm7 + vmovdqa32 zmm4, zmm8 + + add arg3, 128 + jmp _fold_128_B_register + + ; fold 128B at a time. 
This section of the code folds 2 zmm registers in parallel +_fold_128_B_loop: + add arg2, 128 ; update the buffer pointer + vmovdqu8 zmm8, [arg2+16*0] + vpshufb zmm8, zmm8, zmm18 + vpclmulqdq zmm1, zmm0, zmm10, 0x00 + vpclmulqdq zmm2, zmm0, zmm10, 0x11 + vpxorq zmm0, zmm1, zmm2 + vpxorq zmm0, zmm0, zmm8 + + vmovdqu8 zmm9, [arg2+16*4] + vpshufb zmm9, zmm9, zmm18 + vpclmulqdq zmm5, zmm4, zmm10, 0x00 + vpclmulqdq zmm6, zmm4, zmm10, 0x11 + vpxorq zmm4, zmm5, zmm6 + vpxorq zmm4, zmm4, zmm9 + sub arg3, 128 + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 2 zmm registers: zmm0, zmm4 + +_fold_128_B_register: + ; fold the 8 128b parts into 1 xmm register with different constants + vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16 + vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0 + vpclmulqdq zmm1, zmm0, zmm16, 0x00 + vpclmulqdq zmm2, zmm0, zmm16, 0x11 + vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand + + vpclmulqdq zmm5, zmm4, zmm11, 0x00 + vpclmulqdq zmm6, zmm4, zmm11, 0x11 + vmovdqa xmm10, [rk1] ; Needed later in reduction loop + vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC + vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC + + vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10 + vpxorq ymm8, ymm8, ymm1 + vextracti64x2 xmm5, ymm8, 1 + vpxorq xmm7, xmm5, xmm8 + + ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 
16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + vmovdqa xmm8, xmm7 + vpclmulqdq xmm7, xmm10, 0x11 + vpclmulqdq xmm8, xmm10, 0x00 + vpxor xmm7, xmm8 + vmovdqu xmm0, [arg2] + vpshufb xmm0, xmm0, xmm18 + vpxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + add arg3, 16 + je _128_done + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset + ; the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. +_get_last_two_xmms: + + vmovdqa xmm2, xmm7 + vmovdqu xmm1, [arg2 - 16 + arg3] + vpshufb xmm1, xmm18 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg3 + vmovdqu xmm0, [rax] + + ; shift xmm2 to the left by arg3 bytes + vpshufb xmm2, xmm0 + + ; shift xmm7 to the right by 16-arg3 bytes + vpxor xmm0, [mask1] + vpshufb xmm7, xmm0 + vpblendvb xmm1, xmm1, xmm2, xmm0 + + ; fold 16 Bytes + vmovdqa xmm2, xmm1 + vmovdqa xmm8, xmm7 + vpclmulqdq xmm7, xmm10, 0x11 + vpclmulqdq xmm8, xmm10, 0x0 + vpxor xmm7, xmm8 + vpxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + vmovdqa xmm10, [rk5] + vmovdqa xmm0, xmm7 + + ;64b fold + vpclmulqdq xmm7, xmm10, 0x01 ; H*L + vpslldq xmm0, 8 + vpxor xmm7, xmm0 + + ;barrett reduction +_barrett: + vmovdqa xmm10, [rk7] ; rk7 and rk8 in xmm10 + vmovdqa xmm0, xmm7 + + vmovdqa xmm1, xmm7 + vpand xmm1, [mask3] + vpclmulqdq xmm7, xmm10, 0x01 + vpxor xmm7, xmm1 + + vpclmulqdq 
xmm7, xmm10, 0x11 + vpxor xmm7, xmm0 + vpextrq rax, xmm7, 0 + +_cleanup: + not rax + + +%ifidn __OUTPUT_FORMAT__, win64 + vmovdqa xmm6, [rsp + XMM_SAVE + 16*0] + vmovdqa xmm7, [rsp + XMM_SAVE + 16*1] + vmovdqa xmm8, [rsp + XMM_SAVE + 16*2] + vmovdqa xmm9, [rsp + XMM_SAVE + 16*3] + vmovdqa xmm10, [rsp + XMM_SAVE + 16*4] + vmovdqa xmm11, [rsp + XMM_SAVE + 16*5] + vmovdqa xmm12, [rsp + XMM_SAVE + 16*6] + vmovdqa xmm13, [rsp + XMM_SAVE + 16*7] + vmovdqa xmm14, [rsp + XMM_SAVE + 16*8] + vmovdqa xmm15, [rsp + XMM_SAVE + 16*9] +%endif + add rsp, VARIABLE_OFFSET + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + + ; if there is, load the constants + vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + + vmovq xmm0, arg1 ; get the initial crc value + vpslldq xmm0, 8 ; align it to its correct place + vmovdqu xmm7, [arg2] ; load the plaintext + vpshufb xmm7, xmm18 ; byte-reflect the plaintext + vpxor xmm7, xmm0 + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. 
+ mov rax, arg1 + test arg3, arg3 + je _cleanup + + vmovq xmm0, arg1 ; get the initial crc value + vpslldq xmm0, 8 ; align it to its correct place + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + vmovdqu xmm7, [arg2] ; load the plaintext + vpshufb xmm7, xmm18 ; byte-reflect the plaintext + vpxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. + + vpxor xmm1, xmm1 + mov r11, rsp + vmovdqa [r11], xmm1 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + +_zero_left: + vmovdqa xmm7, [rsp] + vpshufb xmm7, xmm18 + vpxor xmm7, xmm0 ; xor the initial crc value + + lea rax, [pshufb_shf_table + 16] + sub rax, r9 + + cmp r9, 8 + jl _end_1to7 + +_end_8to15: + vmovdqu xmm0, [rax] + vpxor xmm0, [mask1] + + vpshufb xmm7, xmm0 + jmp _128_done + +_end_1to7: + ; Right shift (8-length) bytes in XMM + add rax, 8 + vmovdqu xmm0, [rax] + vpshufb xmm7,xmm0 + + jmp _barrett + +align 16 +_exact_16_left: + vmovdqu xmm7, [arg2] + vpshufb xmm7, xmm18 + vpxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +section .data +align 32 + +%ifndef USE_CONSTS +; precomputed constants +rk_1: dq 0x0000001a00000144 +rk_2: dq 0x0000015e00001dac +rk1: dq 0x0000000000000145 +rk2: dq 0x0000000000001db7 +rk3: dq 0x000100000001001a +rk4: dq 
0x001b0000001b015e +rk5: dq 0x0000000000000145 +rk6: dq 0x0000000000000000 +rk7: dq 0x000000000000001b +rk8: dq 0x000000000000001b +rk9: dq 0x0150145145145015 +rk10: dq 0x1c71db6db6db71c7 +rk11: dq 0x0001110110110111 +rk12: dq 0x001aab1ab1ab1aab +rk13: dq 0x0000014445014445 +rk14: dq 0x00001daab71daab7 +rk15: dq 0x0000000101000101 +rk16: dq 0x0000001b1b001b1b +rk17: dq 0x0000000001514515 +rk18: dq 0x000000001c6db6c7 +rk19: dq 0x0000000000011011 +rk20: dq 0x00000000001ab1ab + +rk_1b: dq 0x0000000000000145 +rk_2b: dq 0x0000000000001db7 + dq 0x0000000000000000 + dq 0x0000000000000000 +%else +INCLUDE_CONSTS +%endif + +mask1: dq 0x8080808080808080, 0x8080808080808080 +mask2: dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF +mask3: dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF + +SHUF_MASK: dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq
0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x0f0e0d0c0b0a0908 +dq 0x8080808080808080, 0x0f0e0d0c0b0a0908 +dq 0x8080808080808080, 0x8080808080808080 + + +%else ; Assembler doesn't understand these opcodes. Add empty symbol for windows. +%ifidn __OUTPUT_FORMAT__, win64 +global no_ %+ FUNCTION_NAME +no_ %+ FUNCTION_NAME: +%endif +%endif ; (AS_FEATURE_LEVEL) >= 10 diff --git a/src/spdk/isa-l/crc/crc64_iso_norm_by8.asm b/src/spdk/isa-l/crc/crc64_iso_norm_by8.asm new file mode 100644 index 000000000..9bc38ec94 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_iso_norm_by8.asm @@ -0,0 +1,581 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Function API: +; uint64_t crc64_iso_norm_by8( +; uint64_t init_crc, //initial CRC value, 64 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; uint64_t len //buffer length in bytes (64-bit data) +; ); +; +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif +align 16 +global crc64_iso_norm_by8:ISAL_SYM_TYPE_FUNCTION +crc64_iso_norm_by8: + + not arg1 ;~init_crc + + sub rsp,VARIABLE_OFFSET + +%ifidn __OUTPUT_FORMAT__, win64 + ; save xmm6-xmm13 on the stack (restored at _cleanup) + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + + ; check if smaller than 256 + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256 + + + ; load the initial crc value + movq xmm10, arg1 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register. + ; because data will be byte-reflected and will align with initial crc at correct place. + pslldq xmm10, 8 + + movdqa xmm11, [SHUF_MASK] + ; receive the initial 128B data, xor the initial crc value + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + pshufb xmm0, xmm11 + ; XOR the initial_crc value + pxor xmm0, xmm10 + pshufb xmm1, xmm11 + pshufb xmm2, xmm11 + pshufb xmm3, xmm11 + pshufb xmm4, xmm11 + pshufb xmm5, xmm11 + pshufb xmm6, xmm11 + pshufb xmm7, xmm11 + + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time. 
This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 ; buf += 128; + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm1, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm3, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm5, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm7, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0, 
xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + + + ; fold the 8 xmm registers to 1 xmm register with different constants + + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + + movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2 + + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 
16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pshufb xmm0, xmm11 + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg3, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. 
+_get_last_two_xmms: + movdqa xmm2, xmm7 + + movdqu xmm1, [arg2 - 16 + arg3] + pshufb xmm1, xmm11 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg3 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg3 bytes + pshufb xmm2, xmm0 + + ; shift xmm7 to the right by 16-arg3 bytes + pxor xmm0, [mask1] + pshufb xmm7, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10 + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0x01 ; H*L + pslldq xmm0, 8 + pxor xmm7, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10 + movdqa xmm0, xmm7 + + movdqa xmm1, xmm7 + pand xmm1, [mask3] + pclmulqdq xmm7, xmm10, 0x01 + pxor xmm7, xmm1 + + pclmulqdq xmm7, xmm10, 0x11 + pxor xmm7, xmm0 + pextrq rax, xmm7, 0 + +_cleanup: + not rax +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp, VARIABLE_OFFSET + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + movdqa xmm11, [SHUF_MASK] + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1 
and rk2 in xmm10 + + movq xmm0, arg1 ; get the initial crc value + pslldq xmm0, 8 ; align it to its correct place + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 + + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov rax, arg1 + test arg3, arg3 + je _cleanup + + movdqa xmm11, [SHUF_MASK] + + movq xmm0, arg1 ; get the initial crc value + pslldq xmm0, 8 ; align it to its correct place + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al +_zero_left: + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + ; shl r9, 4 + lea rax, [pshufb_shf_table + 16] + sub rax, r9 + + cmp r9, 8 + jl _end_1to7 + +_end_8to15: + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm7, xmm0 + jmp _128_done + +_end_1to7: + ; Right shift (8-length) bytes in XMM + add rax, 8 + movdqu xmm0, [rax] + pshufb xmm7,xmm0 + + jmp _barrett +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +section .data + +; precomputed constants +align 16 + +rk1: +DQ 0x0000000000000145 +rk2: +DQ 0x0000000000001db7 +rk3: +DQ 0x000100000001001a +rk4: +DQ 0x001b0000001b015e +rk5: +DQ 0x0000000000000145 +rk6: +DQ 0x0000000000000000 +rk7: +DQ 0x000000000000001b +rk8: +DQ 0x000000000000001b +rk9: +DQ 0x0150145145145015 +rk10: +DQ 0x1c71db6db6db71c7 +rk11: +DQ 0x0001110110110111 +rk12: +DQ 0x001aab1ab1ab1aab +rk13: +DQ 0x0000014445014445 +rk14: +DQ 0x00001daab71daab7 +rk15: +DQ 0x0000000101000101 +rk16: +DQ 0x0000001b1b001b1b +rk17: +DQ 0x0000000001514515 +rk18: +DQ 0x000000001c6db6c7 +rk19: +DQ 0x0000000000011011 +rk20: +DQ 0x00000000001ab1ab + +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF +mask3: +dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF + 
+SHUF_MASK: +dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x0f0e0d0c0b0a0908 +dq 0x8080808080808080, 0x0f0e0d0c0b0a0908 +dq 0x8080808080808080, 0x8080808080808080 + +;;; func core, ver, snum +slversion crc64_iso_norm_by8, 01, 00, 0020 diff --git a/src/spdk/isa-l/crc/crc64_iso_refl_by16_10.asm b/src/spdk/isa-l/crc/crc64_iso_refl_by16_10.asm new file mode 100644 index 000000000..911a6c1a5 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_iso_refl_by16_10.asm @@ -0,0 +1,494 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Function API: +; uint64_t crc64_iso_refl_by16_10( +; uint64_t init_crc, //initial CRC value, 64 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; uint64_t len //buffer length in bytes (64-bit data) +; ); +; +%include "reg_sizes.asm" + +%if (AS_FEATURE_LEVEL) >= 10 + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*12+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + +%ifndef FUNCTION_NAME +%define FUNCTION_NAME crc64_iso_refl_by16_10 +%endif + +align 16 +global FUNCTION_NAME:ISAL_SYM_TYPE_FUNCTION +FUNCTION_NAME: + not arg1 + sub rsp, VARIABLE_OFFSET + +%ifidn __OUTPUT_FORMAT__, win64 + ; save xmm6-xmm15 on the stack (restored at _cleanup) + vmovdqa [rsp + XMM_SAVE + 16*0], xmm6 + vmovdqa [rsp + XMM_SAVE + 16*1], xmm7 + vmovdqa [rsp + XMM_SAVE + 16*2], xmm8 + vmovdqa [rsp + XMM_SAVE + 16*3], xmm9 + vmovdqa [rsp + XMM_SAVE + 16*4], xmm10 + vmovdqa [rsp + XMM_SAVE + 16*5], xmm11 + vmovdqa [rsp + XMM_SAVE + 16*6], xmm12 + vmovdqa [rsp + XMM_SAVE + 16*7], xmm13 + vmovdqa [rsp + XMM_SAVE + 16*8], xmm14 + vmovdqa [rsp + XMM_SAVE + 16*9], xmm15 +%endif + + cmp arg3, 256 + jl _less_than_256 + + ; load the initial crc value + vmovq xmm10, arg1 ; initial crc + + ; receive the initial 128B data, xor the initial crc value + vmovdqu8 zmm0, [arg2+16*0] + vmovdqu8 zmm4, [arg2+16*4] + vpxorq zmm0, zmm10 + vbroadcasti32x4 zmm10, [rk3] ;zmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + + sub arg3, 256 + cmp arg3, 256 + jl _fold_128_B_loop + + vmovdqu8 zmm7, [arg2+16*8] +
vmovdqu8 zmm8, [arg2+16*12] + vbroadcasti32x4 zmm16, [rk_1] ;zmm16 has rk-1 and rk-2 + sub arg3, 256 + +_fold_256_B_loop: + add arg2, 256 + vmovdqu8 zmm3, [arg2+16*0] + vpclmulqdq zmm1, zmm0, zmm16, 0x10 + vpclmulqdq zmm2, zmm0, zmm16, 0x01 + vpxorq zmm0, zmm1, zmm2 + vpxorq zmm0, zmm0, zmm3 + + vmovdqu8 zmm9, [arg2+16*4] + vpclmulqdq zmm5, zmm4, zmm16, 0x10 + vpclmulqdq zmm6, zmm4, zmm16, 0x01 + vpxorq zmm4, zmm5, zmm6 + vpxorq zmm4, zmm4, zmm9 + + vmovdqu8 zmm11, [arg2+16*8] + vpclmulqdq zmm12, zmm7, zmm16, 0x10 + vpclmulqdq zmm13, zmm7, zmm16, 0x01 + vpxorq zmm7, zmm12, zmm13 + vpxorq zmm7, zmm7, zmm11 + + vmovdqu8 zmm17, [arg2+16*12] + vpclmulqdq zmm14, zmm8, zmm16, 0x10 + vpclmulqdq zmm15, zmm8, zmm16, 0x01 + vpxorq zmm8, zmm14, zmm15 + vpxorq zmm8, zmm8, zmm17 + + sub arg3, 256 + jge _fold_256_B_loop + + ;; Fold 256 into 128 + add arg2, 256 + vpclmulqdq zmm1, zmm0, zmm10, 0x01 + vpclmulqdq zmm2, zmm0, zmm10, 0x10 + vpternlogq zmm7, zmm1, zmm2, 0x96 ; xor ABC + + vpclmulqdq zmm5, zmm4, zmm10, 0x01 + vpclmulqdq zmm6, zmm4, zmm10, 0x10 + vpternlogq zmm8, zmm5, zmm6, 0x96 ; xor ABC + + vmovdqa32 zmm0, zmm7 + vmovdqa32 zmm4, zmm8 + + add arg3, 128 + jmp _fold_128_B_register + + ; fold 128B at a time. 
This section of the code folds 2 zmm registers in parallel +_fold_128_B_loop: + add arg2, 128 ; update the buffer pointer + vmovdqu8 zmm8, [arg2+16*0] + vpclmulqdq zmm1, zmm0, zmm10, 0x10 + vpclmulqdq zmm2, zmm0, zmm10, 0x01 + vpxorq zmm0, zmm1, zmm2 + vpxorq zmm0, zmm0, zmm8 + + vmovdqu8 zmm9, [arg2+16*4] + vpclmulqdq zmm5, zmm4, zmm10, 0x10 + vpclmulqdq zmm6, zmm4, zmm10, 0x01 + vpxorq zmm4, zmm5, zmm6 + vpxorq zmm4, zmm4, zmm9 + + sub arg3, 128 + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 2 zmm registers: zmm0, zmm4 + +_fold_128_B_register: + ; fold the 8 128b parts into 1 xmm register with different constants + vmovdqu8 zmm16, [rk9] ; multiply by rk9-rk16 + vmovdqu8 zmm11, [rk17] ; multiply by rk17-rk20, rk1,rk2, 0,0 + vpclmulqdq zmm1, zmm0, zmm16, 0x01 + vpclmulqdq zmm2, zmm0, zmm16, 0x10 + vextracti64x2 xmm7, zmm4, 3 ; save last that has no multiplicand + + vpclmulqdq zmm5, zmm4, zmm11, 0x01 + vpclmulqdq zmm6, zmm4, zmm11, 0x10 + vmovdqa xmm10, [rk1] ; Needed later in reduction loop + vpternlogq zmm1, zmm2, zmm5, 0x96 ; xor ABC + vpternlogq zmm1, zmm6, zmm7, 0x96 ; xor ABC + + vshufi64x2 zmm8, zmm1, zmm1, 0x4e ; Swap 1,0,3,2 - 01 00 11 10 + vpxorq ymm8, ymm8, ymm1 + vextracti64x2 xmm5, ymm8, 1 + vpxorq xmm7, xmm5, xmm8 + + ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 
16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + vmovdqa xmm8, xmm7 + vpclmulqdq xmm7, xmm10, 0x1 + vpclmulqdq xmm8, xmm10, 0x10 + vpxor xmm7, xmm8 + vmovdqu xmm0, [arg2] + vpxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + add arg3, 16 + je _128_done + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset + ; the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. +_get_last_two_xmms: + + + vmovdqa xmm2, xmm7 + vmovdqu xmm1, [arg2 - 16 + arg3] + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table] + add rax, arg3 + vmovdqu xmm0, [rax] + + + vpshufb xmm7, xmm0 + vpxor xmm0, [mask3] + vpshufb xmm2, xmm0 + + vpblendvb xmm2, xmm2, xmm1, xmm0 + ;;;;;;;;;; + vmovdqa xmm8, xmm7 + vpclmulqdq xmm7, xmm10, 0x1 + + vpclmulqdq xmm8, xmm10, 0x10 + vpxor xmm7, xmm8 + vpxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + vmovdqa xmm10, [rk5] + vmovdqa xmm0, xmm7 + + ;64b fold + vpclmulqdq xmm7, xmm10, 0 + vpsrldq xmm0, 8 + vpxor xmm7, xmm0 + + ;barrett reduction +_barrett: + vmovdqa xmm1, xmm7 + vmovdqa xmm10, [rk7] + + vpclmulqdq xmm7, xmm10, 0 + vmovdqa xmm2, xmm7 + vpclmulqdq xmm7, xmm10, 0x10 + vpslldq xmm2, 8 + vpxor xmm7, xmm2 + vpxor xmm7, xmm1 + vpextrq rax, xmm7, 1 + +_cleanup: + not rax + + +%ifidn __OUTPUT_FORMAT__, win64 + vmovdqa xmm6, [rsp + XMM_SAVE + 16*0] + vmovdqa xmm7, [rsp + XMM_SAVE + 16*1] + 
vmovdqa xmm8, [rsp + XMM_SAVE + 16*2] + vmovdqa xmm9, [rsp + XMM_SAVE + 16*3] + vmovdqa xmm10, [rsp + XMM_SAVE + 16*4] + vmovdqa xmm11, [rsp + XMM_SAVE + 16*5] + vmovdqa xmm12, [rsp + XMM_SAVE + 16*6] + vmovdqa xmm13, [rsp + XMM_SAVE + 16*7] + vmovdqa xmm14, [rsp + XMM_SAVE + 16*8] + vmovdqa xmm15, [rsp + XMM_SAVE + 16*9] +%endif + add rsp, VARIABLE_OFFSET + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + + ; if there is, load the constants + vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + + vmovq xmm0, arg1 ; get the initial crc value + vmovdqu xmm7, [arg2] ; load the plaintext + vpxor xmm7, xmm0 + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov rax, arg1 + test arg3, arg3 + je _cleanup + + vmovq xmm0, arg1 ; get the initial crc value + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + vmovdqu xmm7, [arg2] ; load the plaintext + vpxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + vmovdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ + vpxor xmm1, xmm1 + mov r11, rsp + vmovdqa [r11], xmm1 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + +_zero_left: + vmovdqa xmm7, [rsp] + vpxor xmm7, xmm0 ; xor the initial crc value + + lea rax,[pshufb_shf_table] + + cmp r9, 8 + jl _end_1to7 + +_end_8to15: + vmovdqu xmm0, [rax + r9] + vpshufb xmm7,xmm0 + jmp _128_done + +_end_1to7: + ; Left shift (8-length) bytes in XMM + vmovdqu xmm0, [rax + r9 + 8] + vpshufb xmm7,xmm0 + + jmp _barrett + +align 16 +_exact_16_left: + vmovdqu xmm7, [arg2] + vpxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +section .data +align 32 + +%ifndef USE_CONSTS +; precomputed constants +rk_1: dq 0x45000000b0000000 +rk_2: dq 0x6b700000f5000000 +rk1: dq 0xf500000000000001 +rk2: dq 0x6b70000000000001 +rk3: dq 0xb001000000010000 +rk4: dq 0xf501b0000001b000 +rk5: dq 0xf500000000000001 +rk6: dq 0x0000000000000000 +rk7: dq 0xb000000000000001 +rk8: dq 0xb000000000000000 +rk9: dq 0xe014514514501501 +rk10: dq 0x771db6db6db71c71 +rk11: dq 0xa101101101110001 +rk12: dq 0x1ab1ab1ab1aab001 +rk13: dq 0xf445014445000001 +rk14: dq 0x6aab71daab700001 +rk15: dq 0xb100010100000001 +rk16: dq 0x01b001b1b0000001 +rk17: dq 0xe145150000000001 +rk18: dq 0x76db6c7000000001 +rk19: dq 0xa011000000000001 +rk20: dq 0x1b1ab00000000001 + +rk_1b: dq 0xf500000000000001 +rk_2b: dq 0x6b70000000000001 + dq 0x0000000000000000 + dq 0x0000000000000000 +%else +INCLUDE_CONSTS +%endif + +pshufb_shf_table: +; use these values for shift 
constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-3) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-4) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + +mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 +mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF +mask3: dq 0x8080808080808080, 0x8080808080808080 + +%else ; Assembler doesn't understand these opcodes. Add empty symbol for windows. +%ifidn __OUTPUT_FORMAT__, win64 +global no_ %+ FUNCTION_NAME +no_ %+ FUNCTION_NAME: +%endif +%endif ; (AS_FEATURE_LEVEL) >= 10 diff --git a/src/spdk/isa-l/crc/crc64_iso_refl_by8.asm b/src/spdk/isa-l/crc/crc64_iso_refl_by8.asm new file mode 100644 index 000000000..564a51097 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_iso_refl_by8.asm @@ -0,0 +1,544 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Function API: +; uint64_t crc64_iso_refl_by8( +; uint64_t init_crc, //initial CRC value, 64 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; uint64_t len //buffer length in bytes (64-bit data) +; ); +; The crc is returned in rax; init_crc is complemented on entry and the result is complemented again at _cleanup. +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif + + +align 16 +global crc64_iso_refl_by8:ISAL_SYM_TYPE_FUNCTION +crc64_iso_refl_by8: + ; uint64_t c = crc ^ 0xffffffff,ffffffffL; + not arg1 + sub rsp, VARIABLE_OFFSET ; reserve the local stack frame (released again right before ret) + +%ifidn __OUTPUT_FORMAT__, win64 + ; win64 only: save xmm6-xmm13 in the XMM_SAVE area of the stack frame (restored at _cleanup) + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + ; check if smaller than 256B. NOTE(review): the jumps on len are signed (jl/jge), so len is assumed to be below 2^63 + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time...
+ jl _less_than_256 + + + ; load the initial crc value + movq xmm10, arg1 ; initial crc in the low qword, upper qword zeroed + ; load the first 128 bytes of the buffer into xmm0-xmm7 + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + ; XOR the initial_crc value into the first 16 bytes of data + pxor xmm0, xmm10 + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 + + prefetchnta [arg2+fetch_dist+0] ; prefetch fetch_dist (1024) bytes ahead of the block being folded + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x10 ; low qword of xmm0 times rk4 (high qword of xmm10) + pclmulqdq xmm8, xmm10 , 0x1 ; high qword of the xmm0 copy times rk3 (low qword of xmm10) + pclmulqdq xmm1, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm0, xmm9 ; xor in the next 16 bytes of data + xorps xmm0, xmm8 ; xor in the second product; same pattern for the other seven registers + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm3, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm5, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps
xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x10 + pclmulqdq xmm8, xmm10 , 0x1 + pclmulqdq xmm7, xmm10, 0x10 + pclmulqdq xmm13, xmm10 , 0x1 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + + + ; fold the 8 xmm registers to 1 xmm register with different constants + ; fold xmm0 into xmm7 (xmm10 holds rk9 and rk10) + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + ; fold xmm1 into xmm7 (xmm10 holds rk11 and rk12) + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] ; fold xmm2 into xmm7 (rk13 and rk14) + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] ; fold xmm3 into xmm7 (rk15 and rk16) + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] ; fold xmm4 into xmm7 (rk17 and rk18) + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] ; fold xmm5 into xmm7 (rk19 and rk20) + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + ; fold xmm6 into xmm7 (rk1 and rk2); xmm10 keeps rk1 and rk2 for the 16B folding below + movdqa xmm10, [rk1] + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative
flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + add arg3, 16 ; arg3 is now the number of bytes still unread (0..15) + je _128_done ; nothing left: go reduce the 128 bits held in xmm7 + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted.
+_get_last_two_xmms: + + + movdqa xmm2, xmm7 + movdqu xmm1, [arg2 - 16 + arg3] ; the final 16 bytes of the buffer (overlaps bytes that were already consumed) + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table] + add rax, arg3 + movdqu xmm0, [rax] + + + pshufb xmm7, xmm0 + pxor xmm0, [mask3] + pshufb xmm2, xmm0 + + pblendvb xmm2, xmm1 ;xmm0 is implicit + ;;;;;;;;;; + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x1 + + pclmulqdq xmm8, xmm10, 0x10 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10 + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0 + psrldq xmm0, 8 + pxor xmm7, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm1, xmm7 + movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10 + + pclmulqdq xmm7, xmm10, 0 + movdqa xmm2, xmm7 + pclmulqdq xmm7, xmm10, 0x10 + pslldq xmm2, 8 + pxor xmm7, xmm2 + pxor xmm7, xmm1 + pextrq rax, xmm7, 1 ; the crc is taken from the high qword of xmm7 + +_cleanup: + ; return c ^ 0xffffffff, ffffffffL; + not rax + + +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] ; win64 only: restore the xmm registers saved on entry + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp, VARIABLE_OFFSET + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + + movq xmm0, arg1 ; get the initial crc value + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + + ; update the buffer pointer + add arg2, 16 + + ;
update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop + +align 16 +_less_than_32: + ; copy the (already complemented) initial crc to rax: for a zero-length buffer _cleanup complements it again and returns init_crc unchanged. + mov rax, arg1 + test arg3, arg3 + je _cleanup + + movq xmm0, arg1 ; get the initial crc value + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. + + pxor xmm1, xmm1 + mov r11, rsp ; r11 is the write cursor into the 16-byte scratch area at [rsp] + movdqa [r11], xmm1 + + ; keep the original length in r9 (arg3 is decremented while copying below) + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al + +_zero_left: + movdqa xmm7, [rsp] ; reload the zero-padded copy of the input assembled on the stack + pxor xmm7, xmm0 ; xor the initial crc value + + lea rax,[pshufb_shf_table] + + cmp r9, 8 + jl _end_1to7 + +_end_8to15: + movdqu xmm0, [rax + r9] ; shuffle control selected by the original length (8..15) + pshufb xmm7,xmm0 + jmp _128_done + +_end_1to7: + ; Left shift (8-length) bytes in XMM + movdqu xmm0, [rax + r9 + 8] + pshufb xmm7,xmm0 + + jmp _barrett ; enters after the 64b fold of _128_done, which is skipped for 1..7 bytes + +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +section .data + +; precomputed constants +align 16 +; rk7 = floor(2^128/Q) +; rk8 = Q +rk1: +DQ 0xf500000000000001 +rk2: +DQ 0x6b70000000000001 +rk3: +DQ 0xb001000000010000 +rk4: +DQ
0xf501b0000001b000 +rk5: +DQ 0xf500000000000001 ; same value as rk1 +rk6: +DQ 0x0000000000000000 +rk7: +DQ 0xb000000000000001 +rk8: +DQ 0xb000000000000000 +rk9: +DQ 0xe014514514501501 +rk10: +DQ 0x771db6db6db71c71 +rk11: +DQ 0xa101101101110001 +rk12: +DQ 0x1ab1ab1ab1aab001 +rk13: +DQ 0xf445014445000001 +rk14: +DQ 0x6aab71daab700001 +rk15: +DQ 0xb100010100000001 +rk16: +DQ 0x01b001b1b0000001 +rk17: +DQ 0xe145150000000001 +rk18: +DQ 0x76db6c7000000001 +rk19: +DQ 0xa011000000000001 +rk20: +DQ 0x1b1ab00000000001 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 ; the code loads a 16-byte window starting at a length-dependent offset into these 32 bytes +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + + +mask: +dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 ; NOTE(review): mask and mask2 are not referenced by the code of this function +mask2: +dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF +mask3: +dq 0x8080808080808080, 0x8080808080808080 ; xor-ed into the shuffle control in _get_last_two_xmms + +;;; func core, ver, snum +slversion crc64_iso_refl_by8, 01, 00, 0023 diff --git
a/src/spdk/isa-l/crc/crc64_jones_norm_by16_10.asm b/src/spdk/isa-l/crc/crc64_jones_norm_by16_10.asm new file mode 100644 index 000000000..2c9836b95 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_jones_norm_by16_10.asm @@ -0,0 +1,61 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define FUNCTION_NAME crc64_jones_norm_by16_10 +%define USE_CONSTS +%macro INCLUDE_CONSTS 0 +rk_1: dq 0x44ff5212394b1c52 ; this table replaces the built-in constants of the shared implementation included at the end of this file +rk_2: dq 0x956d6cb0582122b2 +rk1: dq 0x4445ed2750017038 +rk2: dq 0x698b74157cfbd736 +rk3: dq 0x0cfcfb5101c4b775 +rk4: dq 0x65403fd47cbec866 +rk5: dq 0x4445ed2750017038 ; same value as rk1 +rk6: dq 0x0000000000000000 +rk7: dq 0xddf3eeb298be6cf8 +rk8: dq 0xad93d23594c935a9 +rk9: dq 0xd8dc208e2ba527b4 +rk10: dq 0xf032cfec76bb2bc5 +rk11: dq 0xb536044f357f4238 +rk12: dq 0xfdbf104d938ba67a +rk13: dq 0xeeddad9297a843e7 +rk14: dq 0x3550bce629466473 +rk15: dq 0x4e501e58ca43d25e +rk16: dq 0x13c961588f27f643 +rk17: dq 0x3b60d00dcb1099bc +rk18: dq 0x44bf1f468c53b9a3 +rk19: dq 0x96f2236e317179ee +rk20: dq 0xf00839aa0dd64bac +rk_1b: dq 0x4445ed2750017038 ; same value as rk1 +rk_2b: dq 0x698b74157cfbd736 ; same value as rk2 + dq 0x0000000000000000 + dq 0x0000000000000000 +%endm + +%include "crc64_iso_norm_by16_10.asm" diff --git a/src/spdk/isa-l/crc/crc64_jones_norm_by8.asm b/src/spdk/isa-l/crc/crc64_jones_norm_by8.asm new file mode 100644 index 000000000..44ad726c0 --- /dev/null +++ b/src/spdk/isa-l/crc/crc64_jones_norm_by8.asm @@ -0,0 +1,581 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution.
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Function API: +; uint64_t crc64_jones_norm_by8( +; uint64_t init_crc, //initial CRC value, 64 bits +; const unsigned char *buf, //buffer pointer to calculate CRC on +; uint64_t len //buffer length in bytes (64-bit data) +; ); +; +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text + +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx +%endif + +%define TMP 16*0 +%ifidn __OUTPUT_FORMAT__, win64 + %define XMM_SAVE 16*2 + %define VARIABLE_OFFSET 16*10+8 +%else + %define VARIABLE_OFFSET 16*2+8 +%endif +align 16 +global crc64_jones_norm_by8:ISAL_SYM_TYPE_FUNCTION +crc64_jones_norm_by8: + + not arg1 ;~init_crc + + sub rsp,VARIABLE_OFFSET + +%ifidn __OUTPUT_FORMAT__, win64 + ; push the xmm registers into the stack to maintain + movdqa [rsp + XMM_SAVE + 16*0], xmm6 + movdqa [rsp + 
XMM_SAVE + 16*1], xmm7 + movdqa [rsp + XMM_SAVE + 16*2], xmm8 + movdqa [rsp + XMM_SAVE + 16*3], xmm9 + movdqa [rsp + XMM_SAVE + 16*4], xmm10 + movdqa [rsp + XMM_SAVE + 16*5], xmm11 + movdqa [rsp + XMM_SAVE + 16*6], xmm12 + movdqa [rsp + XMM_SAVE + 16*7], xmm13 +%endif + + + ; check if smaller than 256 + cmp arg3, 256 + + ; for sizes less than 256, we can't fold 128B at a time... + jl _less_than_256 + + + ; load the initial crc value + movq xmm10, arg1 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register. + ; because data will be byte-reflected and will align with initial crc at correct place. + pslldq xmm10, 8 + + movdqa xmm11, [SHUF_MASK] + ; receive the initial 128B data, xor the initial crc value + movdqu xmm0, [arg2+16*0] + movdqu xmm1, [arg2+16*1] + movdqu xmm2, [arg2+16*2] + movdqu xmm3, [arg2+16*3] + movdqu xmm4, [arg2+16*4] + movdqu xmm5, [arg2+16*5] + movdqu xmm6, [arg2+16*6] + movdqu xmm7, [arg2+16*7] + + pshufb xmm0, xmm11 + ; XOR the initial_crc value + pxor xmm0, xmm10 + pshufb xmm1, xmm11 + pshufb xmm2, xmm11 + pshufb xmm3, xmm11 + pshufb xmm4, xmm11 + pshufb xmm5, xmm11 + pshufb xmm6, xmm11 + pshufb xmm7, xmm11 + + movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4 + ;imm value of pclmulqdq instruction will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 256 instead of 128 to save one instruction from the loop + sub arg3, 256 + + ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop + ; loop will fold 128B at a time until we have 128+y Bytes of buffer + + + ; fold 128B at a time. 
This section of the code folds 8 xmm registers in parallel +_fold_128_B_loop: + + ; update the buffer pointer + add arg2, 128 ; buf += 128; + + prefetchnta [arg2+fetch_dist+0] + movdqu xmm9, [arg2+16*0] + movdqu xmm12, [arg2+16*1] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm0 + movdqa xmm13, xmm1 + pclmulqdq xmm0, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm1, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm0, xmm9 + xorps xmm0, xmm8 + pxor xmm1, xmm12 + xorps xmm1, xmm13 + + prefetchnta [arg2+fetch_dist+32] + movdqu xmm9, [arg2+16*2] + movdqu xmm12, [arg2+16*3] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm2 + movdqa xmm13, xmm3 + pclmulqdq xmm2, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm3, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm2, xmm9 + xorps xmm2, xmm8 + pxor xmm3, xmm12 + xorps xmm3, xmm13 + + prefetchnta [arg2+fetch_dist+64] + movdqu xmm9, [arg2+16*4] + movdqu xmm12, [arg2+16*5] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm4 + movdqa xmm13, xmm5 + pclmulqdq xmm4, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm5, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm4, xmm9 + xorps xmm4, xmm8 + pxor xmm5, xmm12 + xorps xmm5, xmm13 + + prefetchnta [arg2+fetch_dist+96] + movdqu xmm9, [arg2+16*6] + movdqu xmm12, [arg2+16*7] + pshufb xmm9, xmm11 + pshufb xmm12, xmm11 + movdqa xmm8, xmm6 + movdqa xmm13, xmm7 + pclmulqdq xmm6, xmm10, 0x0 + pclmulqdq xmm8, xmm10 , 0x11 + pclmulqdq xmm7, xmm10, 0x0 + pclmulqdq xmm13, xmm10 , 0x11 + pxor xmm6, xmm9 + xorps xmm6, xmm8 + pxor xmm7, xmm12 + xorps xmm7, xmm13 + + sub arg3, 128 + + ; check if there is another 128B in the buffer to be able to fold + jge _fold_128_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + add arg2, 128 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 + ; the 128B of folded data is in 8 of the xmm registers: xmm0, 
xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + + + ; fold the 8 xmm registers to 1 xmm register with different constants + + movdqa xmm10, [rk9] + movdqa xmm8, xmm0 + pclmulqdq xmm0, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm0 + + movdqa xmm10, [rk11] + movdqa xmm8, xmm1 + pclmulqdq xmm1, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm1 + + movdqa xmm10, [rk13] + movdqa xmm8, xmm2 + pclmulqdq xmm2, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + + movdqa xmm10, [rk15] + movdqa xmm8, xmm3 + pclmulqdq xmm3, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm3 + + movdqa xmm10, [rk17] + movdqa xmm8, xmm4 + pclmulqdq xmm4, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm4 + + movdqa xmm10, [rk19] + movdqa xmm8, xmm5 + pclmulqdq xmm5, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + xorps xmm7, xmm5 + + movdqa xmm10, [rk1] ;xmm10 has rk1 and rk2 + + movdqa xmm8, xmm6 + pclmulqdq xmm6, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm6 + + + ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg3, 128-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 
16 Bytes is in register xmm7 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + movdqu xmm0, [arg2] + pshufb xmm0, xmm11 + pxor xmm7, xmm0 + add arg2, 16 + sub arg3, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg3, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm7 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg3, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. + ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes. + ; after that the registers need to be adjusted. 
+_get_last_two_xmms: + movdqa xmm2, xmm7 + + movdqu xmm1, [arg2 - 16 + arg3] + pshufb xmm1, xmm11 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg3 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg3 bytes + pshufb xmm2, xmm0 + + ; shift xmm7 to the right by 16-arg3 bytes + pxor xmm0, [mask1] + pshufb xmm7, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm8, xmm7 + pclmulqdq xmm7, xmm10, 0x11 + pclmulqdq xmm8, xmm10, 0x0 + pxor xmm7, xmm8 + pxor xmm7, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm10, [rk5] ; rk5 and rk6 in xmm10 + movdqa xmm0, xmm7 + + ;64b fold + pclmulqdq xmm7, xmm10, 0x01 ; H*L + pslldq xmm0, 8 + pxor xmm7, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm10, [rk7] ; rk7 and rk8 in xmm10 + movdqa xmm0, xmm7 + + movdqa xmm1, xmm7 + pand xmm1, [mask3] + pclmulqdq xmm7, xmm10, 0x01 + pxor xmm7, xmm1 + + pclmulqdq xmm7, xmm10, 0x11 + pxor xmm7, xmm0 + pextrq rax, xmm7, 0 + +_cleanup: + not rax +%ifidn __OUTPUT_FORMAT__, win64 + movdqa xmm6, [rsp + XMM_SAVE + 16*0] + movdqa xmm7, [rsp + XMM_SAVE + 16*1] + movdqa xmm8, [rsp + XMM_SAVE + 16*2] + movdqa xmm9, [rsp + XMM_SAVE + 16*3] + movdqa xmm10, [rsp + XMM_SAVE + 16*4] + movdqa xmm11, [rsp + XMM_SAVE + 16*5] + movdqa xmm12, [rsp + XMM_SAVE + 16*6] + movdqa xmm13, [rsp + XMM_SAVE + 16*7] +%endif + add rsp, VARIABLE_OFFSET + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_256: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg3, 32 + jl _less_than_32 + movdqa xmm11, [SHUF_MASK] + + ; if there is, load the constants + movdqa xmm10, [rk1] ; rk1 
and rk2 in xmm10 + + movq xmm0, arg1 ; get the initial crc value + pslldq xmm0, 8 ; align it to its correct place + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 + + + ; update the buffer pointer + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg3, 32 + + jmp _16B_reduction_loop +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov rax, arg1 + test arg3, arg3 + je _cleanup + + movdqa xmm11, [SHUF_MASK] + + movq xmm0, arg1 ; get the initial crc value + pslldq xmm0, 8 ; align it to its correct place + + cmp arg3, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm7, [arg2] ; load the plaintext + pshufb xmm7, xmm11 ; byte-reflect the plaintext + pxor xmm7, xmm0 ; xor the initial crc value + add arg2, 16 + sub arg3, 16 + movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10 + jmp _get_last_two_xmms +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + ; backup the counter value + mov r9, arg3 + cmp arg3, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg2] + mov [r11], rax + add r11, 8 + sub arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg3, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg2] + mov [r11], eax + add r11, 4 + sub arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg3, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg2] + mov [r11], ax + add r11, 2 + sub arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg3, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg2] + mov [r11], al +_zero_left: + movdqa xmm7, [rsp] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + ; shl r9, 4 + lea rax, [pshufb_shf_table + 16] + sub rax, r9 + + cmp r9, 8 + jl _end_1to7 + +_end_8to15: + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm7, xmm0 + jmp _128_done + +_end_1to7: + ; Right shift (8-length) bytes in XMM + add rax, 8 + movdqu xmm0, [rax] + pshufb xmm7,xmm0 + + jmp _barrett +align 16 +_exact_16_left: + movdqu xmm7, [arg2] + pshufb xmm7, xmm11 + pxor xmm7, xmm0 ; xor the initial crc value + + jmp _128_done + +section .data + +; precomputed constants +align 16 + +rk1: +DQ 0x4445ed2750017038 +rk2: +DQ 0x698b74157cfbd736 +rk3: +DQ 0x0cfcfb5101c4b775 +rk4: +DQ 0x65403fd47cbec866 +rk5: +DQ 0x4445ed2750017038 +rk6: +DQ 0x0000000000000000 +rk7: +DQ 0xddf3eeb298be6cf8 +rk8: +DQ 0xad93d23594c935a9 +rk9: +DQ 0xd8dc208e2ba527b4 +rk10: +DQ 0xf032cfec76bb2bc5 +rk11: +DQ 0xb536044f357f4238 +rk12: +DQ 0xfdbf104d938ba67a +rk13: +DQ 0xeeddad9297a843e7 +rk14: +DQ 0x3550bce629466473 +rk15: +DQ 0x4e501e58ca43d25e +rk16: +DQ 0x13c961588f27f643 +rk17: +DQ 0x3b60d00dcb1099bc +rk18: +DQ 0x44bf1f468c53b9a3 +rk19: +DQ 0x96f2236e317179ee +rk20: +DQ 0xf00839aa0dd64bac + +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF +mask3: +dq 0x0000000000000000, 0xFFFFFFFFFFFFFFFF + 
+SHUF_MASK:	; pshufb control that reverses the order of the 16 bytes (loaded into xmm11 to byte-reflect the input)
+dq 0x08090A0B0C0D0E0F, 0x0001020304050607
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction (a control byte with bit 7 set, 0x8x, makes pshufb write a zero byte)
+; different alignments result in values as shown (the code reads 16 bytes at a length-dependent offset into the table below):
+;	dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+;	dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+;	dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+;	dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+;	dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+;	dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+;	dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+;	dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+;	dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+;	dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+;	dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+;	dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+;	dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+;	dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+;	dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x0f0e0d0c0b0a0908
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_jones_norm_by8, 01, 00, 0026
diff --git a/src/spdk/isa-l/crc/crc64_jones_refl_by16_10.asm b/src/spdk/isa-l/crc/crc64_jones_refl_by16_10.asm
new file mode 100644
index 000000000..39502729b
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_jones_refl_by16_10.asm
@@ -0,0 +1,61 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This file only sets a function name and a constants macro, then includes crc64_iso_refl_by16_10.asm (presumably that file consumes them - confirm there).
+%define FUNCTION_NAME crc64_jones_refl_by16_10
+%define USE_CONSTS
+%macro INCLUDE_CONSTS 0
+rk_1: dq 0x9471a5389095fe44
+rk_2: dq 0x9a8908341a6d6d52
+rk1: dq 0x381d0015c96f4444	; rk1..rk20 carry the same values as the rk1..rk20 table in crc64_jones_refl_by8.asm
+rk2: dq 0xd9d7be7d505da32c
+rk3: dq 0x768361524d29ed0b
+rk4: dq 0xcc26fa7c57f8054c
+rk5: dq 0x381d0015c96f4444
+rk6: dq 0x0000000000000000
+rk7: dq 0x3e6cfa329aef9f77
+rk8: dq 0x2b5926535897936a
+rk9: dq 0x5bc94ba8e2087636
+rk10: dq 0x6cf09c8f37710b75
+rk11: dq 0x3885fd59e440d95a
+rk12: dq 0xbccba3936411fb7e
+rk13: dq 0xe4dd0d81cbfce585
+rk14: dq 0xb715e37b96ed8633
+rk15: dq 0xf49784a634f014e4
+rk16: dq 0xaf86efb16d9ab4fb
+rk17: dq 0x7b3211a760160db8
+rk18: dq 0xa062b2319d66692f
+rk19: dq 0xef3d1d18ed889ed2
+rk20: dq 0x6ba4d760ab38201e
+rk_1b: dq 0x381d0015c96f4444	; same value as rk1
+rk_2b: dq 0xd9d7be7d505da32c	; same value as rk2
+	dq 0x0000000000000000
+	dq 0x0000000000000000
+%endm
+
+%include "crc64_iso_refl_by16_10.asm"
diff --git a/src/spdk/isa-l/crc/crc64_jones_refl_by8.asm b/src/spdk/isa-l/crc/crc64_jones_refl_by8.asm
new file mode 100644
index 000000000..7081f54a7
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_jones_refl_by8.asm
@@ -0,0 +1,544 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Function API:
+; uint64_t crc64_jones_refl_by8(
+; uint64_t init_crc, //initial CRC value, 64 bits
+; const unsigned char *buf, //buffer pointer to calculate CRC on
+; uint64_t len //buffer length in bytes (64-bit data)
+; );
+; Returns the CRC in rax. init_crc is complemented on entry and the result is complemented again before returning; for len == 0 this yields init_crc unchanged.
+%include "reg_sizes.asm"
+
+%define fetch_dist 1024
+
+[bits 64]
+default rel
+
+section .text
+
+; arg1 = init_crc, arg2 = buf, arg3 = len; the registers differ between the win64 and the other output formats
+%ifidn __OUTPUT_FORMAT__, win64
+	%xdefine arg1 rcx
+	%xdefine arg2 rdx
+	%xdefine arg3 r8
+%else
+	%xdefine arg1 rdi
+	%xdefine arg2 rsi
+	%xdefine arg3 rdx
+%endif
+; stack frame: 16 bytes of scratch at [rsp] (used for tails shorter than 16 bytes) and, on win64, the xmm6-xmm13 save area at XMM_SAVE
+%define TMP 16*0
+%ifidn __OUTPUT_FORMAT__, win64
+	%define XMM_SAVE 16*2
+	%define VARIABLE_OFFSET 16*10+8
+%else
+	%define VARIABLE_OFFSET 16*2+8
+%endif
+
+
+align 16
+global crc64_jones_refl_by8:ISAL_SYM_TYPE_FUNCTION
+crc64_jones_refl_by8:
+	; uint64_t c = crc ^ 0xffffffff,ffffffffL;
+	not arg1
+	sub rsp, VARIABLE_OFFSET	; NOTE(review): the +8 presumably re-aligns rsp to 16 bytes for the movdqa accesses below - assumes the usual call-site alignment
+
+%ifidn __OUTPUT_FORMAT__, win64
+	; save xmm6-xmm13 on the stack; they are restored at _cleanup
+	movdqa [rsp + XMM_SAVE + 16*0], xmm6
+	movdqa [rsp + XMM_SAVE + 16*1], xmm7
+	movdqa [rsp + XMM_SAVE + 16*2], xmm8
+	movdqa [rsp + XMM_SAVE + 16*3], xmm9
+	movdqa [rsp + XMM_SAVE + 16*4], xmm10
+	movdqa [rsp + XMM_SAVE + 16*5], xmm11
+	movdqa [rsp + XMM_SAVE + 16*6], xmm12
+	movdqa [rsp + XMM_SAVE + 16*7], xmm13
+%endif
+
+	; check if smaller than 256B
+	cmp arg3, 256
+
+	; for sizes less than 256, we can't fold 128B at a time...
+	jl _less_than_256	; jl is a signed comparison: len is treated as a signed 64-bit value here
+
+
+	; load the initial crc value
+	movq xmm10, arg1 ; initial crc
+	; receive the initial 128B data, xor the initial crc value
+	movdqu xmm0, [arg2+16*0]
+	movdqu xmm1, [arg2+16*1]
+	movdqu xmm2, [arg2+16*2]
+	movdqu xmm3, [arg2+16*3]
+	movdqu xmm4, [arg2+16*4]
+	movdqu xmm5, [arg2+16*5]
+	movdqu xmm6, [arg2+16*6]
+	movdqu xmm7, [arg2+16*7]
+
+	; XOR the initial_crc value
+	pxor xmm0, xmm10
+	movdqa xmm10, [rk3] ;xmm10 has rk3 and rk4
+	;imm value of pclmulqdq instruction will determine which constant to use
+	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+	; we subtract 256 instead of 128 to save one instruction from the loop
+	sub arg3, 256
+
+	; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
+	; loop will fold 128B at a time until we have 128+y Bytes of buffer
+
+
+	; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
+_fold_128_B_loop:
+
+	; update the buffer pointer
+	add arg2, 128
+
+	prefetchnta [arg2+fetch_dist+0]
+	movdqu xmm9, [arg2+16*0]
+	movdqu xmm12, [arg2+16*1]
+	movdqa xmm8, xmm0
+	movdqa xmm13, xmm1
+	pclmulqdq xmm0, xmm10, 0x10
+	pclmulqdq xmm8, xmm10 , 0x1
+	pclmulqdq xmm1, xmm10, 0x10
+	pclmulqdq xmm13, xmm10 , 0x1
+	pxor xmm0, xmm9
+	xorps xmm0, xmm8
+	pxor xmm1, xmm12
+	xorps xmm1, xmm13
+
+	prefetchnta [arg2+fetch_dist+32]
+	movdqu xmm9, [arg2+16*2]
+	movdqu xmm12, [arg2+16*3]
+	movdqa xmm8, xmm2
+	movdqa xmm13, xmm3
+	pclmulqdq xmm2, xmm10, 0x10
+	pclmulqdq xmm8, xmm10 , 0x1
+	pclmulqdq xmm3, xmm10, 0x10
+	pclmulqdq xmm13, xmm10 , 0x1
+	pxor xmm2, xmm9
+	xorps xmm2, xmm8
+	pxor xmm3, xmm12
+	xorps xmm3, xmm13
+
+	prefetchnta [arg2+fetch_dist+64]
+	movdqu xmm9, [arg2+16*4]
+	movdqu xmm12, [arg2+16*5]
+	movdqa xmm8, xmm4
+	movdqa xmm13, xmm5
+	pclmulqdq xmm4, xmm10, 0x10
+	pclmulqdq xmm8, xmm10 , 0x1
+	pclmulqdq xmm5, xmm10, 0x10
+	pclmulqdq xmm13, xmm10 , 0x1
+	pxor xmm4, xmm9
+	xorps xmm4, xmm8
+	pxor xmm5, xmm12
+	xorps xmm5, xmm13
+
+	prefetchnta [arg2+fetch_dist+96]
+	movdqu xmm9, [arg2+16*6]
+	movdqu xmm12, [arg2+16*7]
+	movdqa xmm8, xmm6
+	movdqa xmm13, xmm7
+	pclmulqdq xmm6, xmm10, 0x10
+	pclmulqdq xmm8, xmm10 , 0x1
+	pclmulqdq xmm7, xmm10, 0x10
+	pclmulqdq xmm13, xmm10 , 0x1
+	pxor xmm6, xmm9
+	xorps xmm6, xmm8
+	pxor xmm7, xmm12
+	xorps xmm7, xmm13
+
+	sub arg3, 128
+
+	; check if there is another 128B in the buffer to be able to fold
+	jge _fold_128_B_loop
+	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+	add arg2, 128
+	; at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
+	; the 128B of folded data is in 8 of the xmm registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+
+
+	; fold the 8 xmm registers to 1 xmm register with different constants
+	; xmm0 to xmm7
+	movdqa xmm10, [rk9]
+	movdqa xmm8, xmm0
+	pclmulqdq xmm0, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	xorps xmm7, xmm0
+	;xmm1 to xmm7
+	movdqa xmm10, [rk11]
+	movdqa xmm8, xmm1
+	pclmulqdq xmm1, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	xorps xmm7, xmm1
+	; xmm2 to xmm7
+	movdqa xmm10, [rk13]
+	movdqa xmm8, xmm2
+	pclmulqdq xmm2, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	pxor xmm7, xmm2
+	; xmm3 to xmm7
+	movdqa xmm10, [rk15]
+	movdqa xmm8, xmm3
+	pclmulqdq xmm3, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	xorps xmm7, xmm3
+	; xmm4 to xmm7
+	movdqa xmm10, [rk17]
+	movdqa xmm8, xmm4
+	pclmulqdq xmm4, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	pxor xmm7, xmm4
+	; xmm5 to xmm7
+	movdqa xmm10, [rk19]
+	movdqa xmm8, xmm5
+	pclmulqdq xmm5, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	xorps xmm7, xmm5
+	; xmm6 to xmm7
+	movdqa xmm10, [rk1]	; xmm10 keeps rk1 and rk2 for the 16-byte folds that follow
+	movdqa xmm8, xmm6
+	pclmulqdq xmm6, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	pxor xmm7, xmm6
+
+
+	; instead of 128, we add 128-16 to the loop counter to save 1 instruction from the loop
+	; instead of a cmp instruction, we use the negative flag with the jl instruction
+	add arg3, 128-16
+	jl _final_reduction_for_128
+
+	; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
+	; we can fold 16 bytes at a time if y>=16
+	; continue folding 16B at a time
+
+_16B_reduction_loop:
+	movdqa xmm8, xmm7
+	pclmulqdq xmm7, xmm10, 0x1
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	movdqu xmm0, [arg2]
+	pxor xmm7, xmm0
+	add arg2, 16
+	sub arg3, 16
+	; instead of a cmp instruction, we utilize the flags with the jge instruction
+	; equivalent of: cmp arg3, 16-16
+	; check if there is any more 16B in the buffer to be able to fold
+	jge _16B_reduction_loop
+
+	;now we have 16+z bytes left to reduce, where 0<= z < 16.
+	;first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+	add arg3, 16	; arg3 becomes the number of tail bytes (0..15)
+	je _128_done
+	; here we are getting data that is less than 16 bytes.
+	; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
+	; after that the registers need to be adjusted.
+_get_last_two_xmms:
+
+
+	movdqa xmm2, xmm7
+	movdqu xmm1, [arg2 - 16 + arg3]	; the last 16 bytes of the buffer (overlaps data that was already consumed)
+
+	; get rid of the extra data that was loaded before
+	; load the shift constant
+	lea rax, [pshufb_shf_table]
+	add rax, arg3
+	movdqu xmm0, [rax]
+
+
+	pshufb xmm7, xmm0
+	pxor xmm0, [mask3]	; mask3 is 0x80 in every byte: flips bit 7 of each pshufb control byte
+	pshufb xmm2, xmm0
+
+	pblendvb xmm2, xmm1 ;xmm0 is implicit
+	;;;;;;;;;;
+	movdqa xmm8, xmm7
+	pclmulqdq xmm7, xmm10, 0x1
+
+	pclmulqdq xmm8, xmm10, 0x10
+	pxor xmm7, xmm8
+	pxor xmm7, xmm2
+
+_128_done:
+	; compute crc of a 128-bit value
+	movdqa xmm10, [rk5]
+	movdqa xmm0, xmm7
+
+	;64b fold
+	pclmulqdq xmm7, xmm10, 0
+	psrldq xmm0, 8
+	pxor xmm7, xmm0
+
+	;barrett reduction
+_barrett:
+	movdqa xmm1, xmm7
+	movdqa xmm10, [rk7]	; rk7 and rk8 in xmm10
+
+	pclmulqdq xmm7, xmm10, 0
+	movdqa xmm2, xmm7
+	pclmulqdq xmm7, xmm10, 0x10
+	pslldq xmm2, 8
+	pxor xmm7, xmm2
+	pxor xmm7, xmm1
+	pextrq rax, xmm7, 1	; the result is the upper 64 bits of xmm7
+
+_cleanup:
+	; return c ^ 0xffffffff, ffffffffL;
+	not rax
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+	movdqa xmm6, [rsp + XMM_SAVE + 16*0]
+	movdqa xmm7, [rsp + XMM_SAVE + 16*1]
+	movdqa xmm8, [rsp + XMM_SAVE + 16*2]
+	movdqa xmm9, [rsp + XMM_SAVE + 16*3]
+	movdqa xmm10, [rsp + XMM_SAVE + 16*4]
+	movdqa xmm11, [rsp + XMM_SAVE + 16*5]
+	movdqa xmm12, [rsp + XMM_SAVE + 16*6]
+	movdqa xmm13, [rsp + XMM_SAVE + 16*7]
+%endif
+	add rsp, VARIABLE_OFFSET
+	ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+align 16
+_less_than_256:
+
+	; check if there is enough buffer to be able to fold 16B at a time
+	cmp arg3, 32
+	jl _less_than_32
+
+	; if there is, load the constants
+	movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+
+	movq xmm0, arg1 ; get the initial crc value
+	movdqu xmm7, [arg2] ; load the plaintext
+	pxor xmm7, xmm0
+
+	; update the buffer pointer
+	add arg2, 16
+
+	; update the counter. subtract 32 instead of 16 to save one instruction from the loop
+	sub arg3, 32
+
+	jmp _16B_reduction_loop
+
+align 16
+_less_than_32:
+	; mov initial crc to the return value. this is necessary for zero-length buffers.
+	mov rax, arg1
+	test arg3, arg3
+	je _cleanup
+
+	movq xmm0, arg1 ; get the initial crc value
+
+	cmp arg3, 16
+	je _exact_16_left
+	jl _less_than_16_left
+
+	movdqu xmm7, [arg2] ; load the plaintext
+	pxor xmm7, xmm0 ; xor the initial crc value
+	add arg2, 16
+	sub arg3, 16
+	movdqa xmm10, [rk1] ; rk1 and rk2 in xmm10
+	jmp _get_last_two_xmms
+
+
+align 16
+_less_than_16_left:
+	; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.
+
+	pxor xmm1, xmm1
+	mov r11, rsp	; r11 walks the 16-byte scratch area at [rsp]
+	movdqa [r11], xmm1
+
+	; backup the counter value
+	mov r9, arg3
+	cmp arg3, 8
+	jl _less_than_8_left
+
+	; load 8 Bytes
+	mov rax, [arg2]
+	mov [r11], rax
+	add r11, 8
+	sub arg3, 8
+	add arg2, 8
+_less_than_8_left:
+
+	cmp arg3, 4
+	jl _less_than_4_left
+
+	; load 4 Bytes
+	mov eax, [arg2]
+	mov [r11], eax
+	add r11, 4
+	sub arg3, 4
+	add arg2, 4
+_less_than_4_left:
+
+	cmp arg3, 2
+	jl _less_than_2_left
+
+	; load 2 Bytes
+	mov ax, [arg2]
+	mov [r11], ax
+	add r11, 2
+	sub arg3, 2
+	add arg2, 2
+_less_than_2_left:
+	cmp arg3, 1
+	jl _zero_left
+
+	; load 1 Byte
+	mov al, [arg2]
+	mov [r11], al
+
+_zero_left:
+	movdqa xmm7, [rsp]	; the copied tail bytes, zero-padded to 16 bytes
+	pxor xmm7, xmm0 ; xor the initial crc value
+
+	lea rax,[pshufb_shf_table]
+
+	cmp r9, 8	; r9 still holds the original byte count (1..15)
+	jl _end_1to7
+
+_end_8to15:
+	movdqu xmm0, [rax + r9]
+	pshufb xmm7,xmm0
+	jmp _128_done
+
+_end_1to7:
+	; Left shift (8-length) bytes in XMM
+	movdqu xmm0, [rax + r9 + 8]
+	pshufb xmm7,xmm0
+
+	jmp _barrett	; goes straight to the Barrett step, skipping the 64b fold at _128_done
+
+align 16
+_exact_16_left:
+	movdqu xmm7, [arg2]
+	pxor xmm7, xmm0 ; xor the initial crc value
+
+	jmp _128_done
+
+section .data
+
+; precomputed constants; the code loads them in 16-byte pairs (e.g. movdqa xmm10, [rk3] brings in rk3 and rk4)
+align 16
+; rk7 = floor(2^128/Q)
+; rk8 = Q
+rk1:
+DQ 0x381d0015c96f4444
+rk2:
+DQ 0xd9d7be7d505da32c
+rk3:
+DQ 0x768361524d29ed0b
+rk4:
+DQ 0xcc26fa7c57f8054c
+rk5:
+DQ 0x381d0015c96f4444
+rk6:
+DQ 0x0000000000000000
+rk7:
+DQ 0x3e6cfa329aef9f77
+rk8:
+DQ 0x2b5926535897936a
+rk9:
+DQ 0x5bc94ba8e2087636
+rk10:
+DQ 0x6cf09c8f37710b75
+rk11:
+DQ 0x3885fd59e440d95a
+rk12:
+DQ 0xbccba3936411fb7e
+rk13:
+DQ 0xe4dd0d81cbfce585
+rk14:
+DQ 0xb715e37b96ed8633
+rk15:
+DQ 0xf49784a634f014e4
+rk16:
+DQ 0xaf86efb16d9ab4fb
+rk17:
+DQ 0x7b3211a760160db8
+rk18:
+DQ 0xa062b2319d66692f
+rk19:
+DQ 0xef3d1d18ed889ed2
+rk20:
+DQ 0x6ba4d760ab38201e
+
+pshufb_shf_table:
+; use these values for shift constants for the pshufb instruction (a control byte with bit 7 set, 0x8x, makes pshufb write a zero byte)
+; different alignments result in values as shown (the code reads 16 bytes at a length-dependent offset into the table below):
+;	dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
+;	dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
+;	dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
+;	dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
+;	dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
+;	dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
+;	dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7
+;	dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8
+;	dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9
+;	dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10
+;	dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11
+;	dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12
+;	dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13
+;	dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14
+;	dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1 (16-15) / shr15
+dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
+dq 0x0706050403020100, 0x000e0d0c0b0a0908
+
+
+mask:
+dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000
+mask2:
+dq 
0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+mask3:	; 0x80 in every byte; XORed into a pshufb control in _get_last_two_xmms
+dq 0x8080808080808080, 0x8080808080808080
+
+;;; func core, ver, snum
+slversion crc64_jones_refl_by8, 01, 00, 0029
diff --git a/src/spdk/isa-l/crc/crc64_multibinary.asm b/src/spdk/isa-l/crc/crc64_multibinary.asm
new file mode 100644
index 000000000..3e06a0ecb
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_multibinary.asm
@@ -0,0 +1,92 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Common prototype of every CRC64 entry point declared in this file:
+;;; uint64_t crc64_func(uint64_t init_crc, const unsigned char *buf, uint64_t len);
+;;;
+
+default rel
+[bits 64]
+
+%include "reg_sizes.asm"
+; implementations defined in other files: a *_base and a *_by8 routine per CRC64 variant
+extern crc64_ecma_refl_by8
+extern crc64_ecma_refl_base
+
+extern crc64_ecma_norm_by8
+extern crc64_ecma_norm_base
+
+extern crc64_iso_refl_by8
+extern crc64_iso_refl_base
+
+extern crc64_iso_norm_by8
+extern crc64_iso_norm_base
+
+extern crc64_jones_refl_by8
+extern crc64_jones_refl_base
+
+extern crc64_jones_norm_by8
+extern crc64_jones_norm_base
+; the *_by16_10 routines are only declared when AS_FEATURE_LEVEL is at least 10
+%if (AS_FEATURE_LEVEL) >= 10
+extern crc64_iso_refl_by16_10
+extern crc64_iso_norm_by16_10
+extern crc64_jones_refl_by16_10
+extern crc64_jones_norm_by16_10
+extern crc64_ecma_refl_by16_10
+extern crc64_ecma_norm_by16_10
+%endif
+
+section .text
+
+%include "multibinary.asm"
+; one interface + dispatcher pair per variant; NOTE(review): mbin_interface / mbin_dispatch_init7 come from multibinary.asm (not visible here), so the selection rules and the handling of the by16_10 argument when AS_FEATURE_LEVEL < 10 should be confirmed there
+mbin_interface crc64_ecma_refl
+mbin_dispatch_init7 crc64_ecma_refl, crc64_ecma_refl_base, crc64_ecma_refl_by8, crc64_ecma_refl_by8, crc64_ecma_refl_by8, crc64_ecma_refl_by8, crc64_ecma_refl_by16_10
+mbin_interface crc64_ecma_norm
+mbin_dispatch_init7 crc64_ecma_norm, crc64_ecma_norm_base, crc64_ecma_norm_by8, crc64_ecma_norm_by8, crc64_ecma_norm_by8, crc64_ecma_norm_by8, crc64_ecma_norm_by16_10
+
+mbin_interface crc64_iso_refl
+mbin_dispatch_init7 crc64_iso_refl, crc64_iso_refl_base, crc64_iso_refl_by8, crc64_iso_refl_by8, crc64_iso_refl_by8, crc64_iso_refl_by8, crc64_iso_refl_by16_10
+mbin_interface crc64_iso_norm
+mbin_dispatch_init7 crc64_iso_norm, crc64_iso_norm_base, crc64_iso_norm_by8, crc64_iso_norm_by8, crc64_iso_norm_by8, crc64_iso_norm_by8, crc64_iso_norm_by16_10
+
+mbin_interface crc64_jones_refl
+mbin_dispatch_init7 crc64_jones_refl, crc64_jones_refl_base, crc64_jones_refl_by8, crc64_jones_refl_by8, crc64_jones_refl_by8, crc64_jones_refl_by8, crc64_jones_refl_by16_10
+mbin_interface crc64_jones_norm
+mbin_dispatch_init7 crc64_jones_norm, crc64_jones_norm_base, crc64_jones_norm_by8, crc64_jones_norm_by8, crc64_jones_norm_by8, crc64_jones_norm_by8, crc64_jones_norm_by16_10
+
+;;; func core, ver, snum
+slversion crc64_ecma_refl, 00, 00, 001b
+slversion crc64_ecma_norm, 00, 00, 0018
+slversion crc64_iso_refl, 00, 00, 0021
+slversion crc64_iso_norm, 00, 00, 001e
+slversion crc64_jones_refl, 00, 00, 0027
+slversion crc64_jones_norm, 00, 00, 0024
diff --git a/src/spdk/isa-l/crc/crc64_ref.h b/src/spdk/isa-l/crc/crc64_ref.h
new file mode 100644
index 000000000..b30f63866
--- /dev/null
+++ b/src/spdk/isa-l/crc/crc64_ref.h
@@ -0,0 +1,148 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _CRC64_REF_H +#define _CRC64_REF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "crc64.h" + +#ifdef _MSC_VER +# define inline __inline +#endif + +#define MAX_ITER 8 + +// crc64_ecma reference function, slow crc64 from the definition. +static inline uint64_t crc64_ecma_refl_ref(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + unsigned int i, j; + + uint64_t poly = 0xC96C5795D7870F42ULL; // ECMA-182 standard reflected + + for (i = 0; i < len; i++) { + rem = rem ^ (uint64_t) buf[i]; + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x1ULL ? poly : 0) ^ (rem >> 1); + } + } + return ~rem; +} + +static inline uint64_t crc64_ecma_norm_ref(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + unsigned int i, j; + + uint64_t poly = 0x42F0E1EBA9EA3693ULL; // ECMA-182 standard + + for (i = 0; i < len; i++) { + rem = rem ^ ((uint64_t) buf[i] << 56); + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x8000000000000000ULL ? poly : 0) ^ (rem << 1); + } + } + return ~rem; +} + +// crc64_iso reference function, slow crc64 from the definition. 
+static inline uint64_t crc64_iso_refl_ref(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + unsigned int i, j; + + uint64_t poly = 0xD800000000000000ULL; // ISO standard reflected + + for (i = 0; i < len; i++) { + rem = rem ^ (uint64_t) buf[i]; + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x1ULL ? poly : 0) ^ (rem >> 1); + } + } + return ~rem; +} + +static inline uint64_t crc64_iso_norm_ref(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + unsigned int i, j; + + uint64_t poly = 0x000000000000001BULL; // ISO standard + + for (i = 0; i < len; i++) { + rem = rem ^ ((uint64_t) buf[i] << 56); + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x8000000000000000ULL ? poly : 0) ^ (rem << 1); + } + } + return ~rem; +} + +// crc64_jones reference function, slow crc64 from the definition. +static inline uint64_t crc64_jones_refl_ref(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + unsigned int i, j; + + uint64_t poly = 0x95ac9329ac4bc9b5ULL; // Jones coefficients reflected + + for (i = 0; i < len; i++) { + rem = rem ^ (uint64_t) buf[i]; + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x1ULL ? poly : 0) ^ (rem >> 1); + } + } + return ~rem; +} + +static inline uint64_t crc64_jones_norm_ref(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + unsigned int i, j; + + uint64_t poly = 0xad93d23594c935a9ULL; // Jones coefficients + + for (i = 0; i < len; i++) { + rem = rem ^ ((uint64_t) buf[i] << 56); + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x8000000000000000ULL ? 
poly : 0) ^ (rem << 1); + } + } + return ~rem; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/isa-l/crc/crc_base.c b/src/spdk/isa-l/crc/crc_base.c new file mode 100644 index 000000000..d1eb2d22e --- /dev/null +++ b/src/spdk/isa-l/crc/crc_base.c @@ -0,0 +1,351 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include "crc.h" + +static const uint16_t crc16tab[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 
0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +static const uint32_t crc32_table_iscsi_refl[256] = { + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, + 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, + 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, + 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, + 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, + 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, + 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, + 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, + 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, + 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, + 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, + 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, + 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, + 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, + 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, + 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, + 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, + 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, + 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, + 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, + 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, + 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, + 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, + 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, + 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, + 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, + 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, + 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, + 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, + 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, + 0x456CAC67, 0xB7072F64, 
0xA457DC90, 0x563C5F93, + 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, + 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, + 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, + 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, + 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, + 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, + 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, + 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, + 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, + 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, + 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, + 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, + 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, + 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, + 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, + 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, + 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, + 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, + 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, + 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, + 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, + 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, + 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, + 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, + 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, + 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, + 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 +}; + +static const uint32_t crc32_table_ieee_norm[256] = { + 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, + 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, + 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, + 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, + 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, + 0x9823b6e0, 
0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, + 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, + 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, + 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, + 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, + 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, + 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, + 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, + 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, + 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, + 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, + 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, + 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, + 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, + 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, + 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, + 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, + 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, + 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, + 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, + 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, + 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, + 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, + 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, + 0x5d8a9099, 
0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, + 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, + 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, + 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, + 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, + 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, + 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, + 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, + 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, + 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 +}; + +static const uint32_t crc32_table_gzip_refl[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +uint16_t crc16_t10dif_base(uint16_t seed, uint8_t * buf, uint64_t len) +{ + int i; + uint16_t crc = seed; + + for (i = 0; i < len; i++) + crc = (crc << 8) ^ crc16tab[((crc >> 8) ^ *buf++) & 0x00FF]; + + return crc; +} + +uint16_t crc16_t10dif_copy_base(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len) +{ + int i; + uint16_t crc = seed; + + for (i = 0; i < len; i++) { + crc = (crc << 8) ^ crc16tab[((crc >> 8) ^ *src) & 0x00FF]; + *dst++ = *src++; + } + + return crc; +} + +unsigned int crc32_iscsi_base(unsigned char *buffer, int len, unsigned int crc_init) +{ + unsigned int crc; + unsigned char *p_buf; + unsigned char *p_end = buffer + len; + + p_buf = buffer; + crc = crc_init; + + while (p_buf < p_end) { + crc = (crc >> 8) ^ crc32_table_iscsi_refl[(crc & 0x000000FF) ^ *p_buf++]; + } + return crc; +} + +uint32_t crc32_ieee_base(uint32_t seed, uint8_t * buf, uint64_t len) +{ + unsigned int crc = ~seed; + + while (len--) { + crc = (crc << 8) ^ crc32_table_ieee_norm[((crc >> 24) ^ *buf) & 255]; + buf++; + } + + return ~crc; +} + +uint32_t crc32_gzip_refl_base(uint32_t seed, uint8_t * buf, uint64_t len) +{ + unsigned int crc; + unsigned char *p_buf; + unsigned char *p_end = buf + len; + + p_buf = (unsigned char *)buf; + crc = ~seed; + + while (p_buf < p_end) { + crc = (crc >> 8) ^ crc32_table_gzip_refl[(crc & 0x000000FF) ^ *p_buf++]; + } + + return ~crc; +} + +struct slver { + unsigned short snum; + unsigned char ver; + unsigned char core; +}; + +struct slver crc32_iscsi_base_slver_0001011d; +struct slver crc32_iscsi_base_slver = { 0x011d, 0x02, 0x00 }; + +struct slver crc16_t10dif_base_slver_0001011e; +struct slver crc16_t10dif_base_slver = { 0x011e, 0x02, 0x00 }; + +struct slver crc32_ieee_base_slver_0001011f; +struct slver crc32_ieee_base_slver = { 0x011f, 0x02, 0x00 }; + +struct slver crc32_gzip_refl_base_slver_0000002b; +struct slver crc32_gzip_refl_base_slver = { 0x002b, 0x00, 0x00 }; diff --git 
a/src/spdk/isa-l/crc/crc_base_aliases.c b/src/spdk/isa-l/crc/crc_base_aliases.c new file mode 100644 index 000000000..0ffc62f96 --- /dev/null +++ b/src/spdk/isa-l/crc/crc_base_aliases.c @@ -0,0 +1,87 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include "crc.h" +#include "crc64.h" +#include + +unsigned int crc32_iscsi(unsigned char *buffer, int len, unsigned int crc_init) +{ + return crc32_iscsi_base(buffer, len, crc_init); +} + +uint16_t crc16_t10dif(uint16_t seed, const unsigned char *buf, uint64_t len) +{ + return crc16_t10dif_base(seed, (uint8_t *) buf, len); +} + +uint16_t crc16_t10dif_copy(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len) +{ + return crc16_t10dif_copy_base(seed, dst, src, len); +} + +uint32_t crc32_ieee(uint32_t seed, const unsigned char *buf, uint64_t len) +{ + return crc32_ieee_base(seed, (uint8_t *) buf, len); +} + +uint32_t crc32_gzip_refl(uint32_t seed, const unsigned char *buf, uint64_t len) +{ + return crc32_gzip_refl_base(seed, (uint8_t *) buf, len); +} + +uint64_t crc64_ecma_refl(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + return crc64_ecma_refl_base(seed, buf, len); +} + +uint64_t crc64_ecma_norm(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + return crc64_ecma_norm_base(seed, buf, len); +} + +uint64_t crc64_iso_refl(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + return crc64_iso_refl_base(seed, buf, len); +} + +uint64_t crc64_iso_norm(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + return crc64_iso_norm_base(seed, buf, len); +} + +uint64_t crc64_jones_refl(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + return crc64_jones_refl_base(seed, buf, len); +} + +uint64_t crc64_jones_norm(uint64_t seed, const uint8_t * buf, uint64_t len) +{ + return crc64_jones_norm_base(seed, buf, len); +} diff --git a/src/spdk/isa-l/crc/crc_multibinary.asm b/src/spdk/isa-l/crc/crc_multibinary.asm new file mode 100644 index 000000000..39814566d --- /dev/null +++ b/src/spdk/isa-l/crc/crc_multibinary.asm @@ -0,0 +1,189 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +default rel +[bits 64] + +%include "reg_sizes.asm" + +extern crc32_iscsi_00 +extern crc32_iscsi_01 +extern crc32_iscsi_base + +extern crc32_ieee_01 +extern crc32_ieee_by4 ;; Optimized for SLM +extern crc32_ieee_base + +extern crc16_t10dif_01 +extern crc16_t10dif_by4 ;; Optimized for SLM +extern crc16_t10dif_base + +extern crc32_gzip_refl_by8 +extern crc32_gzip_refl_base + +extern crc16_t10dif_copy_by4 +extern crc16_t10dif_copy_base + +%include "multibinary.asm" + +section .data +;;; *_mbinit are initial values for *_dispatched; is updated on first call. +;;; Therefore, *_dispatch_init is only executed on first call. + +crc32_iscsi_dispatched: + dq crc32_iscsi_mbinit + +crc32_ieee_dispatched: + dq crc32_ieee_mbinit + +crc16_t10dif_dispatched: + dq crc16_t10dif_mbinit + +section .text +;;;; +; crc32_iscsi multibinary function +;;;; +global crc32_iscsi:ISAL_SYM_TYPE_FUNCTION +crc32_iscsi_mbinit: + call crc32_iscsi_dispatch_init +crc32_iscsi: + jmp qword [crc32_iscsi_dispatched] + +crc32_iscsi_dispatch_init: + push rax + push rbx + push rcx + push rdx + push rsi + lea rsi, [crc32_iscsi_base WRT_OPT] ; Default + + mov eax, 1 + cpuid + lea rbx, [crc32_iscsi_00 WRT_OPT] + lea rax, [crc32_iscsi_01 WRT_OPT] + + test ecx, FLAG_CPUID1_ECX_SSE4_2 + cmovne rsi, rbx + test ecx, FLAG_CPUID1_ECX_CLMUL + cmovne rsi, rax + mov [crc32_iscsi_dispatched], rsi + pop rsi + pop rdx + pop rcx + pop rbx + pop rax + ret + +;;;; +; crc32_ieee multibinary function +;;;; +global crc32_ieee:ISAL_SYM_TYPE_FUNCTION +crc32_ieee_mbinit: + call crc32_ieee_dispatch_init +crc32_ieee: + jmp qword [crc32_ieee_dispatched] + +crc32_ieee_dispatch_init: + push rax + push rbx + push rcx + push rdx + push rsi + lea rsi, [crc32_ieee_base WRT_OPT] ; Default + + mov eax, 1 + cpuid + lea rbx, [crc32_ieee_01 WRT_OPT] + lea rdx, [crc32_ieee_by4 WRT_OPT] + + test ecx, FLAG_CPUID1_ECX_SSE3 + jz use_ieee_base + test ecx, FLAG_CPUID1_ECX_CLMUL 
+ cmovne rsi, rbx + and eax, FLAG_CPUID1_EAX_STEP_MASK + cmp eax, FLAG_CPUID1_EAX_AVOTON + cmove rsi, rdx +use_ieee_base: + mov [crc32_ieee_dispatched], rsi + pop rsi + pop rdx + pop rcx + pop rbx + pop rax + ret + +;;;; +; crc16_t10dif multibinary function +;;;; +global crc16_t10dif:ISAL_SYM_TYPE_FUNCTION +crc16_t10dif_mbinit: + call crc16_t10dif_dispatch_init +crc16_t10dif: + jmp qword [crc16_t10dif_dispatched] + +crc16_t10dif_dispatch_init: + push rax + push rbx + push rcx + push rdx + push rsi + lea rsi, [crc16_t10dif_base WRT_OPT] ; Default + + mov eax, 1 + cpuid + lea rbx, [crc16_t10dif_01 WRT_OPT] + lea rdx, [crc16_t10dif_by4 WRT_OPT] + + test ecx, FLAG_CPUID1_ECX_SSE3 + jz use_t10dif_base + test ecx, FLAG_CPUID1_ECX_CLMUL + cmovne rsi, rbx + and eax, FLAG_CPUID1_EAX_STEP_MASK + cmp eax, FLAG_CPUID1_EAX_AVOTON + cmove rsi, rdx +use_t10dif_base: + mov [crc16_t10dif_dispatched], rsi + pop rsi + pop rdx + pop rcx + pop rbx + pop rax + ret + +mbin_interface crc32_gzip_refl +mbin_dispatch_init_clmul crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_by8 + +mbin_interface crc16_t10dif_copy +mbin_dispatch_init_clmul crc16_t10dif_copy, crc16_t10dif_copy_base, crc16_t10dif_copy_by4 + +;;; func core, ver, snum +slversion crc16_t10dif, 00, 03, 011a +slversion crc32_ieee, 00, 03, 011b +slversion crc32_iscsi, 00, 03, 011c +slversion crc32_gzip_refl, 00, 00, 002a diff --git a/src/spdk/isa-l/crc/crc_ref.h b/src/spdk/isa-l/crc/crc_ref.h new file mode 100644 index 000000000..e97a60b5e --- /dev/null +++ b/src/spdk/isa-l/crc/crc_ref.h @@ -0,0 +1,140 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _CRC_REF_H +#define _CRC_REF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "crc.h" + +#ifdef _MSC_VER +# define inline __inline +#endif + +#define MAX_ITER 8 + +// iSCSI CRC reference function +static inline unsigned int crc32_iscsi_ref(unsigned char *buffer, int len, unsigned int crc_init) +{ + uint64_t rem = crc_init; + int i, j; + + uint32_t poly = 0x82F63B78; + + for (i = 0; i < len; i++) { + rem = rem ^ (buffer[i]); + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x1ULL) ? (rem >> 1) ^ poly : (rem >> 1); + } + } + return rem; +} + +// crc16_t10dif reference function, slow crc16 from the definition. 
+static inline uint16_t crc16_t10dif_ref(uint16_t seed, uint8_t * buf, uint64_t len) +{ + size_t rem = seed; + unsigned int i, j; + + uint16_t poly = 0x8bb7; // t10dif standard + + for (i = 0; i < len; i++) { + rem = rem ^ (buf[i] << 8); + for (j = 0; j < MAX_ITER; j++) { + rem = rem << 1; + rem = (rem & 0x10000) ? rem ^ poly : rem; + } + } + return rem; +} + +// crc16_t10dif reference function, slow crc16 from the definition. +static inline uint16_t crc16_t10dif_copy_ref(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len) +{ + size_t rem = seed; + unsigned int i, j; + + uint16_t poly = 0x8bb7; // t10dif standard + + for (i = 0; i < len; i++) { + rem = rem ^ (src[i] << 8); + dst[i] = src[i]; + for (j = 0; j < MAX_ITER; j++) { + rem = rem << 1; + rem = (rem & 0x10000) ? rem ^ poly : rem; + } + } + return rem; +} + +// crc32_ieee reference function, slow crc32 from the definition. +static inline uint32_t crc32_ieee_ref(uint32_t seed, uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + unsigned int i, j; + + uint32_t poly = 0x04C11DB7; // IEEE standard + + for (i = 0; i < len; i++) { + rem = rem ^ ((uint64_t) buf[i] << 24); + for (j = 0; j < MAX_ITER; j++) { + rem = rem << 1; + rem = (rem & 0x100000000ULL) ? rem ^ poly : rem; + } + } + return ~rem; +} + +// crc32_gzip_refl reference function, slow crc32 from the definition. +// Please get difference details between crc32_gzip_ref and crc32_ieee +// from crc.h. +static inline uint32_t crc32_gzip_refl_ref(uint32_t seed, uint8_t * buf, uint64_t len) +{ + uint64_t rem = ~seed; + int i, j; + + uint32_t poly = 0xEDB88320; // IEEE standard + + for (i = 0; i < len; i++) { + rem = rem ^ (buf[i]); + for (j = 0; j < MAX_ITER; j++) { + rem = (rem & 0x1ULL) ? 
(rem >> 1) ^ poly : (rem >> 1); + } + } + return ~rem; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/isa-l/crc/crc_simple_test.c b/src/spdk/isa-l/crc/crc_simple_test.c new file mode 100644 index 000000000..4799f8745 --- /dev/null +++ b/src/spdk/isa-l/crc/crc_simple_test.c @@ -0,0 +1,64 @@ +/********************************************************************** + Copyright(c) 2011-2013 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include "crc.h" + +const uint16_t init_crc_16 = 0x1234; +const uint16_t t10_dif_expected = 0x60b3; +const uint32_t init_crc_32 = 0x12345678; +const uint32_t ieee_expected = 0x2ceadbe3; + +int main(void) +{ + unsigned char p_buf[48]; + uint16_t t10_dif_computed; + uint32_t ieee_computed; + int i; + + for (i = 0; i < 48; i++) + p_buf[i] = i; + + t10_dif_computed = crc16_t10dif(init_crc_16, p_buf, 48); + + if (t10_dif_computed != t10_dif_expected) + printf("WRONG CRC-16(T10 DIF) value\n"); + else + printf("CORRECT CRC-16(T10 DIF) value\n"); + + ieee_computed = crc32_ieee(init_crc_32, p_buf, 48); + + if (ieee_computed != ieee_expected) + printf("WRONG CRC-32(IEEE) value\n"); + else + printf("CORRECT CRC-32(IEEE) value\n"); + + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/Makefile.am b/src/spdk/isa-l/erasure_code/Makefile.am new file mode 100644 index 000000000..bad2aae2f --- /dev/null +++ b/src/spdk/isa-l/erasure_code/Makefile.am @@ -0,0 +1,161 @@ +######################################################################## +# Copyright(c) 2011-2017 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +lsrc += erasure_code/ec_base.c + +lsrc_base_aliases += erasure_code/ec_base_aliases.c +lsrc_aarch64 += erasure_code/ec_base_aliases.c + +lsrc_x86_64 += \ + erasure_code/ec_highlevel_func.c \ + erasure_code/gf_vect_mul_sse.asm \ + erasure_code/gf_vect_mul_avx.asm \ + erasure_code/gf_vect_dot_prod_sse.asm \ + erasure_code/gf_vect_dot_prod_avx.asm \ + erasure_code/gf_vect_dot_prod_avx2.asm \ + erasure_code/gf_2vect_dot_prod_sse.asm \ + erasure_code/gf_3vect_dot_prod_sse.asm \ + erasure_code/gf_4vect_dot_prod_sse.asm \ + erasure_code/gf_5vect_dot_prod_sse.asm \ + erasure_code/gf_6vect_dot_prod_sse.asm \ + erasure_code/gf_2vect_dot_prod_avx.asm \ + erasure_code/gf_3vect_dot_prod_avx.asm \ + erasure_code/gf_4vect_dot_prod_avx.asm \ + erasure_code/gf_5vect_dot_prod_avx.asm \ + erasure_code/gf_6vect_dot_prod_avx.asm \ + erasure_code/gf_2vect_dot_prod_avx2.asm \ + erasure_code/gf_3vect_dot_prod_avx2.asm \ + erasure_code/gf_4vect_dot_prod_avx2.asm \ + erasure_code/gf_5vect_dot_prod_avx2.asm \ + erasure_code/gf_6vect_dot_prod_avx2.asm \ + erasure_code/gf_vect_mad_sse.asm \ + erasure_code/gf_2vect_mad_sse.asm \ + 
erasure_code/gf_3vect_mad_sse.asm \ + erasure_code/gf_4vect_mad_sse.asm \ + erasure_code/gf_5vect_mad_sse.asm \ + erasure_code/gf_6vect_mad_sse.asm \ + erasure_code/gf_vect_mad_avx.asm \ + erasure_code/gf_2vect_mad_avx.asm \ + erasure_code/gf_3vect_mad_avx.asm \ + erasure_code/gf_4vect_mad_avx.asm \ + erasure_code/gf_5vect_mad_avx.asm \ + erasure_code/gf_6vect_mad_avx.asm \ + erasure_code/gf_vect_mad_avx2.asm \ + erasure_code/gf_2vect_mad_avx2.asm \ + erasure_code/gf_3vect_mad_avx2.asm \ + erasure_code/gf_4vect_mad_avx2.asm \ + erasure_code/gf_5vect_mad_avx2.asm \ + erasure_code/gf_6vect_mad_avx2.asm \ + erasure_code/ec_multibinary.asm + +#if HAVE_AVX512 +lsrc_x86_64 += \ + erasure_code/gf_vect_dot_prod_avx512.asm \ + erasure_code/gf_2vect_dot_prod_avx512.asm \ + erasure_code/gf_3vect_dot_prod_avx512.asm \ + erasure_code/gf_4vect_dot_prod_avx512.asm \ + erasure_code/gf_vect_mad_avx512.asm \ + erasure_code/gf_2vect_mad_avx512.asm \ + erasure_code/gf_3vect_mad_avx512.asm \ + erasure_code/gf_4vect_mad_avx512.asm + +lsrc_x86_32 += \ + erasure_code/ec_highlevel_func.c \ + erasure_code/ec_multibinary.asm \ + erasure_code/gf_vect_dot_prod_avx.asm \ + erasure_code/gf_2vect_dot_prod_avx.asm \ + erasure_code/gf_3vect_dot_prod_avx.asm \ + erasure_code/gf_4vect_dot_prod_avx.asm \ + erasure_code/gf_vect_dot_prod_sse.asm \ + erasure_code/gf_2vect_dot_prod_sse.asm \ + erasure_code/gf_3vect_dot_prod_sse.asm \ + erasure_code/gf_4vect_dot_prod_sse.asm \ + erasure_code/gf_vect_dot_prod_avx2.asm \ + erasure_code/gf_2vect_dot_prod_avx2.asm \ + erasure_code/gf_3vect_dot_prod_avx2.asm \ + erasure_code/gf_4vect_dot_prod_avx2.asm + +unit_tests32 += erasure_code/erasure_code_base_test \ + erasure_code/erasure_code_test \ + erasure_code/gf_vect_mul_test \ + erasure_code/gf_vect_mul_base_test \ + erasure_code/gf_vect_dot_prod_base_test \ + erasure_code/gf_vect_dot_prod_test + +perf_tests32 += erasure_code/gf_vect_mul_perf \ + erasure_code/gf_vect_dot_prod_perf \ + 
erasure_code/erasure_code_perf \ + erasure_code/erasure_code_base_perf \ + erasure_code/gf_vect_dot_prod_1tbl + +src_include += -I $(srcdir)/erasure_code +extern_hdrs += include/erasure_code.h \ + include/gf_vect_mul.h + +other_src += erasure_code/ec_base.h \ + include/multibinary.asm \ + include/reg_sizes.asm + +check_tests += erasure_code/gf_vect_mul_test \ + erasure_code/erasure_code_test \ + erasure_code/gf_inverse_test \ + erasure_code/erasure_code_update_test + +unit_tests += \ + erasure_code/gf_vect_mul_base_test \ + erasure_code/gf_vect_dot_prod_base_test \ + erasure_code/gf_vect_dot_prod_test \ + erasure_code/gf_vect_mad_test \ + erasure_code/erasure_code_base_test + +perf_tests += erasure_code/gf_vect_mul_perf \ + erasure_code/gf_vect_dot_prod_perf \ + erasure_code/gf_vect_dot_prod_1tbl \ + erasure_code/erasure_code_perf \ + erasure_code/erasure_code_base_perf \ + erasure_code/erasure_code_update_perf + +other_tests += erasure_code/gen_rs_matrix_limits + +other_tests_x86_64 += \ + erasure_code/gf_2vect_dot_prod_sse_test \ + erasure_code/gf_3vect_dot_prod_sse_test \ + erasure_code/gf_4vect_dot_prod_sse_test \ + erasure_code/gf_5vect_dot_prod_sse_test \ + erasure_code/gf_6vect_dot_prod_sse_test + +other_tests_x86_32 += \ + erasure_code/gf_2vect_dot_prod_sse_test \ + erasure_code/gf_3vect_dot_prod_sse_test \ + erasure_code/gf_4vect_dot_prod_sse_test \ + erasure_code/gf_5vect_dot_prod_sse_test \ + erasure_code/gf_6vect_dot_prod_sse_test + +other_src += include/test.h \ + include/types.h diff --git a/src/spdk/isa-l/erasure_code/ec_base.c b/src/spdk/isa-l/erasure_code/ec_base.c new file mode 100644 index 000000000..9d76c8df4 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/ec_base.c @@ -0,0 +1,371 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <limits.h>
+#include <string.h>		// for memset, memcpy
+#include "erasure_code.h"
+#include "ec_base.h"		// for GF tables
+
+// Expand a rows x k coefficient matrix into multiplication tables.
+// The rows * k bytes of a are consumed in order; each coefficient is
+// expanded by gf_vect_mul_init() into 32 consecutive bytes of g_tbls,
+// so g_tbls must have room for 32 * k * rows bytes.
+void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
+{
+	int i, j;
+
+	for (i = 0; i < rows; i++) {
+		for (j = 0; j < k; j++) {
+			gf_vect_mul_init(*a++, g_tbls);
+			g_tbls += 32;
+		}
+	}
+}
+
+// Multiply two GF(2^8) elements.
+// Default build: a zero operand yields 0; otherwise the two logarithms
+// from gflog_base are added, wrapped back into 0..254, and looked up in
+// the exponent table gff_base.
+// GF_LARGE_TABLES build: a single lookup in gf_mul_table_base.
+unsigned char gf_mul(unsigned char a, unsigned char b)
+{
+#ifndef GF_LARGE_TABLES
+	int i;
+
+	if ((a == 0) || (b == 0))
+		return 0;
+
+	return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
+#else
+	return gf_mul_table_base[b * 256 + a];
+#endif
+}
+
+// Multiplicative inverse of a in GF(2^8).
+// Zero has no inverse; the default build returns 0 for it rather than
+// signalling an error.
+unsigned char gf_inv(unsigned char a)
+{
+#ifndef GF_LARGE_TABLES
+	if (a == 0)
+		return 0;
+
+	return gff_base[255 - gflog_base[a]];
+#else
+	return gf_inv_table_base[a];
+#endif
+}
+
+// Fill the m x k row-major matrix a (m * k bytes) with encoding
+// coefficients: the first k rows are the identity matrix, and each row
+// i >= k holds the successive powers 1, gen, gen^2, ... of a generator
+// that is 1 for row k and is multiplied by 2 for every following row.
+void gf_gen_rs_matrix(unsigned char *a, int m, int k)
+{
+	int i, j;
+	unsigned char p, gen = 1;
+
+	memset(a, 0, k * m);
+	for (i = 0; i < k; i++)
+		a[k * i + i] = 1;
+
+	for (i = k; i < m; i++) {
+		p = 1;
+		for (j = 0; j < k; j++) {
+			a[k * i + j] = p;
+			p = gf_mul(p, gen);
+		}
+		gen = gf_mul(gen, 2);
+	}
+}
+
+// Fill the m x k row-major matrix a (m * k bytes) with encoding
+// coefficients: identity in the first k rows, gf_inv(i ^ j) in row i,
+// column j for the remaining rows.  i ^ j is never zero there because
+// i >= k > j.  The value is passed to gf_inv() as an unsigned char, so
+// row indices are expected to fit in a byte.
+void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
+{
+	int i, j;
+	unsigned char *p;
+
+	// Identity matrix in high position
+	memset(a, 0, k * m);
+	for (i = 0; i < k; i++)
+		a[k * i + i] = 1;
+
+	// For the rest choose 1/(i + j) | i != j  (GF addition is XOR)
+	p = &a[k * k];
+	for (i = k; i < m; i++)
+		for (j = 0; j < k; j++)
+			*p++ = gf_inv(i ^ j);
+
+}
+
+// Invert the n x n row-major matrix in_mat over GF(2^8) by Gauss-Jordan
+// elimination, writing the inverse to out_mat.
+// in_mat is used as scratch space and is overwritten (it ends up as the
+// identity matrix on success).
+// Returns 0 on success, -1 if no non-zero pivot can be found for some
+// column, i.e. the matrix is singular; both buffers then hold partial
+// results.
+int gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
+{
+	int i, j, k;
+	unsigned char temp;
+
+	// Set out_mat[] to the identity matrix
+	for (i = 0; i < n * n; i++)	// memset(out_mat, 0, n*n)
+		out_mat[i] = 0;
+
+	for (i = 0; i < n; i++)
+		out_mat[i * n + i] = 1;
+
+	// Inverse
+	for (i = 0; i < n; i++) {
+		// Check for 0 in pivot element
+		if (in_mat[i * n + i] == 0) {
+			// Find a row with non-zero in current column and swap
+			for (j = i + 1; j < n; j++)
+				if (in_mat[j * n + i])
+					break;
+
+			if (j == n)	// Couldn't find means it's singular
+				return -1;
+
+			for (k = 0; k < n; k++) {	// Swap rows i,j
+				temp = in_mat[i * n + k];
+				in_mat[i * n + k] = in_mat[j * n + k];
+				in_mat[j * n + k] = temp;
+
+				temp = out_mat[i * n + k];
+				out_mat[i * n + k] = out_mat[j * n + k];
+				out_mat[j * n + k] = temp;
+			}
+		}
+
+		temp = gf_inv(in_mat[i * n + i]);	// 1/pivot
+		for (j = 0; j < n; j++) {	// Scale row i by 1/pivot
+			in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
+			out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
+		}
+
+		// Eliminate column i from every other row
+		for (j = 0; j < n; j++) {
+			if (j == i)
+				continue;
+
+			temp = in_mat[j * n + i];
+			for (k = 0; k < n; k++) {
+				out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
+				in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
+			}
+		}
+	}
+	return 0;
+}
+
+// Calculates const table gftbl in GF(2^8) from single input A
+// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
+//
+// tbl receives 32 bytes: tbl[x] = c * x for x = 0..15 and
+// tbl[16 + x] = c * (x << 4) for x = 0..15.  In particular tbl[1] == c,
+// which the *_base routines below read back as the coefficient.
+// The cNN locals are named after the table index they are stored at
+// (c17 is c * 0x10, c18 is c * 0x20, ...).
+//
+// The word-packed fast path builds eight table bytes per 64-bit multiply
+// and is only correct when the least significant byte of a word is
+// stored first, so it is restricted to little-endian targets.  Compilers
+// that do not define __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ see both
+// as 0 in the #if, which keeps the fast path enabled for them as before.
+// The words are copied out with memcpy because tbl is a plain byte
+// pointer with no alignment guarantee.
+
+void gf_vect_mul_init(unsigned char c, unsigned char *tbl)
+{
+	unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+
+#if (__WORDSIZE == 64 || _WIN64 || __x86_64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+	unsigned long long v1, v2, v4, v8;
+	unsigned long long v10, v20, v40, v80;
+	unsigned long long t[4];
+	unsigned char c17, c18, c20, c24;
+
+	// Each 0x01 byte lane selects the table indices that have the
+	// corresponding index bit set (lanes 1,3,5,7 for bit 0, and so on).
+	v1 = c * 0x0100010001000100ull;
+	v2 = c2 * 0x0101000001010000ull;
+	v4 = c4 * 0x0101010100000000ull;
+	v8 = c8 * 0x0101010101010101ull;
+
+	v4 = v1 ^ v2 ^ v4;
+	t[0] = v4;		// tbl[0..7]
+	t[1] = v8 ^ v4;		// tbl[8..15]
+
+	c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+
+	v10 = c17 * 0x0100010001000100ull;
+	v20 = c18 * 0x0101000001010000ull;
+	v40 = c20 * 0x0101010100000000ull;
+	v80 = c24 * 0x0101010101010101ull;
+
+	v40 = v10 ^ v20 ^ v40;
+	t[2] = v40;		// tbl[16..23]
+	t[3] = v80 ^ v40;	// tbl[24..31]
+
+	memcpy(tbl, t, sizeof(t));
+
+#else // 32-bit, big-endian or other
+	unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
+	unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30,
+	    c31;
+
+	c3 = c2 ^ c;
+	c5 = c4 ^ c;
+	c6 = c4 ^ c2;
+	c7 = c4 ^ c3;
+
+	c9 = c8 ^ c;
+	c10 = c8 ^ c2;
+	c11 = c8 ^ c3;
+	c12 = c8 ^ c4;
+	c13 = c8 ^ c5;
+	c14 = c8 ^ c6;
+	c15 = c8 ^ c7;
+
+	tbl[0] = 0;
+	tbl[1] = c;
+	tbl[2] = c2;
+	tbl[3] = c3;
+	tbl[4] = c4;
+	tbl[5] = c5;
+	tbl[6] = c6;
+	tbl[7] = c7;
+	tbl[8] = c8;
+	tbl[9] = c9;
+	tbl[10] = c10;
+	tbl[11] = c11;
+	tbl[12] = c12;
+	tbl[13] = c13;
+	tbl[14] = c14;
+	tbl[15] = c15;
+
+	c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	c19 = c18 ^ c17;
+	c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	c21 = c20 ^ c17;
+	c22 = c20 ^ c18;
+	c23 = c20 ^ c19;
+	c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0);	//Mult by GF{2}
+	c25 = c24 ^ c17;
+	c26 = c24 ^ c18;
+	c27 = c24 ^ c19;
+	c28 = c24 ^ c20;
+	c29 = c24 ^ c21;
+	c30 = c24 ^ c22;
+	c31 = c24 ^ c23;
+
+	tbl[16] = 0;
+	tbl[17] = c17;
+	tbl[18] = c18;
+	tbl[19] = c19;
+	tbl[20] = c20;
+	tbl[21] = c21;
+	tbl[22] = c22;
+	tbl[23] = c23;
+	tbl[24] = c24;
+	tbl[25] = c25;
+	tbl[26] = c26;
+	tbl[27] = c27;
+	tbl[28] = c28;
+	tbl[29] = c29;
+	tbl[30] = c30;
+	tbl[31] = c31;
+
+#endif //little-endian && (__WORDSIZE == 64 || _WIN64 || __x86_64__)
+}
+
+// Reference dot product: for each of the len byte positions,
+// dest[i] = XOR over j < vlen of src[j][i] * coefficient j.
+// v is an array of 32-byte tables as built by gf_vect_mul_init(); only
+// byte 1 of each table (the coefficient itself) is read.
+void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v,
+			   unsigned char **src, unsigned char *dest)
+{
+	int i, j;
+	unsigned char s;
+	for (i = 0; i < len; i++) {
+		s = 0;
+		for (j = 0; j < vlen; j++)
+			s ^= gf_mul(src[j][i], v[j * 32 + 1]);
+
+		dest[i] = s;
+	}
+}
+
+// Reference multiply-accumulate: dest[i] ^= src[i] * coefficient vec_i
+// for i < len.  The coefficient is byte 1 of the vec_i-th 32-byte table
+// in v.  The vec argument is not used by this implementation.
+void gf_vect_mad_base(int len, int vec, int vec_i,
+		      unsigned char *v, unsigned char *src, unsigned char *dest)
+{
+	int i;
+	unsigned char s;
+	for (i = 0; i < len; i++) {
+		s = dest[i];
+		s ^= gf_mul(src[i], v[vec_i * 32 + 1]);
+		dest[i] = s;
+	}
+}
+
+// Reference encoder: computes dests output buffers of len bytes each,
+// dest[l][i] = XOR over j < srcs of src[j][i] * coefficient (l, j).
+// v holds dests * srcs 32-byte tables in row-major order; byte 1 of each
+// table is the coefficient.
+void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
+			 unsigned char **src, unsigned char **dest)
+{
+	int i, j, l;
+	unsigned char s;
+
+	for (l = 0; l < dests; l++) {
+		for (i = 0; i < len; i++) {
+			s = 0;
+			for (j = 0; j < srcs; j++)
+				s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);
+
+			dest[l][i] = s;
+		}
+	}
+}
+
+// Reference incremental encoder: folds one source buffer into existing
+// outputs, dest[l][i] ^= data[i] * coefficient (l, vec_i) for l < rows.
+// v holds rows * k 32-byte tables in row-major order; byte 1 of each
+// table is the coefficient.
+void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
+				unsigned char *data, unsigned char **dest)
+{
+	int i, l;
+	unsigned char s;
+
+	for (l = 0; l < rows; l++) {
+		for (i = 0; i < len; i++) {
+			s = dest[l][i];
+			s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]);
+
+			dest[l][i] = s;
+		}
+	}
+}
+
+// Reference constant multiply: dest[i] = a[1] * src[i] for i < len,
+// where a is a 32-byte table as built by gf_vect_mul_init().
+// Nothing is written when len <= 0.
+void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
+{
+	//2nd element of table array is ref value used to fill it in
+	unsigned char c = a[1];
+	while (len-- > 0)
+		*dest++ = gf_mul(c, *src++);
+}
+
+struct slver {
+	unsigned short snum;
+	unsigned char ver;
+	unsigned char core;
+};
+
+// Version info
+// Each record is declared twice: once under a name carrying the version
+// digits as a suffix and once, initialised, under the plain name.
+struct slver gf_vect_mul_init_slver_00020035;
+struct slver gf_vect_mul_init_slver = { 0x0035, 0x02, 0x00 };
+
+struct slver ec_encode_data_base_slver_00010135;
+struct slver ec_encode_data_base_slver = { 0x0135, 0x01, 0x00 };
+
+struct slver gf_vect_mul_base_slver_00010136;
+struct slver gf_vect_mul_base_slver = { 0x0136, 0x01, 0x00 };
+
+struct slver gf_vect_dot_prod_base_slver_00010137;
+struct slver gf_vect_dot_prod_base_slver = { 0x0137, 0x01, 0x00 };
+
+struct slver gf_mul_slver_00000214;
+struct slver gf_mul_slver = { 0x0214, 0x00, 0x00 };
+
+struct slver gf_invert_matrix_slver_00000215;
+struct slver gf_invert_matrix_slver = { 0x0215, 0x00, 0x00 };
+
+struct slver gf_gen_rs_matrix_slver_00000216;
+struct slver gf_gen_rs_matrix_slver = { 0x0216, 0x00, 0x00 };
+
+struct slver gf_gen_cauchy1_matrix_slver_00000217;
+struct slver gf_gen_cauchy1_matrix_slver = { 0x0217, 0x00, 0x00 };
diff --git a/src/spdk/isa-l/erasure_code/ec_base.h b/src/spdk/isa-l/erasure_code/ec_base.h
new file mode 100644
index 000000000..d69a92d67
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/ec_base.h
@@ -0,0 +1,6680 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _EC_BASE_H_ +#define _EC_BASE_H_ + +// Global GF(256) tables +#ifndef GF_LARGE_TABLES +unsigned char gff_base[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, + 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, + 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, + 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, + 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, + 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, + 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, + 0x5e, 0xbc, 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, + 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, + 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, + 0x11, 0x22, 0x44, 0x88, 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, + 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, + 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, + 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, + 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, + 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, + 0xe6, 0xd1, 0xbf, 0x63, 
0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, + 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, + 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, + 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, + 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, + 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, + 0xac, 0x45, 0x8a, 0x09, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, + 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, + 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, + 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 +}; + +unsigned char gflog_base[] = { + 0x00, 0xff, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, + 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, 0x04, 0x64, 0xe0, 0x0e, + 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, + 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, + 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, 0x1d, 0xb5, + 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78, + 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, + 0x30, 0xfd, 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, + 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2, + 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3, + 0xc3, 0x48, 0x7e, 0x6e, 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, + 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, + 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70, + 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, 0x67, 0x4a, 0xde, 0xed, + 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, + 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, + 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, 0x90, 0x87, + 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab, + 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, + 0x41, 0xa2, 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, + 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1, + 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, + 0xbb, 0xcc, 0x3e, 0x5a, 
0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, + 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, + 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, + 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf +}; +#else +unsigned char gf_mul_table_base[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, + 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 
0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, + 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, + 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, + 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, + 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, + 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, + 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, + 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, + 0xfe, 0xff, 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, + 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, + 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, + 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, + 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, + 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, + 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, + 0xb0, 0xb2, 0xb4, 0xb6, 
0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, + 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, + 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, + 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0x0d, 0x0f, + 0x09, 0x0b, 0x05, 0x07, 0x01, 0x03, 0x3d, 0x3f, 0x39, 0x3b, + 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, + 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, + 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f, + 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, + 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, + 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, + 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, + 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb, + 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, + 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, + 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3, 0x00, 0x03, + 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, + 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, + 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, + 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, + 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, + 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, + 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, + 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, + 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, + 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, + 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98, + 0x91, 0x92, 0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, + 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4, + 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 
0xbf, 0xbc, 0xfd, 0xfe, + 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0, + 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, + 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc, + 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46, + 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68, + 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, + 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34, + 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0x0d, 0x0e, + 0x0b, 0x08, 0x01, 0x02, 0x07, 0x04, 0x15, 0x16, 0x13, 0x10, + 0x19, 0x1a, 0x1f, 0x1c, 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, + 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, + 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c, 0x60, 0x64, + 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c, 0x80, 0x84, 0x88, 0x8c, + 0x90, 0x94, 0x98, 0x9c, 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, + 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, + 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc, 0x1d, 0x19, + 0x15, 0x11, 0x0d, 0x09, 0x05, 0x01, 0x3d, 0x39, 0x35, 0x31, + 0x2d, 0x29, 0x25, 0x21, 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, + 0x45, 0x41, 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61, + 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81, 0xbd, 0xb9, + 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1, 0xdd, 0xd9, 0xd5, 0xd1, + 0xcd, 0xc9, 0xc5, 0xc1, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, + 0xe5, 0xe1, 0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26, + 0x1a, 0x1e, 0x12, 0x16, 0x0a, 0x0e, 0x02, 0x06, 0x7a, 0x7e, + 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66, 0x5a, 0x5e, 0x52, 0x56, + 0x4a, 0x4e, 0x42, 0x46, 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, + 0xa2, 0xa6, 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86, + 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6, 0xda, 0xde, + 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6, 0x27, 0x23, 0x2f, 0x2b, + 0x37, 0x33, 0x3f, 0x3b, 0x07, 0x03, 0x0f, 0x0b, 0x17, 0x13, + 0x1f, 0x1b, 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b, + 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b, 
0xa7, 0xa3, + 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb, 0x87, 0x83, 0x8f, 0x8b, + 0x97, 0x93, 0x9f, 0x9b, 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, + 0xff, 0xfb, 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb, + 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, + 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33, 0x50, 0x55, 0x5a, 0x5f, + 0x44, 0x41, 0x4e, 0x4b, 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, + 0x66, 0x63, 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb, + 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93, 0xf0, 0xf5, + 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb, 0xd8, 0xdd, 0xd2, 0xd7, + 0xcc, 0xc9, 0xc6, 0xc3, 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, + 0x43, 0x46, 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e, + 0x0d, 0x08, 0x07, 0x02, 0x19, 0x1c, 0x13, 0x16, 0x25, 0x20, + 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e, 0xfd, 0xf8, 0xf7, 0xf2, + 0xe9, 0xec, 0xe3, 0xe6, 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, + 0xcb, 0xce, 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6, + 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e, 0xba, 0xbf, + 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1, 0x92, 0x97, 0x98, 0x9d, + 0x86, 0x83, 0x8c, 0x89, 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, + 0xf4, 0xf1, 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9, + 0x1a, 0x1f, 0x10, 0x15, 0x0e, 0x0b, 0x04, 0x01, 0x32, 0x37, + 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29, 0x4a, 0x4f, 0x40, 0x45, + 0x5e, 0x5b, 0x54, 0x51, 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, + 0x7c, 0x79, 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc, + 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4, 0xb7, 0xb2, + 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac, 0x9f, 0x9a, 0x95, 0x90, + 0x8b, 0x8e, 0x81, 0x84, 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, + 0x59, 0x5c, 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74, + 0x17, 0x12, 0x1d, 0x18, 0x03, 0x06, 0x09, 0x0c, 0x3f, 0x3a, + 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24, 0x00, 0x06, 0x0c, 0x0a, + 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, + 0x24, 0x22, 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72, + 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42, 0xc0, 0xc6, + 
0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2, 0xf0, 0xf6, 0xfc, 0xfa, + 0xe8, 0xee, 0xe4, 0xe2, 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, + 0xb4, 0xb2, 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82, + 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f, 0xad, 0xab, + 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf, 0xfd, 0xfb, 0xf1, 0xf7, + 0xe5, 0xe3, 0xe9, 0xef, 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, + 0xd9, 0xdf, 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f, + 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f, 0x3d, 0x3b, + 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f, 0x0d, 0x0b, 0x01, 0x07, + 0x15, 0x13, 0x19, 0x1f, 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, + 0x33, 0x35, 0x17, 0x11, 0x1b, 0x1d, 0x0f, 0x09, 0x03, 0x05, + 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55, 0x77, 0x71, + 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65, 0xe7, 0xe1, 0xeb, 0xed, + 0xff, 0xf9, 0xf3, 0xf5, 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, + 0xc3, 0xc5, 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95, + 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5, 0xba, 0xbc, + 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8, 0x8a, 0x8c, 0x86, 0x80, + 0x92, 0x94, 0x9e, 0x98, 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, + 0xce, 0xc8, 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8, + 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68, 0x4a, 0x4c, + 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58, 0x1a, 0x1c, 0x16, 0x10, + 0x02, 0x04, 0x0e, 0x08, 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, + 0x3e, 0x38, 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, + 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77, + 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, 0x48, 0x4f, 0x46, 0x41, + 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, + 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd, + 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf, + 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, 0xdd, 0xda, 0xd3, 0xd4, + 0xc1, 0xc6, 0xcf, 0xc8, 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, + 0xf7, 0xf0, 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8, + 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80, 0x3d, 0x3a, + 0x33, 0x34, 
0x21, 0x26, 0x2f, 0x28, 0x05, 0x02, 0x0b, 0x0c, + 0x19, 0x1e, 0x17, 0x10, 0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56, + 0x5f, 0x58, 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60, + 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2, 0x9f, 0x98, + 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a, 0xd7, 0xd0, 0xd9, 0xde, + 0xcb, 0xcc, 0xc5, 0xc2, 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, + 0xfd, 0xfa, 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52, + 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a, 0x37, 0x30, + 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22, 0x0f, 0x08, 0x01, 0x06, + 0x13, 0x14, 0x1d, 0x1a, 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, + 0x68, 0x6f, 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57, + 0x0a, 0x0d, 0x04, 0x03, 0x16, 0x11, 0x18, 0x1f, 0x32, 0x35, + 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27, 0x9a, 0x9d, 0x94, 0x93, + 0x86, 0x81, 0x88, 0x8f, 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, + 0xb0, 0xb7, 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff, + 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7, 0x00, 0x08, + 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, + 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, + 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, + 0x1d, 0x15, 0x0d, 0x05, 0x3d, 0x35, 0x2d, 0x25, 0x5d, 0x55, + 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65, 0x9d, 0x95, 0x8d, 0x85, + 0xbd, 0xb5, 0xad, 0xa5, 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, + 0xed, 0xe5, 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0x0a, 0x02, + 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42, 0xba, 0xb2, + 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82, 0xfa, 0xf2, 0xea, 0xe2, + 0xda, 0xd2, 0xca, 0xc2, 0x27, 0x2f, 0x37, 0x3f, 0x07, 0x0f, + 0x17, 0x1f, 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f, + 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f, 0xe7, 0xef, + 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf, 0x74, 0x7c, 0x64, 0x6c, + 0x54, 0x5c, 0x44, 0x4c, 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, + 0x04, 0x0c, 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc, + 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c, 0x69, 0x61, + 0x79, 0x71, 0x49, 0x41, 
0x59, 0x51, 0x29, 0x21, 0x39, 0x31, + 0x09, 0x01, 0x19, 0x11, 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, + 0xd9, 0xd1, 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91, + 0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76, 0x0e, 0x06, + 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36, 0xce, 0xc6, 0xde, 0xd6, + 0xee, 0xe6, 0xfe, 0xf6, 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, + 0xbe, 0xb6, 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b, + 0x13, 0x1b, 0x03, 0x0b, 0x33, 0x3b, 0x23, 0x2b, 0xd3, 0xdb, + 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb, 0x93, 0x9b, 0x83, 0x8b, + 0xb3, 0xbb, 0xa3, 0xab, 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, + 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, + 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3d, 0x34, 0x2f, 0x26, + 0x19, 0x10, 0x0b, 0x02, 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, + 0x43, 0x4a, 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92, + 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda, 0x7a, 0x73, + 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, 0x32, 0x3b, 0x20, 0x29, + 0x16, 0x1f, 0x04, 0x0d, 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, + 0xdc, 0xd5, 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, + 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0xd7, 0xde, 0xc5, 0xcc, + 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, + 0xa9, 0xa0, 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb, + 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83, 0x64, 0x6d, + 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, + 0x08, 0x01, 0x1a, 0x13, 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, + 0xff, 0xf6, 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe, + 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66, 0x11, 0x18, + 0x03, 0x0a, 0x35, 0x3c, 0x27, 0x2e, 0x8e, 0x87, 0x9c, 0x95, + 0xaa, 0xa3, 0xb8, 0xb1, 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, + 0xf0, 0xf9, 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21, + 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0xb3, 0xba, + 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c, 
0xfb, 0xf2, 0xe9, 0xe0, + 0xdf, 0xd6, 0xcd, 0xc4, 0x23, 0x2a, 0x31, 0x38, 0x07, 0x0e, + 0x15, 0x1c, 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54, + 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, + 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66, 0xa0, 0xaa, 0xb4, 0xbe, + 0x88, 0x82, 0x9c, 0x96, 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, + 0xcc, 0xc6, 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b, + 0x0d, 0x07, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b, 0xfd, 0xf7, + 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb, 0xad, 0xa7, 0xb9, 0xb3, + 0x85, 0x8f, 0x91, 0x9b, 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, + 0x86, 0x8c, 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc, + 0x1a, 0x10, 0x0e, 0x04, 0x32, 0x38, 0x26, 0x2c, 0x4a, 0x40, + 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c, 0xe7, 0xed, 0xf3, 0xf9, + 0xcf, 0xc5, 0xdb, 0xd1, 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, + 0x8b, 0x81, 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71, + 0x17, 0x1d, 0x03, 0x09, 0x3f, 0x35, 0x2b, 0x21, 0x69, 0x63, + 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f, 0x39, 0x33, 0x2d, 0x27, + 0x11, 0x1b, 0x05, 0x0f, 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, + 0xf5, 0xff, 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf, + 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x08, 0x02, 0x64, 0x6e, + 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52, 0x94, 0x9e, 0x80, 0x8a, + 0xbc, 0xb6, 0xa8, 0xa2, 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, + 0xf8, 0xf2, 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5, + 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5, 0x73, 0x79, + 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45, 0x23, 0x29, 0x37, 0x3d, + 0x0b, 0x01, 0x1f, 0x15, 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, + 0xb2, 0xb8, 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8, + 0x2e, 0x24, 0x3a, 0x30, 0x06, 0x0c, 0x12, 0x18, 0x7e, 0x74, + 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48, 0x00, 0x0b, 0x16, 0x1d, + 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, + 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, + 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7d, 0x76, + 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c, 0x25, 0x2e, 
0x33, 0x38, + 0x09, 0x02, 0x1f, 0x14, 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, + 0xf7, 0xfc, 0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4, + 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb, 0xa2, 0xa9, + 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93, 0x4a, 0x41, 0x5c, 0x57, + 0x66, 0x6d, 0x70, 0x7b, 0x12, 0x19, 0x04, 0x0f, 0x3e, 0x35, + 0x28, 0x23, 0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6, + 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee, 0x37, 0x3c, + 0x21, 0x2a, 0x1b, 0x10, 0x0d, 0x06, 0x6f, 0x64, 0x79, 0x72, + 0x43, 0x48, 0x55, 0x5e, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, + 0xd3, 0xd8, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, + 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0x01, 0x0a, + 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x94, 0x9f, 0x82, 0x89, + 0xb8, 0xb3, 0xae, 0xa5, 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, + 0xf6, 0xfd, 0x24, 0x2f, 0x32, 0x39, 0x08, 0x03, 0x1e, 0x15, + 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d, 0x13, 0x18, + 0x05, 0x0e, 0x3f, 0x34, 0x29, 0x22, 0x4b, 0x40, 0x5d, 0x56, + 0x67, 0x6c, 0x71, 0x7a, 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, + 0x99, 0x92, 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca, + 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f, 0x36, 0x3d, + 0x20, 0x2b, 0x1a, 0x11, 0x0c, 0x07, 0xde, 0xd5, 0xc8, 0xc3, + 0xf2, 0xf9, 0xe4, 0xef, 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, + 0xbc, 0xb7, 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, + 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44, 0xc0, 0xcc, + 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4, 0xa0, 0xac, 0xb8, 0xb4, + 0x90, 0x9c, 0x88, 0x84, 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, + 0xb5, 0xb9, 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9, + 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79, 0x3d, 0x31, + 0x25, 0x29, 0x0d, 0x01, 0x15, 0x19, 0x27, 0x2b, 0x3f, 0x33, + 0x17, 0x1b, 0x0f, 0x03, 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, + 0x6f, 0x63, 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3, + 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3, 0xba, 0xb6, + 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e, 0xda, 0xd6, 0xc2, 0xce, + 
0xea, 0xe6, 0xf2, 0xfe, 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, + 0x52, 0x5e, 0x1a, 0x16, 0x02, 0x0e, 0x2a, 0x26, 0x32, 0x3e, + 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a, 0x2e, 0x22, + 0x36, 0x3a, 0x1e, 0x12, 0x06, 0x0a, 0x8e, 0x82, 0x96, 0x9a, + 0xbe, 0xb2, 0xa6, 0xaa, 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, + 0xc6, 0xca, 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7, + 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97, 0x13, 0x1f, + 0x0b, 0x07, 0x23, 0x2f, 0x3b, 0x37, 0x73, 0x7f, 0x6b, 0x67, + 0x43, 0x4f, 0x5b, 0x57, 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, + 0x41, 0x4d, 0x09, 0x05, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d, + 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d, 0xc9, 0xc5, + 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed, 0xf4, 0xf8, 0xec, 0xe0, + 0xc4, 0xc8, 0xdc, 0xd0, 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, + 0xbc, 0xb0, 0x34, 0x38, 0x2c, 0x20, 0x04, 0x08, 0x1c, 0x10, + 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70, 0x00, 0x0d, + 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, + 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, + 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, + 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0x6d, 0x60, 0x77, 0x7a, + 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, + 0x2b, 0x26, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, + 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0xb7, 0xba, + 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, + 0xeb, 0xe6, 0xf1, 0xfc, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, + 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, + 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0xce, 0xc3, 0xd4, 0xd9, + 0xfa, 0xf7, 0xe0, 0xed, 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, + 0x88, 0x85, 0x1e, 0x13, 0x04, 0x09, 0x2a, 0x27, 0x30, 0x3d, + 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55, 0x73, 0x7e, + 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50, 0x1b, 0x16, 0x01, 0x0c, + 0x2f, 0x22, 
0x35, 0x38, 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, + 0x8d, 0x80, 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8, + 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a, 0xc1, 0xcc, + 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2, 0x79, 0x74, 0x63, 0x6e, + 0x4d, 0x40, 0x57, 0x5a, 0x11, 0x1c, 0x0b, 0x06, 0x25, 0x28, + 0x3f, 0x32, 0x14, 0x19, 0x0e, 0x03, 0x20, 0x2d, 0x3a, 0x37, + 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f, 0xc4, 0xc9, + 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7, 0xac, 0xa1, 0xb6, 0xbb, + 0x98, 0x95, 0x82, 0x8f, 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, + 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, + 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdd, 0xd3, 0xc1, 0xcf, + 0xe5, 0xeb, 0xf9, 0xf7, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, + 0x89, 0x87, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0xa7, 0xa9, + 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, 0xd7, 0xd9, 0xcb, 0xc5, + 0xef, 0xe1, 0xf3, 0xfd, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, + 0x63, 0x6d, 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, + 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0x9a, 0x94, 0x86, 0x88, + 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, + 0xce, 0xc0, 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79, + 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x07, 0x09, 0xb3, 0xbd, + 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99, 0xc3, 0xcd, 0xdf, 0xd1, + 0xfb, 0xf5, 0xe7, 0xe9, 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, + 0xaa, 0xa4, 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4, + 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44, 0x1e, 0x10, + 0x02, 0x0c, 0x26, 0x28, 0x3a, 0x34, 0xf4, 0xfa, 0xe8, 0xe6, + 0xcc, 0xc2, 0xd0, 0xde, 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, + 0xa0, 0xae, 0x14, 0x1a, 0x08, 0x06, 0x2c, 0x22, 0x30, 0x3e, + 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e, 0x29, 0x27, + 0x35, 0x3b, 0x11, 0x1f, 0x0d, 0x03, 0x59, 0x57, 0x45, 0x4b, + 0x61, 0x6f, 0x7d, 0x73, 
0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, + 0xed, 0xe3, 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93, + 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, + 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55, 0xf0, 0xff, 0xee, 0xe1, + 0xcc, 0xc3, 0xd2, 0xdd, 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, + 0xaa, 0xa5, 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0, + 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8, 0x0d, 0x02, + 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20, 0x75, 0x7a, 0x6b, 0x64, + 0x49, 0x46, 0x57, 0x58, 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, + 0xc5, 0xca, 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2, + 0x17, 0x18, 0x09, 0x06, 0x2b, 0x24, 0x35, 0x3a, 0x6f, 0x60, + 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42, 0x1a, 0x15, 0x04, 0x0b, + 0x26, 0x29, 0x38, 0x37, 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, + 0x40, 0x4f, 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7, + 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf, 0xd3, 0xdc, + 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe, 0xab, 0xa4, 0xb5, 0xba, + 0x97, 0x98, 0x89, 0x86, 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, + 0x01, 0x0e, 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76, + 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0x0c, 0x03, 0x56, 0x59, + 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b, 0xde, 0xd1, 0xc0, 0xcf, + 0xe2, 0xed, 0xfc, 0xf3, 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, + 0x84, 0x8b, 0x34, 0x3b, 0x2a, 0x25, 0x08, 0x07, 0x16, 0x19, + 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61, 0xc4, 0xcb, + 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9, 0xbc, 0xb3, 0xa2, 0xad, + 0x80, 0x8f, 0x9e, 0x91, 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, + 0xeb, 0xe4, 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c, + 0x39, 0x36, 0x27, 0x28, 0x05, 0x0a, 0x1b, 0x14, 0x41, 0x4e, + 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c, 0x00, 0x10, 0x20, 0x30, + 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, + 0xe0, 0xf0, 0x1d, 0x0d, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d, + 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed, 0x3a, 0x2a, + 0x1a, 0x0a, 0x7a, 0x6a, 0x5a, 0x4a, 0xba, 0xaa, 0x9a, 0x8a, + 0xfa, 0xea, 0xda, 0xca, 0x27, 0x37, 
0x07, 0x17, 0x67, 0x77, + 0x47, 0x57, 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7, + 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04, 0xf4, 0xe4, + 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84, 0x69, 0x79, 0x49, 0x59, + 0x29, 0x39, 0x09, 0x19, 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, + 0x89, 0x99, 0x4e, 0x5e, 0x6e, 0x7e, 0x0e, 0x1e, 0x2e, 0x3e, + 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe, 0x53, 0x43, + 0x73, 0x63, 0x13, 0x03, 0x33, 0x23, 0xd3, 0xc3, 0xf3, 0xe3, + 0x93, 0x83, 0xb3, 0xa3, 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, + 0x88, 0x98, 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x08, 0x18, + 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85, 0x75, 0x65, + 0x55, 0x45, 0x35, 0x25, 0x15, 0x05, 0xd2, 0xc2, 0xf2, 0xe2, + 0x92, 0x82, 0xb2, 0xa2, 0x52, 0x42, 0x72, 0x62, 0x12, 0x02, + 0x32, 0x22, 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf, + 0x4f, 0x5f, 0x6f, 0x7f, 0x0f, 0x1f, 0x2f, 0x3f, 0x9c, 0x8c, + 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec, 0x1c, 0x0c, 0x3c, 0x2c, + 0x5c, 0x4c, 0x7c, 0x6c, 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, + 0xe1, 0xf1, 0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, + 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6, 0x26, 0x36, + 0x06, 0x16, 0x66, 0x76, 0x46, 0x56, 0xbb, 0xab, 0x9b, 0x8b, + 0xfb, 0xeb, 0xdb, 0xcb, 0x3b, 0x2b, 0x1b, 0x0b, 0x7b, 0x6b, + 0x5b, 0x4b, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x0d, 0x1c, + 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a, 0x85, 0x94, 0xa7, 0xb6, + 0xc1, 0xd0, 0xe3, 0xf2, 0x1a, 0x0b, 0x38, 0x29, 0x5e, 0x4f, + 0x7c, 0x6d, 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5, + 0x17, 0x06, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60, 0x9f, 0x8e, + 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8, 0x34, 0x25, 0x16, 0x07, + 0x70, 0x61, 0x52, 0x43, 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, + 0xda, 0xcb, 0x39, 0x28, 0x1b, 0x0a, 0x7d, 0x6c, 0x5f, 0x4e, + 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6, 0x2e, 0x3f, + 0x0c, 0x1d, 0x6a, 0x7b, 0x48, 0x59, 0xa6, 0xb7, 0x84, 0x95, + 0xe2, 0xf3, 0xc0, 0xd1, 0x23, 0x32, 0x01, 0x10, 
0x67, 0x76, + 0x45, 0x54, 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc, + 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0x0e, 0x1f, 0xe0, 0xf1, + 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97, 0x65, 0x74, 0x47, 0x56, + 0x21, 0x30, 0x03, 0x12, 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, + 0x8b, 0x9a, 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x05, + 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d, 0x7f, 0x6e, + 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x08, 0xf7, 0xe6, 0xd5, 0xc4, + 0xb3, 0xa2, 0x91, 0x80, 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x09, + 0x3a, 0x2b, 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3, + 0x51, 0x40, 0x73, 0x62, 0x15, 0x04, 0x37, 0x26, 0xd9, 0xc8, + 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae, 0x46, 0x57, 0x64, 0x75, + 0x02, 0x13, 0x20, 0x31, 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, + 0xa8, 0xb9, 0x4b, 0x5a, 0x69, 0x78, 0x0f, 0x1e, 0x2d, 0x3c, + 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4, 0x00, 0x12, + 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, + 0xd8, 0xca, 0xfc, 0xee, 0x3d, 0x2f, 0x19, 0x0b, 0x75, 0x67, + 0x51, 0x43, 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3, + 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x04, 0xea, 0xf8, + 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94, 0x47, 0x55, 0x63, 0x71, + 0x0f, 0x1d, 0x2b, 0x39, 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, + 0xbb, 0xa9, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, + 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, 0xc9, 0xdb, + 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7, 0x59, 0x4b, 0x7d, 0x6f, + 0x11, 0x03, 0x35, 0x27, 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, + 0xe2, 0xf0, 0x1e, 0x0c, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60, + 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd, 0x23, 0x31, + 0x07, 0x15, 0x6b, 0x79, 0x4f, 0x5d, 0xf5, 0xe7, 0xd1, 0xc3, + 0xbd, 0xaf, 0x99, 0x8b, 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, + 0x09, 0x1b, 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, + 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26, 0x8f, 0x9d, + 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1, 0x1f, 0x0d, 0x3b, 0x29, + 0x57, 0x45, 0x73, 0x61, 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, + 
0xde, 0xcc, 0x22, 0x30, 0x06, 0x14, 0x6a, 0x78, 0x4e, 0x5c, + 0x01, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f, 0x91, 0x83, + 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef, 0x3c, 0x2e, 0x18, 0x0a, + 0x74, 0x66, 0x50, 0x42, 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, + 0xc0, 0xd2, 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x05, + 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95, 0x46, 0x54, + 0x62, 0x70, 0x0e, 0x1c, 0x2a, 0x38, 0xd6, 0xc4, 0xf2, 0xe0, + 0x9e, 0x8c, 0xba, 0xa8, 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, + 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1, + 0x2d, 0x3e, 0x0b, 0x18, 0x61, 0x72, 0x47, 0x54, 0xb5, 0xa6, + 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc, 0x5a, 0x49, 0x7c, 0x6f, + 0x16, 0x05, 0x30, 0x23, 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, + 0xa8, 0xbb, 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0x0e, + 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96, 0xb4, 0xa7, + 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd, 0x2c, 0x3f, 0x0a, 0x19, + 0x60, 0x73, 0x46, 0x55, 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, + 0xf3, 0xe0, 0x01, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78, + 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97, 0x76, 0x65, + 0x50, 0x43, 0x3a, 0x29, 0x1c, 0x0f, 0xc3, 0xd0, 0xe5, 0xf6, + 0x8f, 0x9c, 0xa9, 0xba, 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x04, + 0x31, 0x22, 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0x0c, + 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94, 0x58, 0x4b, + 0x7e, 0x6d, 0x14, 0x07, 0x32, 0x21, 0xc0, 0xd3, 0xe6, 0xf5, + 0x8c, 0x9f, 0xaa, 0xb9, 0x2f, 0x3c, 0x09, 0x1a, 0x63, 0x70, + 0x45, 0x56, 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce, + 0x02, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b, 0x9a, 0x89, + 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3, 0xc1, 0xd2, 0xe7, 0xf4, + 0x8d, 0x9e, 0xab, 0xb8, 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x06, + 0x33, 0x20, 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95, + 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0x0d, 0x9b, 0x88, + 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2, 0x03, 0x10, 0x25, 0x36, + 0x4f, 0x5c, 0x69, 0x7a, 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, + 0xdc, 0xcf, 
0x2e, 0x3d, 0x08, 0x1b, 0x62, 0x71, 0x44, 0x57, + 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, + 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc, 0x5d, 0x49, 0x75, 0x61, + 0x0d, 0x19, 0x25, 0x31, 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, + 0x85, 0x91, 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6, + 0x1a, 0x0e, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76, 0xe7, 0xf3, + 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b, 0x47, 0x53, 0x6f, 0x7b, + 0x17, 0x03, 0x3f, 0x2b, 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, + 0x11, 0x05, 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5, + 0x34, 0x20, 0x1c, 0x08, 0x64, 0x70, 0x4c, 0x58, 0x94, 0x80, + 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8, 0xd3, 0xc7, 0xfb, 0xef, + 0x83, 0x97, 0xab, 0xbf, 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, + 0x0b, 0x1f, 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2, + 0x2e, 0x3a, 0x06, 0x12, 0x7e, 0x6a, 0x56, 0x42, 0xd2, 0xc6, + 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe, 0x72, 0x66, 0x5a, 0x4e, + 0x22, 0x36, 0x0a, 0x1e, 0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb, + 0xf7, 0xe3, 0x2f, 0x3b, 0x07, 0x13, 0x7f, 0x6b, 0x57, 0x43, + 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x04, 0xc8, 0xdc, + 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4, 0x35, 0x21, 0x1d, 0x09, + 0x65, 0x71, 0x4d, 0x59, 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, + 0xed, 0xf9, 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7, + 0x1b, 0x0f, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77, 0xe6, 0xf2, + 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a, 0x46, 0x52, 0x6e, 0x7a, + 0x16, 0x02, 0x3e, 0x2a, 0x01, 0x15, 0x29, 0x3d, 0x51, 0x45, + 0x79, 0x6d, 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd, + 0x5c, 0x48, 0x74, 0x60, 0x0c, 0x18, 0x24, 0x30, 0xfc, 0xe8, + 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90, 0x00, 0x15, 0x2a, 0x3f, + 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, + 0xd6, 0xc3, 0x4d, 0x58, 0x67, 0x72, 0x19, 0x0c, 0x33, 0x26, + 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e, 0x9a, 0x8f, + 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1, 0x32, 0x27, 0x18, 0x0d, + 0x66, 0x73, 0x4c, 0x59, 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, + 0xa9, 0xbc, 0x7f, 0x6a, 
0x55, 0x40, 0x2b, 0x3e, 0x01, 0x14, + 0x29, 0x3c, 0x03, 0x16, 0x7d, 0x68, 0x57, 0x42, 0x81, 0x94, + 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea, 0x64, 0x71, 0x4e, 0x5b, + 0x30, 0x25, 0x1a, 0x0f, 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, + 0xb2, 0xa7, 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8, + 0x1b, 0x0e, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70, 0xfe, 0xeb, + 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95, 0x56, 0x43, 0x7c, 0x69, + 0x02, 0x17, 0x28, 0x3d, 0x52, 0x47, 0x78, 0x6d, 0x06, 0x13, + 0x2c, 0x39, 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91, + 0x1f, 0x0a, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74, 0xb7, 0xa2, + 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc, 0xc8, 0xdd, 0xe2, 0xf7, + 0x9c, 0x89, 0xb6, 0xa3, 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, + 0x1e, 0x0b, 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee, + 0x2d, 0x38, 0x07, 0x12, 0x79, 0x6c, 0x53, 0x46, 0x7b, 0x6e, + 0x51, 0x44, 0x2f, 0x3a, 0x05, 0x10, 0xd3, 0xc6, 0xf9, 0xec, + 0x87, 0x92, 0xad, 0xb8, 0x36, 0x23, 0x1c, 0x09, 0x62, 0x77, + 0x48, 0x5d, 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5, + 0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a, 0x49, 0x5c, + 0x63, 0x76, 0x1d, 0x08, 0x37, 0x22, 0xac, 0xb9, 0x86, 0x93, + 0xf8, 0xed, 0xd2, 0xc7, 0x04, 0x11, 0x2e, 0x3b, 0x50, 0x45, + 0x7a, 0x6f, 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, + 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2, 0x7d, 0x6b, + 0x51, 0x47, 0x25, 0x33, 0x09, 0x1f, 0xcd, 0xdb, 0xe1, 0xf7, + 0x95, 0x83, 0xb9, 0xaf, 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, + 0x8e, 0x98, 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x04, 0x3e, 0x28, + 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5, 0x37, 0x21, + 0x1b, 0x0d, 0x6f, 0x79, 0x43, 0x55, 0xe9, 0xff, 0xc5, 0xd3, + 0xb1, 0xa7, 0x9d, 0x8b, 0x59, 0x4f, 0x75, 0x63, 0x01, 0x17, + 0x2d, 0x3b, 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6, + 0x24, 0x32, 0x08, 0x1e, 0x7c, 0x6a, 0x50, 0x46, 0x13, 0x05, + 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71, 0xa3, 0xb5, 0x8f, 0x99, + 0xfb, 0xed, 0xd7, 0xc1, 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, + 0x1a, 0x0c, 0xde, 0xc8, 0xf2, 0xe4, 
0x86, 0x90, 0xaa, 0xbc, + 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad, 0x7f, 0x69, + 0x53, 0x45, 0x27, 0x31, 0x0b, 0x1d, 0xb2, 0xa4, 0x9e, 0x88, + 0xea, 0xfc, 0xc6, 0xd0, 0x02, 0x14, 0x2e, 0x38, 0x5a, 0x4c, + 0x76, 0x60, 0x35, 0x23, 0x19, 0x0f, 0x6d, 0x7b, 0x41, 0x57, + 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7, 0x48, 0x5e, + 0x64, 0x72, 0x10, 0x06, 0x3c, 0x2a, 0xf8, 0xee, 0xd4, 0xc2, + 0xa0, 0xb6, 0x8c, 0x9a, 0x26, 0x30, 0x0a, 0x1c, 0x7e, 0x68, + 0x52, 0x44, 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4, + 0x5b, 0x4d, 0x77, 0x61, 0x03, 0x15, 0x2f, 0x39, 0xeb, 0xfd, + 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89, 0xdc, 0xca, 0xf0, 0xe6, + 0x84, 0x92, 0xa8, 0xbe, 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, + 0x18, 0x0e, 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3, + 0x11, 0x07, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73, 0x00, 0x17, + 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, + 0xe4, 0xf3, 0xca, 0xdd, 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, + 0x1f, 0x08, 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0, + 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf, 0x62, 0x75, + 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x07, 0xb7, 0xa0, 0x99, 0x8e, + 0xeb, 0xfc, 0xc5, 0xd2, 0x0f, 0x18, 0x21, 0x36, 0x53, 0x44, + 0x7d, 0x6a, 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc, + 0x11, 0x06, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74, 0xc4, 0xd3, + 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1, 0x7c, 0x6b, 0x52, 0x45, + 0x20, 0x37, 0x0e, 0x19, 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, + 0x01, 0x16, 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae, + 0x1e, 0x09, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b, 0xa6, 0xb1, + 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3, 0x4f, 0x58, 0x61, 0x76, + 0x13, 0x04, 0x3d, 0x2a, 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, + 0x85, 0x92, 0x22, 0x35, 0x0c, 0x1b, 0x7e, 0x69, 0x50, 0x47, + 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff, 0x95, 0x82, + 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0, 0x2d, 0x3a, 0x03, 0x14, + 0x71, 0x66, 0x5f, 0x48, 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, + 0x8a, 0x9d, 0x40, 0x57, 0x6e, 0x79, 0x1c, 0x0b, 
0x32, 0x25, + 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83, 0x5e, 0x49, + 0x70, 0x67, 0x02, 0x15, 0x2c, 0x3b, 0x8b, 0x9c, 0xa5, 0xb2, + 0xd7, 0xc0, 0xf9, 0xee, 0x33, 0x24, 0x1d, 0x0a, 0x6f, 0x78, + 0x41, 0x56, 0x3c, 0x2b, 0x12, 0x05, 0x60, 0x77, 0x4e, 0x59, + 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1, 0x51, 0x46, + 0x7f, 0x68, 0x0d, 0x1a, 0x23, 0x34, 0xe9, 0xfe, 0xc7, 0xd0, + 0xb5, 0xa2, 0x9b, 0x8c, 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, + 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88, + 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5, 0x5d, 0x45, + 0x6d, 0x75, 0x3d, 0x25, 0x0d, 0x15, 0x27, 0x3f, 0x17, 0x0f, + 0x47, 0x5f, 0x77, 0x6f, 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, + 0xb7, 0xaf, 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2, + 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x02, 0x2a, 0x32, 0x4e, 0x56, + 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x06, 0x8e, 0x96, 0xbe, 0xa6, + 0xee, 0xf6, 0xde, 0xc6, 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, + 0x83, 0x9b, 0x13, 0x0b, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b, + 0x69, 0x71, 0x59, 0x41, 0x09, 0x11, 0x39, 0x21, 0xa9, 0xb1, + 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1, 0xf4, 0xec, 0xc4, 0xdc, + 0x94, 0x8c, 0xa4, 0xbc, 0x34, 0x2c, 0x04, 0x1c, 0x54, 0x4c, + 0x64, 0x7c, 0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4, + 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0x0c, 0x14, 0x01, 0x19, + 0x31, 0x29, 0x61, 0x79, 0x51, 0x49, 0xc1, 0xd9, 0xf1, 0xe9, + 0xa1, 0xb9, 0x91, 0x89, 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, + 0xeb, 0xf3, 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x03, 0x2b, 0x33, + 0x26, 0x3e, 0x16, 0x0e, 0x46, 0x5e, 0x76, 0x6e, 0xe6, 0xfe, + 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae, 0xd2, 0xca, 0xe2, 0xfa, + 0xb2, 0xaa, 0x82, 0x9a, 0x12, 0x0a, 0x22, 0x3a, 0x72, 0x6a, + 0x42, 0x5a, 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x07, + 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7, 0xf5, 0xed, + 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd, 0x35, 0x2d, 0x05, 0x1d, + 0x55, 0x4d, 0x65, 0x7d, 0x68, 0x70, 0x58, 0x40, 0x08, 0x10, + 0x38, 0x20, 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0, + 
0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, + 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87, 0x8d, 0x94, 0xbf, 0xa6, + 0xe9, 0xf0, 0xdb, 0xc2, 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, + 0x13, 0x0a, 0x07, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48, + 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80, 0x8a, 0x93, + 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5, 0x42, 0x5b, 0x70, 0x69, + 0x26, 0x3f, 0x14, 0x0d, 0x0e, 0x17, 0x3c, 0x25, 0x6a, 0x73, + 0x58, 0x41, 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89, + 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc, 0x4b, 0x52, + 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x04, 0x09, 0x10, 0x3b, 0x22, + 0x6d, 0x74, 0x5f, 0x46, 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, + 0x97, 0x8e, 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb, + 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x03, 0x1c, 0x05, + 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53, 0xd4, 0xcd, 0xe6, 0xff, + 0xb0, 0xa9, 0x82, 0x9b, 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, + 0xc7, 0xde, 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0x0f, 0x16, + 0x1b, 0x02, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54, 0xd3, 0xca, + 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c, 0x96, 0x8f, 0xa4, 0xbd, + 0xf2, 0xeb, 0xc0, 0xd9, 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, + 0x08, 0x11, 0x12, 0x0b, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d, + 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95, 0x9f, 0x86, + 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0, 0x57, 0x4e, 0x65, 0x7c, + 0x33, 0x2a, 0x01, 0x18, 0x15, 0x0c, 0x27, 0x3e, 0x71, 0x68, + 0x43, 0x5a, 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92, + 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7, 0x50, 0x49, + 0x62, 0x7b, 0x34, 0x2d, 0x06, 0x1f, 0x00, 0x1a, 0x34, 0x2e, + 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, + 0x8c, 0x96, 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb, + 0x6d, 0x77, 0x59, 0x43, 0x05, 0x1f, 0x31, 0x2b, 0x67, 0x7d, + 0x53, 0x49, 0x0f, 0x15, 0x3b, 0x21, 0xb7, 0xad, 0x83, 0x99, + 0xdf, 0xc5, 0xeb, 0xf1, 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, + 0x86, 0x9c, 0x0a, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c, + 0xce, 0xd4, 
0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88, 0x1e, 0x04, + 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58, 0x73, 0x69, 0x47, 0x5d, + 0x1b, 0x01, 0x2f, 0x35, 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, + 0xff, 0xe5, 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef, + 0x79, 0x63, 0x4d, 0x57, 0x11, 0x0b, 0x25, 0x3f, 0x14, 0x0e, + 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52, 0xc4, 0xde, 0xf0, 0xea, + 0xac, 0xb6, 0x98, 0x82, 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, + 0xdd, 0xc7, 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0x0d, 0x17, + 0x3c, 0x26, 0x08, 0x12, 0x54, 0x4e, 0x60, 0x7a, 0xec, 0xf6, + 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa, 0xe6, 0xfc, 0xd2, 0xc8, + 0x8e, 0x94, 0xba, 0xa0, 0x36, 0x2c, 0x02, 0x18, 0x5e, 0x44, + 0x6a, 0x70, 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x07, 0x1d, + 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd, 0x4f, 0x55, + 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x09, 0x9f, 0x85, 0xab, 0xb1, + 0xf7, 0xed, 0xc3, 0xd9, 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, + 0xae, 0xb4, 0x22, 0x38, 0x16, 0x0c, 0x4a, 0x50, 0x7e, 0x64, + 0x28, 0x32, 0x1c, 0x06, 0x40, 0x5a, 0x74, 0x6e, 0xf8, 0xe2, + 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe, 0x95, 0x8f, 0xa1, 0xbb, + 0xfd, 0xe7, 0xc9, 0xd3, 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, + 0x19, 0x03, 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, + 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99, 0xad, 0xb6, + 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec, 0x75, 0x6e, 0x43, 0x58, + 0x19, 0x02, 0x2f, 0x34, 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, + 0x1d, 0x06, 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde, + 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab, 0x32, 0x29, + 0x04, 0x1f, 0x5e, 0x45, 0x68, 0x73, 0x8e, 0x95, 0xb8, 0xa3, + 0xe2, 0xf9, 0xd4, 0xcf, 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, + 0x0c, 0x17, 0x23, 0x38, 0x15, 0x0e, 0x4f, 0x54, 0x79, 0x62, + 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba, 0xc9, 0xd2, + 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88, 0x11, 0x0a, 0x27, 0x3c, + 0x7d, 0x66, 0x4b, 0x50, 0x64, 0x7f, 0x52, 0x49, 0x08, 0x13, + 0x3e, 0x25, 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd, + 0x01, 0x1a, 0x37, 0x2c, 
0x6d, 0x76, 0x5b, 0x40, 0xd9, 0xc2, + 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98, 0xac, 0xb7, 0x9a, 0x81, + 0xc0, 0xdb, 0xf6, 0xed, 0x74, 0x6f, 0x42, 0x59, 0x18, 0x03, + 0x2e, 0x35, 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x07, + 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf, 0xeb, 0xf0, + 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa, 0x33, 0x28, 0x05, 0x1e, + 0x5f, 0x44, 0x69, 0x72, 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, + 0xd5, 0xce, 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0x0d, 0x16, + 0x22, 0x39, 0x14, 0x0f, 0x4e, 0x55, 0x78, 0x63, 0xfa, 0xe1, + 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb, 0xc8, 0xd3, 0xfe, 0xe5, + 0xa4, 0xbf, 0x92, 0x89, 0x10, 0x0b, 0x26, 0x3d, 0x7c, 0x67, + 0x4a, 0x51, 0x65, 0x7e, 0x53, 0x48, 0x09, 0x12, 0x3f, 0x24, + 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc, 0x00, 0x1c, + 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, + 0x90, 0x8c, 0xa8, 0xb4, 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, + 0x95, 0x89, 0x3d, 0x21, 0x05, 0x19, 0x4d, 0x51, 0x75, 0x69, + 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3, 0x47, 0x5b, + 0x7f, 0x63, 0x37, 0x2b, 0x0f, 0x13, 0x7a, 0x66, 0x42, 0x5e, + 0x0a, 0x16, 0x32, 0x2e, 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, + 0xd2, 0xce, 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x07, + 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7, 0x8e, 0x92, + 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda, 0x6e, 0x72, 0x56, 0x4a, + 0x1e, 0x02, 0x26, 0x3a, 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, + 0xbc, 0xa0, 0x14, 0x08, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40, + 0x29, 0x35, 0x11, 0x0d, 0x59, 0x45, 0x61, 0x7d, 0xc9, 0xd5, + 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d, 0xa6, 0xba, 0x9e, 0x82, + 0xd6, 0xca, 0xee, 0xf2, 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, + 0x0e, 0x12, 0x7b, 0x67, 0x43, 0x5f, 0x0b, 0x17, 0x33, 0x2f, + 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf, 0x01, 0x1d, + 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55, 0xe1, 0xfd, 0xd9, 0xc5, + 0x91, 0x8d, 0xa9, 0xb5, 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, + 0x94, 0x88, 0x3c, 0x20, 0x04, 0x18, 0x4c, 0x50, 0x74, 0x68, + 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 
0xbd, 0xa1, 0x15, 0x09, + 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41, 0x28, 0x34, 0x10, 0x0c, + 0x58, 0x44, 0x60, 0x7c, 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, + 0x80, 0x9c, 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x06, + 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6, 0x8f, 0x93, + 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb, 0x6f, 0x73, 0x57, 0x4b, + 0x1f, 0x03, 0x27, 0x3b, 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, + 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb, + 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e, 0x25, 0x38, + 0x1f, 0x02, 0x51, 0x4c, 0x6b, 0x76, 0x87, 0x9a, 0xbd, 0xa0, + 0xf3, 0xee, 0xc9, 0xd4, 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x06, + 0x21, 0x3c, 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x04, 0x19, + 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1, 0x13, 0x0e, + 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40, 0xfb, 0xe6, 0xc1, 0xdc, + 0x8f, 0x92, 0xb5, 0xa8, 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, + 0x90, 0x8d, 0x36, 0x2b, 0x0c, 0x11, 0x42, 0x5f, 0x78, 0x65, + 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7, 0x7c, 0x61, + 0x46, 0x5b, 0x08, 0x15, 0x32, 0x2f, 0x59, 0x44, 0x63, 0x7e, + 0x2d, 0x30, 0x17, 0x0a, 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, + 0xff, 0xe2, 0x26, 0x3b, 0x1c, 0x01, 0x52, 0x4f, 0x68, 0x75, + 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d, 0xeb, 0xf6, + 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8, 0x03, 0x1e, 0x39, 0x24, + 0x77, 0x6a, 0x4d, 0x50, 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, + 0xef, 0xf2, 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x07, 0x1a, + 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x05, 0x22, 0x3f, 0x84, 0x99, + 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7, 0x35, 0x28, 0x0f, 0x12, + 0x41, 0x5c, 0x7b, 0x66, 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, + 0x93, 0x8e, 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab, + 0x10, 0x0d, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43, 0xb2, 0xaf, + 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1, 0x5a, 0x47, 0x60, 0x7d, + 0x2e, 0x33, 0x14, 0x09, 0x7f, 0x62, 0x45, 0x58, 0x0b, 0x16, + 0x31, 0x2c, 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4, + 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 
0xf0, 0xee, + 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa, 0xfd, 0xe3, 0xc1, 0xdf, + 0x85, 0x9b, 0xb9, 0xa7, 0x0d, 0x13, 0x31, 0x2f, 0x75, 0x6b, + 0x49, 0x57, 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd, + 0x17, 0x09, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d, 0x1a, 0x04, + 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40, 0xea, 0xf4, 0xd6, 0xc8, + 0x92, 0x8c, 0xae, 0xb0, 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, + 0x97, 0x89, 0x23, 0x3d, 0x1f, 0x01, 0x5b, 0x45, 0x67, 0x79, + 0x2e, 0x30, 0x12, 0x0c, 0x56, 0x48, 0x6a, 0x74, 0xde, 0xc0, + 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84, 0x34, 0x2a, 0x08, 0x16, + 0x4c, 0x52, 0x70, 0x6e, 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, + 0x80, 0x9e, 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93, + 0x39, 0x27, 0x05, 0x1b, 0x41, 0x5f, 0x7d, 0x63, 0xbb, 0xa5, + 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1, 0x4b, 0x55, 0x77, 0x69, + 0x33, 0x2d, 0x0f, 0x11, 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, + 0x02, 0x1c, 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec, + 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x06, 0xac, 0xb2, + 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6, 0xa1, 0xbf, 0x9d, 0x83, + 0xd9, 0xc7, 0xe5, 0xfb, 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, + 0x15, 0x0b, 0x68, 0x76, 0x54, 0x4a, 0x10, 0x0e, 0x2c, 0x32, + 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2, 0x95, 0x8b, + 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf, 0x65, 0x7b, 0x59, 0x47, + 0x1d, 0x03, 0x21, 0x3f, 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, + 0xcb, 0xd5, 0x7f, 0x61, 0x43, 0x5d, 0x07, 0x19, 0x3b, 0x25, + 0x72, 0x6c, 0x4e, 0x50, 0x0a, 0x14, 0x36, 0x28, 0x82, 0x9c, + 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8, 0x00, 0x1f, 0x3e, 0x21, + 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, + 0xba, 0xa5, 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0, + 0x15, 0x0a, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48, 0xc7, 0xd8, + 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a, 0x3f, 0x20, 0x01, 0x1e, + 0x43, 0x5c, 0x7d, 0x62, 0x2a, 0x35, 0x14, 0x0b, 0x56, 0x49, + 0x68, 0x77, 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f, + 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce, 0x6b, 0x74, + 
0x55, 0x4a, 0x17, 0x08, 0x29, 0x36, 0x7e, 0x61, 0x40, 0x5f, + 0x02, 0x1d, 0x3c, 0x23, 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, + 0xc4, 0xdb, 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x09, + 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1, 0xb9, 0xa6, + 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4, 0x41, 0x5e, 0x7f, 0x60, + 0x3d, 0x22, 0x03, 0x1c, 0x3b, 0x24, 0x05, 0x1a, 0x47, 0x58, + 0x79, 0x66, 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e, + 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b, 0x2e, 0x31, + 0x10, 0x0f, 0x52, 0x4d, 0x6c, 0x73, 0xfc, 0xe3, 0xc2, 0xdd, + 0x80, 0x9f, 0xbe, 0xa1, 0x04, 0x1b, 0x3a, 0x25, 0x78, 0x67, + 0x46, 0x59, 0x11, 0x0e, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c, + 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4, 0xa8, 0xb7, + 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5, 0x50, 0x4f, 0x6e, 0x71, + 0x2c, 0x33, 0x12, 0x0d, 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, + 0x07, 0x18, 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0, + 0x6f, 0x70, 0x51, 0x4e, 0x13, 0x0c, 0x2d, 0x32, 0x97, 0x88, + 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca, 0x82, 0x9d, 0xbc, 0xa3, + 0xfe, 0xe1, 0xc0, 0xdf, 0x7a, 0x65, 0x44, 0x5b, 0x06, 0x19, + 0x38, 0x27, 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, + 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd, 0x3a, 0x1a, + 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda, 0x27, 0x07, 0x67, 0x47, + 0xa7, 0x87, 0xe7, 0xc7, 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, + 0xb4, 0x94, 0x69, 0x49, 0x29, 0x09, 0xe9, 0xc9, 0xa9, 0x89, + 0x4e, 0x6e, 0x0e, 0x2e, 0xce, 0xee, 0x8e, 0xae, 0x53, 0x73, + 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3, 0xe8, 0xc8, 0xa8, 0x88, + 0x68, 0x48, 0x28, 0x08, 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, + 0x35, 0x15, 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32, + 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0x0f, 0x2f, 0x9c, 0xbc, + 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c, 0x81, 0xa1, 0xc1, 0xe1, + 0x01, 0x21, 0x41, 0x61, 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x06, + 0x66, 0x46, 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b, + 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0x0d, 0x2d, 0xd0, 0xf0, + 0x90, 0xb0, 
0x50, 0x70, 0x10, 0x30, 0xf7, 0xd7, 0xb7, 0x97, + 0x77, 0x57, 0x37, 0x17, 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, + 0x2a, 0x0a, 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59, + 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x04, 0x64, 0x44, 0x83, 0xa3, + 0xc3, 0xe3, 0x03, 0x23, 0x43, 0x63, 0x9e, 0xbe, 0xde, 0xfe, + 0x1e, 0x3e, 0x5e, 0x7e, 0x25, 0x05, 0x65, 0x45, 0xa5, 0x85, + 0xe5, 0xc5, 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8, + 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff, 0x02, 0x22, + 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2, 0x51, 0x71, 0x11, 0x31, + 0xd1, 0xf1, 0x91, 0xb1, 0x4c, 0x6c, 0x0c, 0x2c, 0xcc, 0xec, + 0x8c, 0xac, 0x6b, 0x4b, 0x2b, 0x0b, 0xeb, 0xcb, 0xab, 0x8b, + 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96, 0x00, 0x21, + 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, + 0x91, 0xb0, 0xd3, 0xf2, 0x2a, 0x0b, 0x68, 0x49, 0xae, 0x8f, + 0xec, 0xcd, 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8, + 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3, 0x41, 0x60, + 0x03, 0x22, 0xc5, 0xe4, 0x87, 0xa6, 0x7e, 0x5f, 0x3c, 0x1d, + 0xfa, 0xdb, 0xb8, 0x99, 0x6b, 0x4a, 0x29, 0x08, 0xef, 0xce, + 0xad, 0x8c, 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0x0d, 0x6e, 0x4f, + 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a, 0x82, 0xa3, + 0xc0, 0xe1, 0x06, 0x27, 0x44, 0x65, 0x97, 0xb6, 0xd5, 0xf4, + 0x13, 0x32, 0x51, 0x70, 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, + 0x3a, 0x1b, 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0x0e, + 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31, 0xc3, 0xe2, + 0x81, 0xa0, 0x47, 0x66, 0x05, 0x24, 0x4d, 0x6c, 0x0f, 0x2e, + 0xc9, 0xe8, 0x8b, 0xaa, 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, + 0x9e, 0xbf, 0x67, 0x46, 0x25, 0x04, 0xe3, 0xc2, 0xa1, 0x80, + 0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95, 0x19, 0x38, + 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe, 0x0c, 0x2d, 0x4e, 0x6f, + 0x88, 0xa9, 0xca, 0xeb, 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, + 0xf5, 0xd4, 0x26, 0x07, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1, + 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x02, 0xf0, 0xd1, + 0xb2, 0x93, 0x74, 0x55, 
0x36, 0x17, 0xcf, 0xee, 0x8d, 0xac, + 0x4b, 0x6a, 0x09, 0x28, 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, + 0x1c, 0x3d, 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56, + 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x01, 0x62, 0x43, 0x9b, 0xba, + 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c, 0x8e, 0xaf, 0xcc, 0xed, + 0x0a, 0x2b, 0x48, 0x69, 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, + 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3, + 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4, 0x17, 0x35, + 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9, 0x34, 0x16, 0x70, 0x52, + 0xbc, 0x9e, 0xf8, 0xda, 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, + 0xf5, 0xd7, 0x2e, 0x0c, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0, + 0x23, 0x01, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd, 0x68, 0x4a, + 0x2c, 0x0e, 0xe0, 0xc2, 0xa4, 0x86, 0x65, 0x47, 0x21, 0x03, + 0xed, 0xcf, 0xa9, 0x8b, 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, + 0xbe, 0x9c, 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91, + 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2, 0x51, 0x73, + 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf, 0x46, 0x64, 0x02, 0x20, + 0xce, 0xec, 0x8a, 0xa8, 0x4b, 0x69, 0x0f, 0x2d, 0xc3, 0xe1, + 0x87, 0xa5, 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e, + 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33, 0xca, 0xe8, + 0x8e, 0xac, 0x42, 0x60, 0x06, 0x24, 0xc7, 0xe5, 0x83, 0xa1, + 0x4f, 0x6d, 0x0b, 0x29, 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, + 0x28, 0x0a, 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x07, + 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xf3, 0xd1, + 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d, 0xb8, 0x9a, 0xfc, 0xde, + 0x30, 0x12, 0x74, 0x56, 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, + 0x79, 0x5b, 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x08, 0x6e, 0x4c, + 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x05, 0x63, 0x41, 0x8c, 0xae, + 0xc8, 0xea, 0x04, 0x26, 0x40, 0x62, 0x81, 0xa3, 0xc5, 0xe7, + 0x09, 0x2b, 0x4d, 0x6f, 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, + 0x5a, 0x78, 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75, + 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, + 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec, 
0x0a, 0x29, 0x4c, 0x6f, + 0x86, 0xa5, 0xc0, 0xe3, 0x0f, 0x2c, 0x49, 0x6a, 0x83, 0xa0, + 0xc5, 0xe6, 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd, + 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8, 0x1e, 0x3d, + 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7, 0x1b, 0x38, 0x5d, 0x7e, + 0x97, 0xb4, 0xd1, 0xf2, 0x28, 0x0b, 0x6e, 0x4d, 0xa4, 0x87, + 0xe2, 0xc1, 0x2d, 0x0e, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4, + 0x22, 0x01, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb, 0x27, 0x04, + 0x61, 0x42, 0xab, 0x88, 0xed, 0xce, 0x3c, 0x1f, 0x7a, 0x59, + 0xb0, 0x93, 0xf6, 0xd5, 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, + 0xf3, 0xd0, 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf, + 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda, 0x50, 0x73, + 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9, 0x55, 0x76, 0x13, 0x30, + 0xd9, 0xfa, 0x9f, 0xbc, 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, + 0x90, 0xb3, 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6, + 0x44, 0x67, 0x02, 0x21, 0xc8, 0xeb, 0x8e, 0xad, 0x41, 0x62, + 0x07, 0x24, 0xcd, 0xee, 0x8b, 0xa8, 0x4e, 0x6d, 0x08, 0x2b, + 0xc2, 0xe1, 0x84, 0xa7, 0x4b, 0x68, 0x0d, 0x2e, 0xc7, 0xe4, + 0x81, 0xa2, 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91, + 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94, 0x72, 0x51, + 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b, 0x77, 0x54, 0x31, 0x12, + 0xfb, 0xd8, 0xbd, 0x9e, 0x6c, 0x4f, 0x2a, 0x09, 0xe0, 0xc3, + 0xa6, 0x85, 0x69, 0x4a, 0x2f, 0x0c, 0xe5, 0xc6, 0xa3, 0x80, + 0x66, 0x45, 0x20, 0x03, 0xea, 0xc9, 0xac, 0x8f, 0x63, 0x40, + 0x25, 0x06, 0xef, 0xcc, 0xa9, 0x8a, 0x00, 0x24, 0x48, 0x6c, + 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, + 0xe5, 0xc1, 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86, + 0x47, 0x63, 0x0f, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb, 0xf4, 0xd0, + 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x08, 0xc9, 0xed, 0x81, 0xa5, + 0x59, 0x7d, 0x11, 0x35, 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, + 0x56, 0x72, 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x07, 0x6b, 0x4f, + 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x09, 0xc8, 0xec, + 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34, 0x8f, 0xab, 
0xc7, 0xe3, + 0x1f, 0x3b, 0x57, 0x73, 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x06, + 0x6a, 0x4e, 0x01, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd, + 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0, 0x7b, 0x5f, + 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87, 0x46, 0x62, 0x0e, 0x2a, + 0xd6, 0xf2, 0x9e, 0xba, 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, + 0x2f, 0x0b, 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36, + 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71, 0xb0, 0x94, + 0xf8, 0xdc, 0x20, 0x04, 0x68, 0x4c, 0x03, 0x27, 0x4b, 0x6f, + 0x93, 0xb7, 0xdb, 0xff, 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, + 0xe6, 0xc2, 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85, + 0x44, 0x60, 0x0c, 0x28, 0xd4, 0xf0, 0x9c, 0xb8, 0x02, 0x26, + 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe, 0x3f, 0x1b, 0x77, 0x53, + 0xaf, 0x8b, 0xe7, 0xc3, 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, + 0xa0, 0x84, 0x45, 0x61, 0x0d, 0x29, 0xd5, 0xf1, 0x9d, 0xb9, + 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0x0a, 0xcb, 0xef, + 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37, 0x8c, 0xa8, 0xc4, 0xe0, + 0x1c, 0x38, 0x54, 0x70, 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x05, + 0x69, 0x4d, 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, + 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce, 0x6a, 0x4f, + 0x20, 0x05, 0xfe, 0xdb, 0xb4, 0x91, 0x5f, 0x7a, 0x15, 0x30, + 0xcb, 0xee, 0x81, 0xa4, 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, + 0x0a, 0x2f, 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a, + 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0x0f, 0x60, 0x45, 0x8b, 0xae, + 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70, 0xb5, 0x90, 0xff, 0xda, + 0x21, 0x04, 0x6b, 0x4e, 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, + 0x5e, 0x7b, 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x01, 0x24, + 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11, 0x61, 0x44, + 0x2b, 0x0e, 0xf5, 0xd0, 0xbf, 0x9a, 0x54, 0x71, 0x1e, 0x3b, + 0xc0, 0xe5, 0x8a, 0xaf, 0x0b, 0x2e, 0x41, 0x64, 0x9f, 0xba, + 0xd5, 0xf0, 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5, + 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c, 0x42, 0x67, + 0x08, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9, 0x1d, 0x38, 0x57, 0x72, + 
0x89, 0xac, 0xc3, 0xe6, 0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99, + 0xf6, 0xd3, 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58, + 0x96, 0xb3, 0xdc, 0xf9, 0x02, 0x27, 0x48, 0x6d, 0xc9, 0xec, + 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32, 0xfc, 0xd9, 0xb6, 0x93, + 0x68, 0x4d, 0x22, 0x07, 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, + 0x1c, 0x39, 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0x0c, + 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53, 0x9d, 0xb8, + 0xd7, 0xf2, 0x09, 0x2c, 0x43, 0x66, 0x16, 0x33, 0x5c, 0x79, + 0x82, 0xa7, 0xc8, 0xed, 0x23, 0x06, 0x69, 0x4c, 0xb7, 0x92, + 0xfd, 0xd8, 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87, + 0x49, 0x6c, 0x03, 0x26, 0xdd, 0xf8, 0x97, 0xb2, 0x00, 0x26, + 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, + 0xb5, 0x93, 0xf9, 0xdf, 0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4, + 0x8e, 0xa8, 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85, + 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0x0a, 0x60, 0x46, 0x99, 0xbf, + 0xd5, 0xf3, 0x01, 0x27, 0x4d, 0x6b, 0xee, 0xc8, 0xa2, 0x84, + 0x76, 0x50, 0x3a, 0x1c, 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, + 0x17, 0x31, 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87, + 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa, 0x2f, 0x09, + 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd, 0x02, 0x24, 0x4e, 0x68, + 0x9a, 0xbc, 0xd6, 0xf0, 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, + 0x15, 0x33, 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e, + 0x9b, 0xbd, 0xd7, 0xf1, 0x03, 0x25, 0x4f, 0x69, 0xb6, 0x90, + 0xfa, 0xdc, 0x2e, 0x08, 0x62, 0x44, 0xea, 0xcc, 0xa6, 0x80, + 0x72, 0x54, 0x3e, 0x18, 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, + 0x13, 0x35, 0xb0, 0x96, 0xfc, 0xda, 0x28, 0x0e, 0x64, 0x42, + 0x9d, 0xbb, 0xd1, 0xf7, 0x05, 0x23, 0x49, 0x6f, 0x5e, 0x78, + 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac, 0x73, 0x55, 0x3f, 0x19, + 0xeb, 0xcd, 0xa7, 0x81, 0x04, 0x22, 0x48, 0x6e, 0x9c, 0xba, + 0xd0, 0xf6, 0x29, 0x0f, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb, + 0x9f, 0xb9, 0xd3, 0xf5, 0x07, 0x21, 0x4b, 0x6d, 0xb2, 0x94, + 0xfe, 0xd8, 0x2a, 0x0c, 0x66, 0x40, 0xc5, 0xe3, 0x89, 0xaf, + 0x5d, 0x7b, 
0x11, 0x37, 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, + 0x3c, 0x1a, 0x2b, 0x0d, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9, + 0x06, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4, 0x71, 0x57, + 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83, 0x5c, 0x7a, 0x10, 0x36, + 0xc4, 0xe2, 0x88, 0xae, 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, + 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0, + 0x4a, 0x6d, 0x04, 0x23, 0xd6, 0xf1, 0x98, 0xbf, 0x6f, 0x48, + 0x21, 0x06, 0xf3, 0xd4, 0xbd, 0x9a, 0x94, 0xb3, 0xda, 0xfd, + 0x08, 0x2f, 0x46, 0x61, 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0x0a, + 0x63, 0x44, 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0x0c, 0x2b, + 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0x0e, 0x35, 0x12, + 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0, 0x10, 0x37, 0x5e, 0x79, + 0x8c, 0xab, 0xc2, 0xe5, 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, + 0xad, 0x8a, 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf, + 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54, 0x84, 0xa3, + 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71, 0xeb, 0xcc, 0xa5, 0x82, + 0x77, 0x50, 0x39, 0x1e, 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, + 0x1c, 0x3b, 0x6a, 0x4d, 0x24, 0x03, 0xf6, 0xd1, 0xb8, 0x9f, + 0x4f, 0x68, 0x01, 0x26, 0xd3, 0xf4, 0x9d, 0xba, 0x20, 0x07, + 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5, 0x05, 0x22, 0x4b, 0x6c, + 0x99, 0xbe, 0xd7, 0xf0, 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, + 0x2c, 0x0b, 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x09, 0x2e, + 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0x0f, 0x66, 0x41, 0x91, 0xb6, + 0xdf, 0xf8, 0x0d, 0x2a, 0x43, 0x64, 0x5f, 0x78, 0x11, 0x36, + 0xc3, 0xe4, 0x8d, 0xaa, 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, + 0xa8, 0x8f, 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0, + 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5, 0xcb, 0xec, + 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e, 0xee, 0xc9, 0xa0, 0x87, + 0x72, 0x55, 0x3c, 0x1b, 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, + 0x53, 0x74, 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51, + 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, + 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85, 0xba, 0x92, 0xea, 0xc2, + 0x1a, 0x32, 0x4a, 0x62, 
0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, + 0x17, 0x3f, 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1, + 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec, 0xd3, 0xfb, + 0x83, 0xab, 0x73, 0x5b, 0x23, 0x0b, 0x8e, 0xa6, 0xde, 0xf6, + 0x2e, 0x06, 0x7e, 0x56, 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, + 0x22, 0x0a, 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x07, 0x7f, 0x57, + 0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0, 0x35, 0x1d, + 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed, 0xbb, 0x93, 0xeb, 0xc3, + 0x1b, 0x33, 0x4b, 0x63, 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, + 0x16, 0x3e, 0x01, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9, + 0x5c, 0x74, 0x0c, 0x24, 0xfc, 0xd4, 0xac, 0x84, 0xb9, 0x91, + 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61, 0xe4, 0xcc, 0xb4, 0x9c, + 0x44, 0x6c, 0x14, 0x3c, 0x03, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, + 0xf3, 0xdb, 0x5e, 0x76, 0x0e, 0x26, 0xfe, 0xd6, 0xae, 0x86, + 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x08, 0x8d, 0xa5, + 0xdd, 0xf5, 0x2d, 0x05, 0x7d, 0x55, 0x6a, 0x42, 0x3a, 0x12, + 0xca, 0xe2, 0x9a, 0xb2, 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, + 0xc7, 0xef, 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3, + 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee, 0xd1, 0xf9, + 0x81, 0xa9, 0x71, 0x59, 0x21, 0x09, 0x8c, 0xa4, 0xdc, 0xf4, + 0x2c, 0x04, 0x7c, 0x54, 0x02, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, + 0xf2, 0xda, 0x5f, 0x77, 0x0f, 0x27, 0xff, 0xd7, 0xaf, 0x87, + 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60, 0xe5, 0xcd, + 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d, 0x00, 0x29, 0x52, 0x7b, + 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, + 0xa3, 0x8a, 0xaa, 0x83, 0xf8, 0xd1, 0x0e, 0x27, 0x5c, 0x75, + 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x09, 0x20, 0x49, 0x60, + 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96, 0x1c, 0x35, 0x4e, 0x67, + 0xb8, 0x91, 0xea, 0xc3, 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, + 0x15, 0x3c, 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69, + 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d, 0xc7, 0xee, + 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18, 0x38, 0x11, 0x6a, 0x43, + 0x9c, 0xb5, 0xce, 0xe7, 0x6d, 0x44, 
0x3f, 0x16, 0xc9, 0xe0, + 0x9b, 0xb2, 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x04, + 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x03, 0x78, 0x51, 0x71, 0x58, + 0x23, 0x0a, 0xd5, 0xfc, 0x87, 0xae, 0x24, 0x0d, 0x76, 0x5f, + 0x80, 0xa9, 0xd2, 0xfb, 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, + 0xcf, 0xe6, 0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3, + 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c, 0xc6, 0xef, + 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19, 0x70, 0x59, 0x22, 0x0b, + 0xd4, 0xfd, 0x86, 0xaf, 0x25, 0x0c, 0x77, 0x5e, 0x81, 0xa8, + 0xd3, 0xfa, 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x05, + 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x02, 0x79, 0x50, 0xab, 0x82, + 0xf9, 0xd0, 0x0f, 0x26, 0x5d, 0x74, 0xfe, 0xd7, 0xac, 0x85, + 0x5a, 0x73, 0x08, 0x21, 0x01, 0x28, 0x53, 0x7a, 0xa5, 0x8c, + 0xf7, 0xde, 0x54, 0x7d, 0x06, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b, + 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d, 0xb7, 0x9e, + 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68, 0x48, 0x61, 0x1a, 0x33, + 0xec, 0xc5, 0xbe, 0x97, 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, + 0xeb, 0xc2, 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, + 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b, 0x9a, 0xb0, + 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c, 0xd7, 0xfd, 0x83, 0xa9, + 0x7f, 0x55, 0x2b, 0x01, 0x29, 0x03, 0x7d, 0x57, 0x81, 0xab, + 0xd5, 0xff, 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2, + 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65, 0xfe, 0xd4, + 0xaa, 0x80, 0x56, 0x7c, 0x02, 0x28, 0x52, 0x78, 0x06, 0x2c, + 0xfa, 0xd0, 0xae, 0x84, 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, + 0xe3, 0xc9, 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e, + 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x07, 0x79, 0x53, 0x7b, 0x51, + 0x2f, 0x05, 0xd3, 0xf9, 0x87, 0xad, 0x36, 0x1c, 0x62, 0x48, + 0x9e, 0xb4, 0xca, 0xe0, 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, + 0x1d, 0x37, 0xac, 0x86, 0xf8, 0xd2, 0x04, 0x2e, 0x50, 0x7a, + 0xa4, 0x8e, 0xf0, 0xda, 0x0c, 0x26, 0x58, 0x72, 0xe9, 0xc3, + 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f, 0x3e, 0x14, 0x6a, 0x40, + 0x96, 0xbc, 0xc2, 0xe8, 0x73, 0x59, 0x27, 0x0d, 
0xdb, 0xf1, + 0x8f, 0xa5, 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0x0f, 0x71, 0x5b, + 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16, 0x17, 0x3d, + 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1, 0x5a, 0x70, 0x0e, 0x24, + 0xf2, 0xd8, 0xa6, 0x8c, 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, + 0x0a, 0x20, 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d, + 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba, 0x21, 0x0b, + 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7, 0xdf, 0xf5, 0x8b, 0xa1, + 0x77, 0x5d, 0x23, 0x09, 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, + 0x6e, 0x44, 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93, + 0x08, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde, 0x00, 0x2b, + 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, + 0xe9, 0xc2, 0xbf, 0x94, 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0x0d, + 0x70, 0x5b, 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e, + 0x09, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8, 0x4c, 0x67, + 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d, 0x83, 0xa8, 0xd5, 0xfe, + 0x2f, 0x04, 0x79, 0x52, 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, + 0x3c, 0x17, 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3, + 0x57, 0x7c, 0x01, 0x2a, 0xfb, 0xd0, 0xad, 0x86, 0x98, 0xb3, + 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49, 0xdd, 0xf6, 0x8b, 0xa0, + 0x71, 0x5a, 0x27, 0x0c, 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, + 0xe1, 0xca, 0x5e, 0x75, 0x08, 0x23, 0xf2, 0xd9, 0xa4, 0x8f, + 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40, 0xd4, 0xff, + 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x05, 0x24, 0x0f, 0x72, 0x59, + 0x88, 0xa3, 0xde, 0xf5, 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, + 0x9b, 0xb0, 0xae, 0x85, 0xf8, 0xd3, 0x02, 0x29, 0x54, 0x7f, + 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a, 0x2d, 0x06, + 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc, 0x68, 0x43, 0x3e, 0x15, + 0xc4, 0xef, 0x92, 0xb9, 0xa7, 0x8c, 0xf1, 0xda, 0x0b, 0x20, + 0x5d, 0x76, 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33, + 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7, 0x73, 0x58, + 0x25, 0x0e, 0xdf, 0xf4, 0x89, 0xa2, 0xbc, 0x97, 0xea, 0xc1, + 0x10, 0x3b, 0x46, 0x6d, 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, + 
0x03, 0x28, 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee, + 0x7a, 0x51, 0x2c, 0x07, 0xd6, 0xfd, 0x80, 0xab, 0xb5, 0x9e, + 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64, 0xf0, 0xdb, 0xa6, 0x8d, + 0x5c, 0x77, 0x0a, 0x21, 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, + 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9, + 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e, 0x87, 0xab, + 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43, 0xe9, 0xc5, 0xb1, 0x9d, + 0x59, 0x75, 0x01, 0x2d, 0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x08, + 0x7c, 0x50, 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7, + 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa, 0xcf, 0xe3, + 0x97, 0xbb, 0x7f, 0x53, 0x27, 0x0b, 0xb2, 0x9e, 0xea, 0xc6, + 0x02, 0x2e, 0x5a, 0x76, 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, + 0xdd, 0xf1, 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c, + 0x26, 0x0a, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2, 0x5b, 0x77, + 0x03, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f, 0xdc, 0xf0, 0x84, 0xa8, + 0x6c, 0x40, 0x34, 0x18, 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, + 0x49, 0x65, 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47, + 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a, 0x79, 0x55, + 0x21, 0x0d, 0xc9, 0xe5, 0x91, 0xbd, 0x04, 0x28, 0x5c, 0x70, + 0xb4, 0x98, 0xec, 0xc0, 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, + 0x82, 0xae, 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3, + 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0x0c, 0x78, 0x54, 0xed, 0xc1, + 0xb5, 0x99, 0x5d, 0x71, 0x05, 0x29, 0x4c, 0x60, 0x14, 0x38, + 0xfc, 0xd0, 0xa4, 0x88, 0x31, 0x1d, 0x69, 0x45, 0x81, 0xad, + 0xd9, 0xf5, 0xb6, 0x9a, 0xee, 0xc2, 0x06, 0x2a, 0x5e, 0x72, + 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0x0f, 0xa5, 0x89, + 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61, 0xd8, 0xf4, 0x80, 0xac, + 0x68, 0x44, 0x30, 0x1c, 0x5f, 0x73, 0x07, 0x2b, 0xef, 0xc3, + 0xb7, 0x9b, 0x22, 0x0e, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6, + 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, + 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6, 0xea, 0xc7, 0xb0, 0x9d, + 0x5e, 0x73, 0x04, 0x29, 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x06, + 0x71, 0x5c, 
0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0x0a, + 0xbc, 0x91, 0xe6, 0xcb, 0x08, 0x25, 0x52, 0x7f, 0x23, 0x0e, + 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0, 0x56, 0x7b, 0x0c, 0x21, + 0xe2, 0xcf, 0xb8, 0x95, 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, + 0x61, 0x4c, 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39, + 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6, 0x10, 0x3d, + 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3, 0x46, 0x6b, 0x1c, 0x31, + 0xf2, 0xdf, 0xa8, 0x85, 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, + 0xdd, 0xf0, 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f, + 0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a, 0x03, 0x2e, + 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0, 0x76, 0x5b, 0x2c, 0x01, + 0xc2, 0xef, 0x98, 0xb5, 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, + 0x07, 0x2a, 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x05, 0x72, 0x5f, + 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x09, 0xbf, 0x92, + 0xe5, 0xc8, 0x0b, 0x26, 0x51, 0x7c, 0x20, 0x0d, 0x7a, 0x57, + 0x94, 0xb9, 0xce, 0xe3, 0x55, 0x78, 0x0f, 0x22, 0xe1, 0xcc, + 0xbb, 0x96, 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f, + 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a, 0x66, 0x4b, + 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5, 0x13, 0x3e, 0x49, 0x64, + 0xa7, 0x8a, 0xfd, 0xd0, 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, + 0xab, 0x86, 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3, + 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c, 0xda, 0xf7, + 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19, 0x00, 0x2e, 0x5c, 0x72, + 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, + 0x89, 0xa7, 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10, + 0xb7, 0x99, 0xeb, 0xc5, 0x0f, 0x21, 0x53, 0x7d, 0xa9, 0x87, + 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63, 0xc4, 0xea, 0x98, 0xb6, + 0x7c, 0x52, 0x20, 0x0e, 0x73, 0x5d, 0x2f, 0x01, 0xcb, 0xe5, + 0x97, 0xb9, 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4, + 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85, 0x22, 0x0c, + 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8, 0x95, 0xbb, 0xc9, 0xe7, + 0x2d, 0x03, 0x71, 0x5f, 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, + 0x1c, 0x32, 0xe6, 0xc8, 
0xba, 0x94, 0x5e, 0x70, 0x02, 0x2c, + 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41, 0x3c, 0x12, + 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6, 0x51, 0x7f, 0x0d, 0x23, + 0xe9, 0xc7, 0xb5, 0x9b, 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x08, + 0x7a, 0x54, 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39, + 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e, 0x29, 0x07, + 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3, 0x37, 0x19, 0x6b, 0x45, + 0x8f, 0xa1, 0xd3, 0xfd, 0x5a, 0x74, 0x06, 0x28, 0xe2, 0xcc, + 0xbe, 0x90, 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x09, 0x27, + 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a, 0xd1, 0xff, + 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b, 0xbc, 0x92, 0xe0, 0xce, + 0x04, 0x2a, 0x58, 0x76, 0x0b, 0x25, 0x57, 0x79, 0xb3, 0x9d, + 0xef, 0xc1, 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac, + 0x78, 0x56, 0x24, 0x0a, 0xc0, 0xee, 0x9c, 0xb2, 0x15, 0x3b, + 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf, 0xa2, 0x8c, 0xfe, 0xd0, + 0x1a, 0x34, 0x46, 0x68, 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, + 0x2b, 0x05, 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, + 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8, 0xca, 0xe5, + 0x94, 0xbb, 0x76, 0x59, 0x28, 0x07, 0xaf, 0x80, 0xf1, 0xde, + 0x13, 0x3c, 0x4d, 0x62, 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, + 0x6b, 0x44, 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0x0e, 0x21, + 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e, 0x26, 0x09, + 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb, 0x0f, 0x20, 0x51, 0x7e, + 0xb3, 0x9c, 0xed, 0xc2, 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, + 0x88, 0xa7, 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x08, + 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d, 0x86, 0xa9, + 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b, 0xe3, 0xcc, 0xbd, 0x92, + 0x5f, 0x70, 0x01, 0x2e, 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, + 0xae, 0x81, 0x29, 0x06, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4, + 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3, 0x7b, 0x54, + 0x25, 0x0a, 0xc7, 0xe8, 0x99, 0xb6, 0xd4, 0xfb, 0x8a, 0xa5, + 0x68, 0x47, 0x36, 0x19, 0xb1, 0x9e, 0xef, 0xc0, 0x0d, 0x22, + 0x53, 0x7c, 0x97, 0xb8, 0xc9, 0xe6, 
0x2b, 0x04, 0x75, 0x5a, + 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f, 0x5d, 0x72, + 0x03, 0x2c, 0xe1, 0xce, 0xbf, 0x90, 0x38, 0x17, 0x66, 0x49, + 0x84, 0xab, 0xda, 0xf5, 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, + 0xf3, 0xdc, 0x74, 0x5b, 0x2a, 0x05, 0xc8, 0xe7, 0x96, 0xb9, + 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16, 0xbe, 0x91, + 0xe0, 0xcf, 0x02, 0x2d, 0x5c, 0x73, 0x98, 0xb7, 0xc6, 0xe9, + 0x24, 0x0b, 0x7a, 0x55, 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, + 0x1f, 0x30, 0x52, 0x7d, 0x0c, 0x23, 0xee, 0xc1, 0xb0, 0x9f, + 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa, 0x00, 0x30, + 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, + 0x5d, 0x6d, 0x3d, 0x0d, 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, + 0x87, 0xb7, 0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a, + 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde, 0xd3, 0xe3, + 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43, 0x69, 0x59, 0x09, 0x39, + 0xa9, 0x99, 0xc9, 0xf9, 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x04, + 0x54, 0x64, 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0x0c, + 0x01, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91, 0xbb, 0x8b, + 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b, 0x26, 0x16, 0x46, 0x76, + 0xe6, 0xd6, 0x86, 0xb6, 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, + 0x72, 0x42, 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf, + 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x05, 0x55, 0x65, 0x68, 0x58, + 0x08, 0x38, 0xa8, 0x98, 0xc8, 0xf8, 0x25, 0x15, 0x45, 0x75, + 0xe5, 0xd5, 0x85, 0xb5, 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, + 0x18, 0x28, 0x02, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92, + 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0x0f, 0x6b, 0x5b, + 0x0b, 0x3b, 0xab, 0x9b, 0xcb, 0xfb, 0xf6, 0xc6, 0x96, 0xa6, + 0x36, 0x06, 0x56, 0x66, 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, + 0xec, 0xdc, 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41, + 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29, 0x24, 0x14, + 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4, 0x9e, 0xae, 0xfe, 0xce, + 0x5e, 0x6e, 0x3e, 0x0e, 0x03, 0x33, 0x63, 0x53, 0xc3, 0xf3, + 0xa3, 0x93, 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x07, 
0x57, 0x67, + 0x6a, 0x5a, 0x0a, 0x3a, 0xaa, 0x9a, 0xca, 0xfa, 0xd0, 0xe0, + 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40, 0x4d, 0x7d, 0x2d, 0x1d, + 0x8d, 0xbd, 0xed, 0xdd, 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, + 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02, + 0x37, 0x06, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0, 0xa2, 0x93, + 0xc0, 0xf1, 0x66, 0x57, 0x04, 0x35, 0x6e, 0x5f, 0x0c, 0x3d, + 0xaa, 0x9b, 0xc8, 0xf9, 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0x0e, + 0x5d, 0x6c, 0x59, 0x68, 0x3b, 0x0a, 0x9d, 0xac, 0xff, 0xce, + 0xcc, 0xfd, 0xae, 0x9f, 0x08, 0x39, 0x6a, 0x5b, 0xdc, 0xed, + 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b, 0x49, 0x78, 0x2b, 0x1a, + 0x8d, 0xbc, 0xef, 0xde, 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, + 0x4d, 0x7c, 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9, + 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25, 0x27, 0x16, + 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0, 0x85, 0xb4, 0xe7, 0xd6, + 0x41, 0x70, 0x23, 0x12, 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, + 0xb6, 0x87, 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x03, 0x32, + 0x30, 0x01, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7, 0x92, 0xa3, + 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x05, 0x07, 0x36, 0x65, 0x54, + 0xc3, 0xf2, 0xa1, 0x90, 0xcb, 0xfa, 0xa9, 0x98, 0x0f, 0x3e, + 0x6d, 0x5c, 0x5e, 0x6f, 0x3c, 0x0d, 0x9a, 0xab, 0xf8, 0xc9, + 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x09, 0x5a, 0x6b, 0x69, 0x58, + 0x0b, 0x3a, 0xad, 0x9c, 0xcf, 0xfe, 0x79, 0x48, 0x1b, 0x2a, + 0xbd, 0x8c, 0xdf, 0xee, 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, + 0x4a, 0x7b, 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9, + 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c, 0x17, 0x26, + 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80, 0x82, 0xb3, 0xe0, 0xd1, + 0x46, 0x77, 0x24, 0x15, 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, + 0x86, 0xb7, 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22, + 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, + 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13, 0x07, 0x35, 0x63, 0x51, + 0xcf, 0xfd, 0xab, 0x99, 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, + 0x26, 0x14, 0x0e, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90, + 
0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d, 0x09, 0x3b, + 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97, 0x84, 0xb6, 0xe0, 0xd2, + 0x4c, 0x7e, 0x28, 0x1a, 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, + 0xb0, 0x82, 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0x0f, + 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85, 0x96, 0xa4, + 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x08, 0x12, 0x20, 0x76, 0x44, + 0xda, 0xe8, 0xbe, 0x8c, 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, + 0x33, 0x01, 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b, + 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x06, 0x38, 0x0a, + 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6, 0xb5, 0x87, 0xd1, 0xe3, + 0x7d, 0x4f, 0x19, 0x2b, 0x3f, 0x0d, 0x5b, 0x69, 0xf7, 0xc5, + 0x93, 0xa1, 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c, + 0x36, 0x04, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8, 0xbb, 0x89, + 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25, 0x31, 0x03, 0x55, 0x67, + 0xf9, 0xcb, 0x9d, 0xaf, 0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46, + 0x10, 0x22, 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba, + 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x05, 0x37, 0x23, 0x11, + 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd, 0xae, 0x9c, 0xca, 0xf8, + 0x66, 0x54, 0x02, 0x30, 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, + 0x86, 0xb4, 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0x0b, 0x39, + 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3, 0xa0, 0x92, + 0xc4, 0xf6, 0x68, 0x5a, 0x0c, 0x3e, 0x00, 0x33, 0x66, 0x55, + 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, + 0x2f, 0x1c, 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e, + 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0x0b, 0x2e, 0x1d, + 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7, 0xab, 0x98, 0xcd, 0xfe, + 0x67, 0x54, 0x01, 0x32, 0x39, 0x0a, 0x5f, 0x6c, 0xf5, 0xc6, + 0x93, 0xa0, 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25, + 0x5c, 0x6f, 0x3a, 0x09, 0x90, 0xa3, 0xf6, 0xc5, 0xd9, 0xea, + 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40, 0x4b, 0x78, 0x2d, 0x1e, + 0x87, 0xb4, 0xe1, 0xd2, 0xce, 0xfd, 0xa8, 0x9b, 0x02, 0x31, + 0x64, 0x57, 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb, + 0xf7, 0xc4, 
0x91, 0xa2, 0x3b, 0x08, 0x5d, 0x6e, 0x65, 0x56, + 0x03, 0x30, 0xa9, 0x9a, 0xcf, 0xfc, 0xe0, 0xd3, 0x86, 0xb5, + 0x2c, 0x1f, 0x4a, 0x79, 0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47, + 0x12, 0x21, 0x3d, 0x0e, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4, + 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x05, 0x36, 0x2a, 0x19, + 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3, 0x96, 0xa5, 0xf0, 0xc3, + 0x5a, 0x69, 0x3c, 0x0f, 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, + 0xb9, 0x8a, 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18, + 0x04, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d, 0xe4, 0xd7, + 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d, 0x61, 0x52, 0x07, 0x34, + 0xad, 0x9e, 0xcb, 0xf8, 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0x0c, + 0x59, 0x6a, 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef, + 0xca, 0xf9, 0xac, 0x9f, 0x06, 0x35, 0x60, 0x53, 0x4f, 0x7c, + 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6, 0xdd, 0xee, 0xbb, 0x88, + 0x11, 0x22, 0x77, 0x44, 0x58, 0x6b, 0x3e, 0x0d, 0x94, 0xa7, + 0xf2, 0xc1, 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, + 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31, 0x67, 0x53, + 0x0f, 0x3b, 0xb7, 0x83, 0xdf, 0xeb, 0xda, 0xee, 0xb2, 0x86, + 0x0a, 0x3e, 0x62, 0x56, 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, + 0x76, 0x42, 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff, + 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25, 0x14, 0x20, + 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98, 0x81, 0xb5, 0xe9, 0xdd, + 0x51, 0x65, 0x39, 0x0d, 0x3c, 0x08, 0x54, 0x60, 0xec, 0xd8, + 0x84, 0xb0, 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x02, 0x5e, 0x6a, + 0x5b, 0x6f, 0x33, 0x07, 0x8b, 0xbf, 0xe3, 0xd7, 0x4f, 0x7b, + 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3, 0xf2, 0xc6, 0x9a, 0xae, + 0x22, 0x16, 0x4a, 0x7e, 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, + 0x90, 0xa4, 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19, + 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93, 0xa2, 0x96, + 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e, 0x78, 0x4c, 0x10, 0x24, + 0xa8, 0x9c, 0xc0, 0xf4, 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, + 0x7d, 0x49, 0xd1, 0xe5, 0xb9, 0x8d, 0x01, 0x35, 0x69, 0x5d, + 0x6c, 0x58, 0x04, 0x30, 
0xbc, 0x88, 0xd4, 0xe0, 0xb6, 0x82, + 0xde, 0xea, 0x66, 0x52, 0x0e, 0x3a, 0x0b, 0x3f, 0x63, 0x57, + 0xdb, 0xef, 0xb3, 0x87, 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, + 0x26, 0x12, 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf, + 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75, 0x44, 0x70, + 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8, 0x50, 0x64, 0x38, 0x0c, + 0x80, 0xb4, 0xe8, 0xdc, 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x09, + 0x55, 0x61, 0x37, 0x03, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb, + 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x06, 0x00, 0x35, + 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, + 0x61, 0x54, 0x0b, 0x3e, 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, + 0xc9, 0xfc, 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49, + 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0x0f, 0x50, 0x65, 0x5b, 0x6e, + 0x31, 0x04, 0x8f, 0xba, 0xe5, 0xd0, 0x99, 0xac, 0xf3, 0xc6, + 0x4d, 0x78, 0x27, 0x12, 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, + 0x92, 0xa7, 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a, + 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff, 0xb6, 0x83, + 0xdc, 0xe9, 0x62, 0x57, 0x08, 0x3d, 0x03, 0x36, 0x69, 0x5c, + 0xd7, 0xe2, 0xbd, 0x88, 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, + 0x91, 0xa4, 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11, + 0x58, 0x6d, 0x32, 0x07, 0x8c, 0xb9, 0xe6, 0xd3, 0xed, 0xd8, + 0x87, 0xb2, 0x39, 0x0c, 0x53, 0x66, 0x9f, 0xaa, 0xf5, 0xc0, + 0x4b, 0x7e, 0x21, 0x14, 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, + 0x94, 0xa1, 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x09, 0x56, 0x63, + 0x5d, 0x68, 0x37, 0x02, 0x89, 0xbc, 0xe3, 0xd6, 0x71, 0x44, + 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa, 0xc4, 0xf1, 0xae, 0x9b, + 0x10, 0x25, 0x7a, 0x4f, 0x06, 0x33, 0x6c, 0x59, 0xd2, 0xe7, + 0xb8, 0x8d, 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0x0d, 0x38, + 0x5e, 0x6b, 0x34, 0x01, 0x8a, 0xbf, 0xe0, 0xd5, 0xeb, 0xde, + 0x81, 0xb4, 0x3f, 0x0a, 0x55, 0x60, 0x29, 0x1c, 0x43, 0x76, + 0xfd, 0xc8, 0x97, 0xa2, 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, + 0x22, 0x17, 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0x0e, 0x3b, + 0x05, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 
0xbb, 0x8e, 0xc7, 0xf2, + 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c, 0x72, 0x47, 0x18, 0x2d, + 0xa6, 0x93, 0xcc, 0xf9, 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, + 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f, + 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5, 0xea, 0xdc, + 0x86, 0xb0, 0x32, 0x04, 0x5e, 0x68, 0x8e, 0xb8, 0xe2, 0xd4, + 0x56, 0x60, 0x3a, 0x0c, 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, + 0x97, 0xa1, 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b, + 0x64, 0x52, 0x08, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6, 0x01, 0x37, + 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83, 0xac, 0x9a, 0xc0, 0xf6, + 0x74, 0x42, 0x18, 0x2e, 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, + 0xf2, 0xc4, 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x05, 0x5f, 0x69, + 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0x0d, 0x22, 0x14, + 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0, 0xc8, 0xfe, 0xa4, 0x92, + 0x10, 0x26, 0x7c, 0x4a, 0x65, 0x53, 0x09, 0x3f, 0xbd, 0x8b, + 0xd1, 0xe7, 0x02, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80, + 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d, 0x45, 0x73, + 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7, 0xe8, 0xde, 0x84, 0xb2, + 0x30, 0x06, 0x5c, 0x6a, 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, + 0x38, 0x0e, 0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3, + 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49, 0x66, 0x50, + 0x0a, 0x3c, 0xbe, 0x88, 0xd2, 0xe4, 0x03, 0x35, 0x6f, 0x59, + 0xdb, 0xed, 0xb7, 0x81, 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, + 0x1a, 0x2c, 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6, + 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x07, 0x5d, 0x6b, 0x8d, 0xbb, + 0xe1, 0xd7, 0x55, 0x63, 0x39, 0x0f, 0x20, 0x16, 0x4c, 0x7a, + 0xf8, 0xce, 0x94, 0xa2, 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, + 0x7e, 0x48, 0x67, 0x51, 0x0b, 0x3d, 0xbf, 0x89, 0xd3, 0xe5, + 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, + 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20, 0x57, 0x60, 0x39, 0x0e, + 0x8b, 0xbc, 0xe5, 0xd2, 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, + 0x40, 0x77, 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b, + 0x0b, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e, 
0xf9, 0xce, + 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c, 0x5c, 0x6b, 0x32, 0x05, + 0x80, 0xb7, 0xee, 0xd9, 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, + 0xf3, 0xc4, 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0x0f, 0x56, 0x61, + 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93, 0xb3, 0x84, + 0xdd, 0xea, 0x6f, 0x58, 0x01, 0x36, 0xef, 0xd8, 0x81, 0xb6, + 0x33, 0x04, 0x5d, 0x6a, 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, + 0xf8, 0xcf, 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d, + 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98, 0x82, 0xb5, + 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x07, 0x27, 0x10, 0x49, 0x7e, + 0xfb, 0xcc, 0x95, 0xa2, 0xd5, 0xe2, 0xbb, 0x8c, 0x09, 0x3e, + 0x67, 0x50, 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5, + 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9, 0x89, 0xbe, + 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0x0c, 0x7b, 0x4c, 0x15, 0x22, + 0xa7, 0x90, 0xc9, 0xfe, 0xde, 0xe9, 0xb0, 0x87, 0x02, 0x35, + 0x6c, 0x5b, 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46, + 0x66, 0x51, 0x08, 0x3f, 0xba, 0x8d, 0xd4, 0xe3, 0x94, 0xa3, + 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11, 0x31, 0x06, 0x5f, 0x68, + 0xed, 0xda, 0x83, 0xb4, 0x6d, 0x5a, 0x03, 0x34, 0xb1, 0x86, + 0xdf, 0xe8, 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d, + 0x3a, 0x0d, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf, 0x9f, 0xa8, + 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a, 0x00, 0x38, 0x70, 0x48, + 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, + 0x4d, 0x75, 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0x0f, + 0x7a, 0x42, 0x0a, 0x32, 0x9a, 0xa2, 0xea, 0xd2, 0x53, 0x6b, + 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb, 0x8e, 0xb6, 0xfe, 0xc6, + 0x6e, 0x56, 0x1e, 0x26, 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, + 0x64, 0x5c, 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81, + 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0x0e, 0x7b, 0x43, + 0x0b, 0x33, 0x9b, 0xa3, 0xeb, 0xd3, 0x01, 0x39, 0x71, 0x49, + 0xe1, 0xd9, 0x91, 0xa9, 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x04, + 0x4c, 0x74, 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d, + 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80, 0x52, 0x6a, + 
0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa, 0x8f, 0xb7, 0xff, 0xc7, + 0x6f, 0x57, 0x1f, 0x27, 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, + 0xc1, 0xf9, 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24, + 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e, 0x2b, 0x13, + 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83, 0x02, 0x3a, 0x72, 0x4a, + 0xe2, 0xda, 0x92, 0xaa, 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x07, + 0x4f, 0x77, 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0x0d, + 0x78, 0x40, 0x08, 0x30, 0x98, 0xa0, 0xe8, 0xd0, 0xf7, 0xcf, + 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f, 0x2a, 0x12, 0x5a, 0x62, + 0xca, 0xf2, 0xba, 0x82, 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, + 0xc0, 0xf8, 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25, + 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0x0c, 0x79, 0x41, + 0x09, 0x31, 0x99, 0xa1, 0xe9, 0xd1, 0x03, 0x3b, 0x73, 0x4b, + 0xe3, 0xdb, 0x93, 0xab, 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x06, + 0x4e, 0x76, 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, + 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a, 0xb7, 0x8e, + 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18, 0x62, 0x5b, 0x10, 0x29, + 0x86, 0xbf, 0xf4, 0xcd, 0x73, 0x4a, 0x01, 0x38, 0x97, 0xae, + 0xe5, 0xdc, 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x09, + 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b, 0x11, 0x28, + 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe, 0xe6, 0xdf, 0x94, 0xad, + 0x02, 0x3b, 0x70, 0x49, 0x33, 0x0a, 0x41, 0x78, 0xd7, 0xee, + 0xa5, 0x9c, 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe, + 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b, 0x95, 0xac, + 0xe7, 0xde, 0x71, 0x48, 0x03, 0x3a, 0x40, 0x79, 0x32, 0x0b, + 0xa4, 0x9d, 0xd6, 0xef, 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, + 0xb4, 0x8d, 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58, + 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0x0c, 0x47, 0x7e, 0x04, 0x3d, + 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab, 0x66, 0x5f, 0x14, 0x2d, + 0x82, 0xbb, 0xf0, 0xc9, 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, + 0x25, 0x1c, 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0x0d, + 0x77, 0x4e, 0x05, 0x3c, 0x93, 0xaa, 0xe1, 0xd8, 0x15, 0x2c, + 0x67, 0x5e, 
0xf1, 0xc8, 0x83, 0xba, 0xc0, 0xf9, 0xb2, 0x8b, + 0x24, 0x1d, 0x56, 0x6f, 0x37, 0x0e, 0x45, 0x7c, 0xd3, 0xea, + 0xa1, 0x98, 0xe2, 0xdb, 0x90, 0xa9, 0x06, 0x3f, 0x74, 0x4d, + 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f, 0x55, 0x6c, + 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa, 0x44, 0x7d, 0x36, 0x0f, + 0xa0, 0x99, 0xd2, 0xeb, 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, + 0x07, 0x3e, 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c, + 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89, 0x00, 0x3a, + 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, + 0x25, 0x1f, 0x51, 0x6b, 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, + 0x1b, 0x21, 0x4a, 0x70, 0x3e, 0x04, 0xa2, 0x98, 0xd6, 0xec, + 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5, 0xde, 0xe4, + 0xaa, 0x90, 0x36, 0x0c, 0x42, 0x78, 0x94, 0xae, 0xe0, 0xda, + 0x7c, 0x46, 0x08, 0x32, 0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b, + 0xc5, 0xff, 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80, + 0xeb, 0xd1, 0x9f, 0xa5, 0x03, 0x39, 0x77, 0x4d, 0xa1, 0x9b, + 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x07, 0x6c, 0x56, 0x18, 0x22, + 0x84, 0xbe, 0xf0, 0xca, 0x35, 0x0f, 0x41, 0x7b, 0xdd, 0xe7, + 0xa9, 0x93, 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e, + 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14, 0x7f, 0x45, + 0x0b, 0x31, 0x97, 0xad, 0xe3, 0xd9, 0x4c, 0x76, 0x38, 0x02, + 0xa4, 0x9e, 0xd0, 0xea, 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, + 0x1d, 0x27, 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d, + 0x06, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0, 0x5f, 0x65, + 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9, 0x92, 0xa8, 0xe6, 0xdc, + 0x7a, 0x40, 0x0e, 0x34, 0xd8, 0xe2, 0xac, 0x96, 0x30, 0x0a, + 0x44, 0x7e, 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3, + 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc, 0xa7, 0x9d, + 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x01, 0xed, 0xd7, 0x99, 0xa3, + 0x05, 0x3f, 0x71, 0x4b, 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, + 0xbc, 0x86, 0x79, 0x43, 0x0d, 0x37, 0x91, 0xab, 0xe5, 0xdf, + 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12, 0xfe, 0xc4, + 0x8a, 0xb0, 0x16, 0x2c, 
0x62, 0x58, 0x33, 0x09, 0x47, 0x7d, + 0xdb, 0xe1, 0xaf, 0x95, 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, + 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64, + 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0x0d, 0x36, 0x52, 0x69, + 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3, 0x33, 0x08, 0x45, 0x7e, + 0xdf, 0xe4, 0xa9, 0x92, 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, + 0x6c, 0x57, 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x05, + 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0, 0x66, 0x5d, + 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7, 0xa3, 0x98, 0xd5, 0xee, + 0x4f, 0x74, 0x39, 0x02, 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, + 0x6b, 0x50, 0x34, 0x0f, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95, + 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4, 0x90, 0xab, + 0xe6, 0xdd, 0x7c, 0x47, 0x0a, 0x31, 0xc2, 0xf9, 0xb4, 0x8f, + 0x2e, 0x15, 0x58, 0x63, 0x07, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, + 0x9d, 0xa6, 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d, + 0x09, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8, 0x5b, 0x60, + 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa, 0x9e, 0xa5, 0xe8, 0xd3, + 0x72, 0x49, 0x04, 0x3f, 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, + 0x65, 0x5e, 0x3a, 0x01, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b, + 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9, 0xad, 0x96, + 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0x0c, 0xaa, 0x91, 0xdc, 0xe7, + 0x46, 0x7d, 0x30, 0x0b, 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, + 0xf5, 0xce, 0x3d, 0x06, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c, + 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59, 0x99, 0xa2, + 0xef, 0xd4, 0x75, 0x4e, 0x03, 0x38, 0x5c, 0x67, 0x2a, 0x11, + 0xb0, 0x8b, 0xc6, 0xfd, 0x0e, 0x35, 0x78, 0x43, 0xe2, 0xd9, + 0x94, 0xaf, 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a, + 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, + 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49, 0xe7, 0xdb, 0x9f, 0xa3, + 0x17, 0x2b, 0x6f, 0x53, 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, + 0x92, 0xae, 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67, + 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a, 0x34, 0x08, + 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80, 
0xc9, 0xf5, 0xb1, 0x8d, + 0x39, 0x05, 0x41, 0x7d, 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, + 0x33, 0x0f, 0x46, 0x7a, 0x3e, 0x02, 0xb6, 0x8a, 0xce, 0xf2, + 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8, 0xa1, 0x9d, + 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15, 0x68, 0x54, 0x10, 0x2c, + 0x98, 0xa4, 0xe0, 0xdc, 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, + 0x1d, 0x21, 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x07, 0x3b, + 0x72, 0x4e, 0x0a, 0x36, 0x82, 0xbe, 0xfa, 0xc6, 0x6b, 0x57, + 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf, 0x96, 0xaa, 0xee, 0xd2, + 0x66, 0x5a, 0x1e, 0x22, 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, + 0x04, 0x38, 0x71, 0x4d, 0x09, 0x35, 0x81, 0xbd, 0xf9, 0xc5, + 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0x0c, 0x45, 0x79, + 0x3d, 0x01, 0xb5, 0x89, 0xcd, 0xf1, 0x5f, 0x63, 0x27, 0x1b, + 0xaf, 0x93, 0xd7, 0xeb, 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, + 0x2a, 0x16, 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64, + 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99, 0x37, 0x0b, + 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83, 0xca, 0xf6, 0xb2, 0x8e, + 0x3a, 0x06, 0x42, 0x7e, 0x03, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, + 0x8b, 0xb7, 0xfe, 0xc2, 0x86, 0xba, 0x0e, 0x32, 0x76, 0x4a, + 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50, 0x19, 0x25, + 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad, 0x00, 0x3d, 0x7a, 0x47, + 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, + 0x7b, 0x46, 0xf7, 0xca, 0x8d, 0xb0, 0x03, 0x3e, 0x79, 0x44, + 0x02, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1, 0xf3, 0xce, + 0x89, 0xb4, 0x07, 0x3a, 0x7d, 0x40, 0x06, 0x3b, 0x7c, 0x41, + 0xf2, 0xcf, 0x88, 0xb5, 0x04, 0x39, 0x7e, 0x43, 0xf0, 0xcd, + 0x8a, 0xb7, 0xf1, 0xcc, 0x8b, 0xb6, 0x05, 0x38, 0x7f, 0x42, + 0xfb, 0xc6, 0x81, 0xbc, 0x0f, 0x32, 0x75, 0x48, 0x0e, 0x33, + 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd, 0x0c, 0x31, 0x76, 0x4b, + 0xf8, 0xc5, 0x82, 0xbf, 0xf9, 0xc4, 0x83, 0xbe, 0x0d, 0x30, + 0x77, 0x4a, 0x08, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb, + 0xfd, 0xc0, 0x87, 0xba, 0x09, 0x34, 0x73, 0x4e, 0xff, 0xc2, + 0x85, 0xb8, 0x0b, 0x36, 0x71, 0x4c, 0x0a, 0x37, 
0x70, 0x4d, + 0xfe, 0xc3, 0x84, 0xb9, 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, + 0x65, 0x58, 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad, + 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf, 0xe9, 0xd4, + 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a, 0x18, 0x25, 0x62, 0x5f, + 0xec, 0xd1, 0x96, 0xab, 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, + 0x63, 0x5e, 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c, + 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9, 0x10, 0x2d, + 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3, 0xe5, 0xd8, 0x9f, 0xa2, + 0x11, 0x2c, 0x6b, 0x56, 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, + 0x69, 0x54, 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1, + 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50, 0x16, 0x2b, + 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5, 0x14, 0x29, 0x6e, 0x53, + 0xe0, 0xdd, 0x9a, 0xa7, 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, + 0x6f, 0x52, 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, + 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57, 0xc7, 0xf9, + 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d, 0x2a, 0x14, 0x56, 0x68, + 0xd2, 0xec, 0xae, 0x90, 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, + 0x17, 0x29, 0x7e, 0x40, 0x02, 0x3c, 0x86, 0xb8, 0xfa, 0xc4, + 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee, 0xb9, 0x87, + 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03, 0x3b, 0x05, 0x47, 0x79, + 0xc3, 0xfd, 0xbf, 0x81, 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, + 0x52, 0x6c, 0xfc, 0xc2, 0x80, 0xbe, 0x04, 0x3a, 0x78, 0x46, + 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab, 0xa8, 0x96, + 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12, 0x45, 0x7b, 0x39, 0x07, + 0xbd, 0x83, 0xc1, 0xff, 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, + 0xeb, 0xd5, 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x06, 0x38, + 0x76, 0x48, 0x0a, 0x34, 0x8e, 0xb0, 0xf2, 0xcc, 0x9b, 0xa5, + 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21, 0xb1, 0x8f, 0xcd, 0xf3, + 0x49, 0x77, 0x35, 0x0b, 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, + 0xd8, 0xe6, 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f, + 0x08, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2, 0x22, 0x1c, + 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98, 0xcf, 0xf1, 0xb3, 0x8d, + 
0x37, 0x09, 0x4b, 0x75, 0x4d, 0x73, 0x31, 0x0f, 0xb5, 0x8b, + 0xc9, 0xf7, 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a, + 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0x0e, 0x30, 0x67, 0x59, + 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd, 0xde, 0xe0, 0xa2, 0x9c, + 0x26, 0x18, 0x5a, 0x64, 0x33, 0x0d, 0x4f, 0x71, 0xcb, 0xf5, + 0xb7, 0x89, 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3, + 0xf4, 0xca, 0x88, 0xb6, 0x0c, 0x32, 0x70, 0x4e, 0x00, 0x3f, + 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, + 0x19, 0x26, 0x67, 0x58, 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, + 0x55, 0x6a, 0x32, 0x0d, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f, + 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0x0e, 0x56, 0x69, + 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb, 0x64, 0x5b, 0x1a, 0x25, + 0x98, 0xa7, 0xe6, 0xd9, 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, + 0x03, 0x3c, 0x7b, 0x44, 0x05, 0x3a, 0x87, 0xb8, 0xf9, 0xc6, + 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23, 0xac, 0x93, + 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11, 0x49, 0x76, 0x37, 0x08, + 0xb5, 0x8a, 0xcb, 0xf4, 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0x0b, + 0x4a, 0x75, 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90, + 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2, 0xfa, 0xc5, + 0x84, 0xbb, 0x06, 0x39, 0x78, 0x47, 0xf6, 0xc9, 0x88, 0xb7, + 0x0a, 0x35, 0x74, 0x4b, 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, + 0x91, 0xae, 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c, + 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x07, 0x46, 0x79, 0x45, 0x7a, + 0x3b, 0x04, 0xb9, 0x86, 0xc7, 0xf8, 0xa0, 0x9f, 0xde, 0xe1, + 0x5c, 0x63, 0x22, 0x1d, 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, + 0x10, 0x2f, 0x77, 0x48, 0x09, 0x36, 0x8b, 0xb4, 0xf5, 0xca, + 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0x0f, 0x30, 0x68, 0x57, + 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5, 0x5a, 0x65, 0x24, 0x1b, + 0xa6, 0x99, 0xd8, 0xe7, 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, + 0x3d, 0x02, 0x3e, 0x01, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83, + 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66, 0xe9, 0xd6, + 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54, 0x0c, 0x33, 0x72, 0x4d, + 0xf0, 0xcf, 
0x8e, 0xb1, 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, + 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7, + 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9, 0x4e, 0x0e, + 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93, 0xe8, 0xa8, 0x68, 0x28, + 0xf5, 0xb5, 0x75, 0x35, 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, + 0x4f, 0x0f, 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x01, 0x41, + 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b, 0xcd, 0x8d, + 0x4d, 0x0d, 0xd0, 0x90, 0x50, 0x10, 0xf7, 0xb7, 0x77, 0x37, + 0xea, 0xaa, 0x6a, 0x2a, 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, + 0x24, 0x64, 0x83, 0xc3, 0x03, 0x43, 0x9e, 0xde, 0x1e, 0x5e, + 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8, 0x1f, 0x5f, + 0x9f, 0xdf, 0x02, 0x42, 0x82, 0xc2, 0x51, 0x11, 0xd1, 0x91, + 0x4c, 0x0c, 0xcc, 0x8c, 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, + 0xf6, 0xb6, 0x87, 0xc7, 0x07, 0x47, 0x9a, 0xda, 0x1a, 0x5a, + 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60, 0xf3, 0xb3, + 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e, 0xc9, 0x89, 0x49, 0x09, + 0xd4, 0x94, 0x54, 0x14, 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, + 0xf2, 0xb2, 0x55, 0x15, 0xd5, 0x95, 0x48, 0x08, 0xc8, 0x88, + 0x1b, 0x5b, 0x9b, 0xdb, 0x06, 0x46, 0x86, 0xc6, 0x21, 0x61, + 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc, 0x4a, 0x0a, 0xca, 0x8a, + 0x57, 0x17, 0xd7, 0x97, 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, + 0xed, 0xad, 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3, + 0x04, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9, 0xa2, 0xe2, + 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f, 0x98, 0xd8, 0x18, 0x58, + 0x85, 0xc5, 0x05, 0x45, 0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b, + 0x4b, 0x0b, 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31, + 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, + 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8, 0x64, 0x25, 0xe6, 0xa7, + 0x7d, 0x3c, 0xff, 0xbe, 0x56, 0x17, 0xd4, 0x95, 0x4f, 0x0e, + 0xcd, 0x8c, 0xc8, 0x89, 0x4a, 0x0b, 0xd1, 0x90, 0x53, 0x12, + 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20, 0xac, 0xed, + 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76, 0x9e, 0xdf, 0x1c, 0x5d, + 0x87, 0xc6, 0x05, 0x44, 
0x8d, 0xcc, 0x0f, 0x4e, 0x94, 0xd5, + 0x16, 0x57, 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65, + 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33, 0xdb, 0x9a, + 0x59, 0x18, 0xc2, 0x83, 0x40, 0x01, 0x45, 0x04, 0xc7, 0x86, + 0x5c, 0x1d, 0xde, 0x9f, 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, + 0xec, 0xad, 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb, + 0x13, 0x52, 0x91, 0xd0, 0x0a, 0x4b, 0x88, 0xc9, 0x07, 0x46, + 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd, 0x35, 0x74, 0xb7, 0xf6, + 0x2c, 0x6d, 0xae, 0xef, 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, + 0xf8, 0xb9, 0x51, 0x10, 0xd3, 0x92, 0x48, 0x09, 0xca, 0x8b, + 0xcf, 0x8e, 0x4d, 0x0c, 0xd6, 0x97, 0x54, 0x15, 0xfd, 0xbc, + 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27, 0xab, 0xea, 0x29, 0x68, + 0xb2, 0xf3, 0x30, 0x71, 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, + 0x02, 0x43, 0x8a, 0xcb, 0x08, 0x49, 0x93, 0xd2, 0x11, 0x50, + 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62, 0xee, 0xaf, + 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34, 0xdc, 0x9d, 0x5e, 0x1f, + 0xc5, 0x84, 0x47, 0x06, 0x42, 0x03, 0xc0, 0x81, 0x5b, 0x1a, + 0xd9, 0x98, 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa, + 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc, 0x14, 0x55, + 0x96, 0xd7, 0x0d, 0x4c, 0x8f, 0xce, 0x00, 0x42, 0x84, 0xc6, + 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, + 0xbb, 0xf9, 0x54, 0x16, 0xd0, 0x92, 0x41, 0x03, 0xc5, 0x87, + 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad, 0xa8, 0xea, + 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b, 0x82, 0xc0, 0x06, 0x44, + 0x97, 0xd5, 0x13, 0x51, 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, + 0x6d, 0x2f, 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x05, + 0x4d, 0x0f, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e, 0x67, 0x25, + 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4, 0x19, 0x5b, 0x9d, 0xdf, + 0x0c, 0x4e, 0x88, 0xca, 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, + 0xa2, 0xe0, 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36, + 0xcf, 0x8d, 0x4b, 0x09, 0xda, 0x98, 0x5e, 0x1c, 0xb1, 0xf3, + 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62, 0x9b, 0xd9, 0x1f, 0x5d, + 0x8e, 0xcc, 0x0a, 0x48, 0x9a, 0xd8, 
0x1e, 0x5c, 0x8f, 0xcd, + 0x0b, 0x49, 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63, + 0xce, 0x8c, 0x4a, 0x08, 0xdb, 0x99, 0x5f, 0x1d, 0xe4, 0xa6, + 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37, 0x32, 0x70, 0xb6, 0xf4, + 0x27, 0x65, 0xa3, 0xe1, 0x18, 0x5a, 0x9c, 0xde, 0x0d, 0x4f, + 0x89, 0xcb, 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5, + 0x4c, 0x0e, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f, 0xd7, 0x95, + 0x53, 0x11, 0xc2, 0x80, 0x46, 0x04, 0xfd, 0xbf, 0x79, 0x3b, + 0xe8, 0xaa, 0x6c, 0x2e, 0x83, 0xc1, 0x07, 0x45, 0x96, 0xd4, + 0x12, 0x50, 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a, + 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac, 0x55, 0x17, + 0xd1, 0x93, 0x40, 0x02, 0xc4, 0x86, 0x2b, 0x69, 0xaf, 0xed, + 0x3e, 0x7c, 0xba, 0xf8, 0x01, 0x43, 0x85, 0xc7, 0x14, 0x56, + 0x90, 0xd2, 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, + 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6, 0x44, 0x07, + 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90, 0x66, 0x25, 0xe0, 0xa3, + 0x77, 0x34, 0xf1, 0xb2, 0x88, 0xcb, 0x0e, 0x4d, 0x99, 0xda, + 0x1f, 0x5c, 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e, + 0xcc, 0x8f, 0x4a, 0x09, 0xdd, 0x9e, 0x5b, 0x18, 0xee, 0xad, + 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a, 0x0d, 0x4e, 0x8b, 0xc8, + 0x1c, 0x5f, 0x9a, 0xd9, 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, + 0xb8, 0xfb, 0x49, 0x0a, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d, + 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf, 0x85, 0xc6, + 0x03, 0x40, 0x94, 0xd7, 0x12, 0x51, 0xa7, 0xe4, 0x21, 0x62, + 0xb6, 0xf5, 0x30, 0x73, 0xc1, 0x82, 0x47, 0x04, 0xd0, 0x93, + 0x56, 0x15, 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37, + 0x1a, 0x59, 0x9c, 0xdf, 0x0b, 0x48, 0x8d, 0xce, 0x38, 0x7b, + 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec, 0x5e, 0x1d, 0xd8, 0x9b, + 0x4f, 0x0c, 0xc9, 0x8a, 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, + 0xeb, 0xa8, 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x05, 0x46, + 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64, 0xd6, 0x95, + 0x50, 0x13, 0xc7, 0x84, 0x41, 0x02, 0xf4, 0xb7, 0x72, 0x31, + 0xe5, 0xa6, 0x63, 0x20, 0x17, 0x54, 0x91, 0xd2, 
0x06, 0x45, + 0x80, 0xc3, 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1, + 0x53, 0x10, 0xd5, 0x96, 0x42, 0x01, 0xc4, 0x87, 0x71, 0x32, + 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5, 0x9f, 0xdc, 0x19, 0x5a, + 0x8e, 0xcd, 0x08, 0x4b, 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, + 0x2a, 0x69, 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0x0f, + 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d, 0x00, 0x44, + 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, + 0x17, 0x53, 0x9f, 0xdb, 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, + 0xb1, 0xf5, 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef, + 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9, 0x72, 0x36, + 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3, 0x5c, 0x18, 0xd4, 0x90, + 0x51, 0x15, 0xd9, 0x9d, 0x46, 0x02, 0xce, 0x8a, 0x4b, 0x0f, + 0xc3, 0x87, 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11, + 0xca, 0x8e, 0x42, 0x06, 0xc7, 0x83, 0x4f, 0x0b, 0xe4, 0xa0, + 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25, 0xfe, 0xba, 0x76, 0x32, + 0xf3, 0xb7, 0x7b, 0x3f, 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, + 0x3d, 0x79, 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63, + 0x8c, 0xc8, 0x04, 0x40, 0x81, 0xc5, 0x09, 0x4d, 0x96, 0xd2, + 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57, 0xbd, 0xf9, 0x35, 0x71, + 0xb0, 0xf4, 0x38, 0x7c, 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, + 0x22, 0x66, 0x89, 0xcd, 0x01, 0x45, 0x84, 0xc0, 0x0c, 0x48, + 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52, 0xd5, 0x91, + 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14, 0xcf, 0x8b, 0x47, 0x03, + 0xc2, 0x86, 0x4a, 0x0e, 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, + 0x64, 0x20, 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a, + 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac, 0x77, 0x33, + 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6, 0x59, 0x1d, 0xd1, 0x95, + 0x54, 0x10, 0xdc, 0x98, 0x43, 0x07, 0xcb, 0x8f, 0x4e, 0x0a, + 0xc6, 0x82, 0x05, 0x41, 0x8d, 0xc9, 0x08, 0x4c, 0x80, 0xc4, + 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde, 0x31, 0x75, + 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0, 0x2b, 0x6f, 0xa3, 0xe7, + 0x26, 0x62, 0xae, 0xea, 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, + 
0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4, + 0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2, 0x36, 0x73, + 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0, 0x48, 0x0d, 0xc2, 0x87, + 0x41, 0x04, 0xcb, 0x8e, 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, + 0xd9, 0x9c, 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa, + 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8, 0x90, 0xd5, + 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56, 0x82, 0xc7, 0x08, 0x4d, + 0x8b, 0xce, 0x01, 0x44, 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, + 0x37, 0x72, 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60, + 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e, 0xca, 0x8f, + 0x40, 0x05, 0xc3, 0x86, 0x49, 0x0c, 0xfc, 0xb9, 0x76, 0x33, + 0xf5, 0xb0, 0x7f, 0x3a, 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, + 0x6d, 0x28, 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb, + 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9, 0x19, 0x5c, + 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf, 0x0b, 0x4e, 0x81, 0xc4, + 0x02, 0x47, 0x88, 0xcd, 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, + 0xf6, 0xb3, 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1, + 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97, 0x43, 0x06, + 0xc9, 0x8c, 0x4a, 0x0f, 0xc0, 0x85, 0xad, 0xe8, 0x27, 0x62, + 0xa4, 0xe1, 0x2e, 0x6b, 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, + 0x3c, 0x79, 0x89, 0xcc, 0x03, 0x46, 0x80, 0xc5, 0x0a, 0x4f, + 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d, 0xe5, 0xa0, + 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23, 0xf7, 0xb2, 0x7d, 0x38, + 0xfe, 0xbb, 0x74, 0x31, 0xc1, 0x84, 0x4b, 0x0e, 0xc8, 0x8d, + 0x42, 0x07, 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15, + 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, + 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5, 0x14, 0x52, 0x98, 0xde, + 0x11, 0x57, 0x9d, 0xdb, 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, + 0x97, 0xd1, 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7, + 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed, 0x3c, 0x7a, + 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3, 0x36, 0x70, 0xba, 0xfc, + 0x33, 0x75, 0xbf, 0xf9, 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, + 0xd9, 0x9f, 
0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95, + 0x44, 0x02, 0xc8, 0x8e, 0x41, 0x07, 0xcd, 0x8b, 0x4e, 0x08, + 0xc2, 0x84, 0x4b, 0x0d, 0xc7, 0x81, 0x78, 0x3e, 0xf4, 0xb2, + 0x7d, 0x3b, 0xf1, 0xb7, 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, + 0xfb, 0xbd, 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3, + 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9, 0xa0, 0xe6, + 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f, 0xaa, 0xec, 0x26, 0x60, + 0xaf, 0xe9, 0x23, 0x65, 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, + 0x3d, 0x7b, 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71, + 0x88, 0xce, 0x04, 0x42, 0x8d, 0xcb, 0x01, 0x47, 0x82, 0xc4, + 0x0e, 0x48, 0x87, 0xc1, 0x0b, 0x4d, 0x9c, 0xda, 0x10, 0x56, + 0x99, 0xdf, 0x15, 0x53, 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, + 0x1f, 0x59, 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f, + 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35, 0xe4, 0xa2, + 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b, 0xee, 0xa8, 0x62, 0x24, + 0xeb, 0xad, 0x67, 0x21, 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, + 0x51, 0x17, 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d, + 0xcc, 0x8a, 0x40, 0x06, 0xc9, 0x8f, 0x45, 0x03, 0xc6, 0x80, + 0x4a, 0x0c, 0xc3, 0x85, 0x4f, 0x09, 0x00, 0x47, 0x8e, 0xc9, + 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, + 0x8d, 0xca, 0x04, 0x43, 0x8a, 0xcd, 0x05, 0x42, 0x8b, 0xcc, + 0x06, 0x41, 0x88, 0xcf, 0x07, 0x40, 0x89, 0xce, 0x08, 0x4f, + 0x86, 0xc1, 0x09, 0x4e, 0x87, 0xc0, 0x0a, 0x4d, 0x84, 0xc3, + 0x0b, 0x4c, 0x85, 0xc2, 0x0c, 0x4b, 0x82, 0xc5, 0x0d, 0x4a, + 0x83, 0xc4, 0x0e, 0x49, 0x80, 0xc7, 0x0f, 0x48, 0x81, 0xc6, + 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8, 0x12, 0x55, + 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda, 0x14, 0x53, 0x9a, 0xdd, + 0x15, 0x52, 0x9b, 0xdc, 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, + 0x99, 0xde, 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0, + 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2, 0x1c, 0x5b, + 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4, 0x1e, 0x59, 0x90, 0xd7, + 0x1f, 0x58, 0x91, 0xd6, 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, + 0xaf, 0xe8, 0x22, 0x65, 
0xac, 0xeb, 0x23, 0x64, 0xad, 0xea, + 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec, 0x26, 0x61, + 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee, 0x28, 0x6f, 0xa6, 0xe1, + 0x29, 0x6e, 0xa7, 0xe0, 0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, + 0xa5, 0xe2, 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4, + 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6, 0x30, 0x77, + 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8, 0x32, 0x75, 0xbc, 0xfb, + 0x33, 0x74, 0xbd, 0xfa, 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, + 0xbb, 0xfc, 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe, + 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0, 0x3a, 0x7d, + 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2, 0x3c, 0x7b, 0xb2, 0xf5, + 0x3d, 0x7a, 0xb3, 0xf4, 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, + 0xb1, 0xf6, 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, + 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f, 0xf4, 0xbc, + 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11, 0x8e, 0xc6, 0x1e, 0x56, + 0xb3, 0xfb, 0x23, 0x6b, 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, + 0x58, 0x10, 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a, + 0x01, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4, 0x7b, 0x33, + 0xeb, 0xa3, 0x46, 0x0e, 0xd6, 0x9e, 0xf7, 0xbf, 0x67, 0x2f, + 0xca, 0x82, 0x5a, 0x12, 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, + 0x20, 0x68, 0x03, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6, + 0x79, 0x31, 0xe9, 0xa1, 0x44, 0x0c, 0xd4, 0x9c, 0x02, 0x4a, + 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7, 0x78, 0x30, 0xe8, 0xa0, + 0x45, 0x0d, 0xd5, 0x9d, 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, + 0x5b, 0x13, 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69, + 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16, 0x89, 0xc1, + 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c, 0x07, 0x4f, 0x97, 0xdf, + 0x3a, 0x72, 0xaa, 0xe2, 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x08, + 0xd0, 0x98, 0x06, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3, + 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x09, 0xd1, 0x99, 0xf2, 0xba, + 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17, 0x88, 0xc0, 0x18, 0x50, + 0xb5, 0xfd, 0x25, 0x6d, 0x04, 0x4c, 0x94, 0xdc, 0x39, 0x71, + 0xa9, 0xe1, 0x7e, 0x36, 0xee, 0xa6, 
0x43, 0x0b, 0xd3, 0x9b, + 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15, 0x8a, 0xc2, + 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f, 0xf1, 0xb9, 0x61, 0x29, + 0xcc, 0x84, 0x5c, 0x14, 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, + 0x26, 0x6e, 0x05, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0, + 0x7f, 0x37, 0xef, 0xa7, 0x42, 0x0a, 0xd2, 0x9a, 0x00, 0x49, + 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, + 0x4b, 0x02, 0xd9, 0x90, 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, + 0x4f, 0x06, 0x96, 0xdf, 0x04, 0x4d, 0xaf, 0xe6, 0x3d, 0x74, + 0xd5, 0x9c, 0x47, 0x0e, 0xec, 0xa5, 0x7e, 0x37, 0xa7, 0xee, + 0x35, 0x7c, 0x9e, 0xd7, 0x0c, 0x45, 0x31, 0x78, 0xa3, 0xea, + 0x08, 0x41, 0x9a, 0xd3, 0x43, 0x0a, 0xd1, 0x98, 0x7a, 0x33, + 0xe8, 0xa1, 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55, + 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27, 0x53, 0x1a, + 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1, 0x21, 0x68, 0xb3, 0xfa, + 0x18, 0x51, 0x8a, 0xc3, 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, + 0xc9, 0x80, 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2, + 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64, 0xf4, 0xbd, + 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16, 0x73, 0x3a, 0xe1, 0xa8, + 0x4a, 0x03, 0xd8, 0x91, 0x01, 0x48, 0x93, 0xda, 0x38, 0x71, + 0xaa, 0xe3, 0x97, 0xde, 0x05, 0x4c, 0xae, 0xe7, 0x3c, 0x75, + 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x07, 0xa6, 0xef, + 0x34, 0x7d, 0x9f, 0xd6, 0x0d, 0x44, 0xd4, 0x9d, 0x46, 0x0f, + 0xed, 0xa4, 0x7f, 0x36, 0x42, 0x0b, 0xd0, 0x99, 0x7b, 0x32, + 0xe9, 0xa0, 0x30, 0x79, 0xa2, 0xeb, 0x09, 0x40, 0x9b, 0xd2, + 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26, 0xb6, 0xff, + 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54, 0x20, 0x69, 0xb2, 0xfb, + 0x19, 0x50, 0x8b, 0xc2, 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, + 0xf9, 0xb0, 0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3, + 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81, 0xf5, 0xbc, + 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17, 0x87, 0xce, 0x15, 0x5c, + 0xbe, 0xf7, 0x2c, 0x65, 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, + 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 
0xcb, 0x81, + 0xd4, 0x9e, 0x40, 0x0a, 0xe1, 0xab, 0x75, 0x3f, 0xbe, 0xf4, + 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55, 0xb5, 0xff, 0x21, 0x6b, + 0x80, 0xca, 0x14, 0x5e, 0xdf, 0x95, 0x4b, 0x01, 0xea, 0xa0, + 0x7e, 0x34, 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a, + 0x0b, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0, 0x77, 0x3d, + 0xe3, 0xa9, 0x42, 0x08, 0xd6, 0x9c, 0x1d, 0x57, 0x89, 0xc3, + 0x28, 0x62, 0xbc, 0xf6, 0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc, + 0x02, 0x48, 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22, + 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29, 0xa8, 0xe2, + 0x3c, 0x76, 0x9d, 0xd7, 0x09, 0x43, 0x16, 0x5c, 0x82, 0xc8, + 0x23, 0x69, 0xb7, 0xfd, 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x03, + 0xdd, 0x97, 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x05, + 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f, 0x3a, 0x70, + 0xae, 0xe4, 0x0f, 0x45, 0x9b, 0xd1, 0x50, 0x1a, 0xc4, 0x8e, + 0x65, 0x2f, 0xf1, 0xbb, 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, + 0xfa, 0xb0, 0x31, 0x7b, 0xa5, 0xef, 0x04, 0x4e, 0x90, 0xda, + 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64, 0xe5, 0xaf, + 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0x0e, 0x99, 0xd3, 0x0d, 0x47, + 0xac, 0xe6, 0x38, 0x72, 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, + 0x52, 0x18, 0x4d, 0x07, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6, + 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc, 0x2c, 0x66, + 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7, 0x46, 0x0c, 0xd2, 0x98, + 0x73, 0x39, 0xe7, 0xad, 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, + 0x59, 0x13, 0x92, 0xd8, 0x06, 0x4c, 0xa7, 0xed, 0x33, 0x79, + 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, + 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e, 0xc4, 0x8f, 0x52, 0x19, + 0xf5, 0xbe, 0x63, 0x28, 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, + 0x01, 0x4a, 0x95, 0xde, 0x03, 0x48, 0xa4, 0xef, 0x32, 0x79, + 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b, 0x51, 0x1a, + 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd, 0x33, 0x78, 0xa5, 0xee, + 0x02, 0x49, 0x94, 0xdf, 0x37, 0x7c, 0xa1, 0xea, 0x06, 0x4d, + 0x90, 0xdb, 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9, + 
0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f, 0x91, 0xda, + 0x07, 0x4c, 0xa0, 0xeb, 0x36, 0x7d, 0xa2, 0xe9, 0x34, 0x7f, + 0x93, 0xd8, 0x05, 0x4e, 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, + 0x67, 0x2c, 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a, + 0x04, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8, 0x6e, 0x25, + 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82, 0x0c, 0x47, 0x9a, 0xd1, + 0x3d, 0x76, 0xab, 0xe0, 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, + 0x0d, 0x46, 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24, + 0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17, 0x99, 0xd2, + 0x0f, 0x44, 0xa8, 0xe3, 0x3e, 0x75, 0x3f, 0x74, 0xa9, 0xe2, + 0x0e, 0x45, 0x98, 0xd3, 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, + 0xfa, 0xb1, 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5, + 0x3b, 0x70, 0xad, 0xe6, 0x0a, 0x41, 0x9c, 0xd7, 0x9d, 0xd6, + 0x0b, 0x40, 0xac, 0xe7, 0x3a, 0x71, 0xff, 0xb4, 0x69, 0x22, + 0xce, 0x85, 0x58, 0x13, 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, + 0x6b, 0x20, 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x09, 0x42, + 0x08, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4, 0x6a, 0x21, + 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86, 0x00, 0x4c, 0x98, 0xd4, + 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, + 0xef, 0xa3, 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x01, 0x4d, + 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17, 0x75, 0x39, + 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c, 0x2f, 0x63, 0xb7, 0xfb, + 0x02, 0x4e, 0x9a, 0xd6, 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, + 0x74, 0x38, 0x9b, 0xd7, 0x03, 0x4f, 0xb6, 0xfa, 0x2e, 0x62, + 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13, 0xb0, 0xfc, + 0x28, 0x64, 0x9d, 0xd1, 0x05, 0x49, 0x5e, 0x12, 0xc6, 0x8a, + 0x73, 0x3f, 0xeb, 0xa7, 0x04, 0x48, 0x9c, 0xd0, 0x29, 0x65, + 0xb1, 0xfd, 0x9f, 0xd3, 0x07, 0x4b, 0xb2, 0xfe, 0x2a, 0x66, + 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c, 0x2b, 0x67, + 0xb3, 0xff, 0x06, 0x4a, 0x9e, 0xd2, 0x71, 0x3d, 0xe9, 0xa5, + 0x5c, 0x10, 0xc4, 0x88, 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, + 0x7c, 0x30, 0x93, 0xdf, 0x0b, 0x47, 0xbe, 0xf2, 0x26, 0x6a, + 0x7d, 0x31, 
0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84, 0x27, 0x6b, + 0xbf, 0xf3, 0x0a, 0x46, 0x92, 0xde, 0xbc, 0xf0, 0x24, 0x68, + 0x91, 0xdd, 0x09, 0x45, 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, + 0x53, 0x1f, 0x08, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1, + 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab, 0x23, 0x6f, + 0xbb, 0xf7, 0x0e, 0x42, 0x96, 0xda, 0x79, 0x35, 0xe1, 0xad, + 0x54, 0x18, 0xcc, 0x80, 0x97, 0xdb, 0x0f, 0x43, 0xba, 0xf6, + 0x22, 0x6e, 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34, + 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf, 0x0c, 0x40, + 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5, 0xe2, 0xae, 0x7a, 0x36, + 0xcf, 0x83, 0x57, 0x1b, 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, + 0x0d, 0x41, 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, + 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac, 0xa4, 0xe9, + 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a, 0xf6, 0xbb, 0x6c, 0x21, + 0xdf, 0x92, 0x45, 0x08, 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, + 0xe6, 0xab, 0x07, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9, + 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0x0f, 0xa3, 0xee, + 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d, 0xaa, 0xe7, 0x30, 0x7d, + 0x83, 0xce, 0x19, 0x54, 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, + 0x4b, 0x06, 0x0e, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0, + 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2, 0xff, 0xb2, + 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x01, 0xad, 0xe0, 0x37, 0x7a, + 0x84, 0xc9, 0x1e, 0x53, 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, + 0xe8, 0xa5, 0x09, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7, + 0x49, 0x04, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7, 0x1b, 0x56, + 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5, 0xed, 0xa0, 0x77, 0x3a, + 0xc4, 0x89, 0x5e, 0x13, 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, + 0x0c, 0x41, 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2, + 0x4e, 0x03, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0, 0xb8, 0xf5, + 0x22, 0x6f, 0x91, 0xdc, 0x0b, 0x46, 0xea, 0xa7, 0x70, 0x3d, + 0xc3, 0x8e, 0x59, 0x14, 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, + 0x50, 0x1d, 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x02, 0x4f, + 0x47, 0x0a, 0xdd, 0x90, 
0x6e, 0x23, 0xf4, 0xb9, 0x15, 0x58, + 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb, 0xb6, 0xfb, 0x2c, 0x61, + 0x9f, 0xd2, 0x05, 0x48, 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, + 0x57, 0x1a, 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec, + 0x40, 0x0d, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe, 0x00, 0x4e, + 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, + 0x6f, 0x21, 0xf3, 0xbd, 0x94, 0xda, 0x08, 0x46, 0xb1, 0xff, + 0x2d, 0x63, 0xde, 0x90, 0x42, 0x0c, 0xfb, 0xb5, 0x67, 0x29, + 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2, 0x7f, 0x31, + 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88, 0xa1, 0xef, 0x3d, 0x73, + 0x84, 0xca, 0x18, 0x56, 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, + 0x52, 0x1c, 0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x01, 0xd3, 0x9d, + 0x20, 0x6e, 0xbc, 0xf2, 0x05, 0x4b, 0x99, 0xd7, 0xfe, 0xb0, + 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x09, 0xb4, 0xfa, 0x28, 0x66, + 0x91, 0xdf, 0x0d, 0x43, 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, + 0xe6, 0xa8, 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2, + 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c, 0x81, 0xcf, + 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76, 0xd4, 0x9a, 0x48, 0x06, + 0xf1, 0xbf, 0x6d, 0x23, 0x9e, 0xd0, 0x02, 0x4c, 0xbb, 0xf5, + 0x27, 0x69, 0x40, 0x0e, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7, + 0x0a, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd, 0xe1, 0xaf, + 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16, 0xab, 0xe5, 0x37, 0x79, + 0x8e, 0xc0, 0x12, 0x5c, 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, + 0xcc, 0x82, 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8, + 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x07, 0x49, 0xf4, 0xba, + 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x03, 0x2a, 0x64, 0xb6, 0xf8, + 0x0f, 0x41, 0x93, 0xdd, 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0x0b, + 0xd9, 0x97, 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c, + 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36, 0x1f, 0x51, + 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8, 0x55, 0x1b, 0xc9, 0x87, + 0x70, 0x3e, 0xec, 0xa2, 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, + 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2, + 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 
0x3b, 0x74, 0xc6, 0x89, + 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36, 0x15, 0x5a, 0x8b, 0xc4, + 0x34, 0x7b, 0xaa, 0xe5, 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, + 0xe8, 0xa7, 0x91, 0xde, 0x0f, 0x40, 0xb0, 0xff, 0x2e, 0x61, + 0xd3, 0x9c, 0x4d, 0x02, 0xf2, 0xbd, 0x6c, 0x23, 0x2a, 0x65, + 0xb4, 0xfb, 0x0b, 0x44, 0x95, 0xda, 0x68, 0x27, 0xf6, 0xb9, + 0x49, 0x06, 0xd7, 0x98, 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, + 0x11, 0x5e, 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c, + 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf, 0x7d, 0x32, + 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d, 0xbb, 0xf4, 0x25, 0x6a, + 0x9a, 0xd5, 0x04, 0x4b, 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, + 0x46, 0x09, 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4, + 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6, 0xd0, 0x9f, + 0x4e, 0x01, 0xf1, 0xbe, 0x6f, 0x20, 0x92, 0xdd, 0x0c, 0x43, + 0xb3, 0xfc, 0x2d, 0x62, 0x41, 0x0e, 0xdf, 0x90, 0x60, 0x2f, + 0xfe, 0xb1, 0x03, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3, + 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35, 0x87, 0xc8, + 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77, 0x7e, 0x31, 0xe0, 0xaf, + 0x5f, 0x10, 0xc1, 0x8e, 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, + 0x83, 0xcc, 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0x0a, + 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x07, 0x48, 0x6b, 0x24, + 0xf5, 0xba, 0x4a, 0x05, 0xd4, 0x9b, 0x29, 0x66, 0xb7, 0xf8, + 0x08, 0x47, 0x96, 0xd9, 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, + 0x50, 0x1f, 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d, + 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, + 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17, 0x69, 0x39, 0xc9, 0x99, + 0x34, 0x64, 0x94, 0xc4, 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, + 0x2e, 0x7e, 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f, + 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5, 0xbb, 0xeb, + 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16, 0x01, 0x51, 0xa1, 0xf1, + 0x5c, 0x0c, 0xfc, 0xac, 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, + 0x44, 0x14, 0x03, 0x53, 0xa3, 0xf3, 0x5e, 0x0e, 0xfe, 0xae, + 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d, 
0x6a, 0x3a, + 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7, 0x6b, 0x3b, 0xcb, 0x9b, + 0x36, 0x66, 0x96, 0xc6, 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, + 0x2c, 0x7c, 0x02, 0x52, 0xa2, 0xf2, 0x5f, 0x0f, 0xff, 0xaf, + 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15, 0x6f, 0x3f, + 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2, 0xd5, 0x85, 0x75, 0x25, + 0x88, 0xd8, 0x28, 0x78, 0x06, 0x56, 0xa6, 0xf6, 0x5b, 0x0b, + 0xfb, 0xab, 0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11, + 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10, 0x07, 0x57, + 0xa7, 0xf7, 0x5a, 0x0a, 0xfa, 0xaa, 0xd4, 0x84, 0x74, 0x24, + 0x89, 0xd9, 0x29, 0x79, 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, + 0x93, 0xc3, 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b, + 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1, 0xbf, 0xef, + 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12, 0x05, 0x55, 0xa5, 0xf5, + 0x58, 0x08, 0xf8, 0xa8, 0x04, 0x54, 0xa4, 0xf4, 0x59, 0x09, + 0xf9, 0xa9, 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13, + 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0, 0xd7, 0x87, + 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a, 0x00, 0x51, 0xa2, 0xf3, + 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, + 0x49, 0x18, 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3, + 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61, 0xf2, 0xa3, + 0x50, 0x01, 0xab, 0xfa, 0x09, 0x58, 0x40, 0x11, 0xe2, 0xb3, + 0x19, 0x48, 0xbb, 0xea, 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, + 0x70, 0x21, 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93, + 0xf9, 0xa8, 0x5b, 0x0a, 0xa0, 0xf1, 0x02, 0x53, 0x4b, 0x1a, + 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1, 0x80, 0xd1, 0x22, 0x73, + 0xd9, 0x88, 0x7b, 0x2a, 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, + 0xc9, 0x98, 0x0b, 0x5a, 0xa9, 0xf8, 0x52, 0x03, 0xf0, 0xa1, + 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13, 0x72, 0x23, + 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8, 0xc0, 0x91, 0x62, 0x33, + 0x99, 0xc8, 0x3b, 0x6a, 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, + 0x14, 0x45, 0x5d, 0x0c, 0xff, 0xae, 0x04, 0x55, 0xa6, 0xf7, + 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c, 0x24, 0x75, + 
0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e, 0x1d, 0x4c, 0xbf, 0xee, + 0x44, 0x15, 0xe6, 0xb7, 0xaf, 0xfe, 0x0d, 0x5c, 0xf6, 0xa7, + 0x54, 0x05, 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce, + 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c, 0x16, 0x47, + 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc, 0xa4, 0xf5, 0x06, 0x57, + 0xfd, 0xac, 0x5f, 0x0e, 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, + 0x94, 0xc5, 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77, + 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e, 0x56, 0x07, + 0xf4, 0xa5, 0x0f, 0x5e, 0xad, 0xfc, 0x9d, 0xcc, 0x3f, 0x6e, + 0xc4, 0x95, 0x66, 0x37, 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, + 0xd4, 0x85, 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, + 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09, 0x49, 0x1b, + 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea, 0xe3, 0xb1, 0x47, 0x15, + 0xb6, 0xe4, 0x12, 0x40, 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, + 0x63, 0x31, 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b, + 0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78, 0x71, 0x23, + 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2, 0x39, 0x6b, 0x9d, 0xcf, + 0x6c, 0x3e, 0xc8, 0x9a, 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, + 0x62, 0x30, 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3, + 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79, 0xab, 0xf9, + 0x0f, 0x5d, 0xfe, 0xac, 0x5a, 0x08, 0x01, 0x53, 0xa5, 0xf7, + 0x54, 0x06, 0xf0, 0xa2, 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, + 0x13, 0x41, 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb, + 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1, 0xd8, 0x8a, + 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b, 0x3b, 0x69, 0x9f, 0xcd, + 0x6e, 0x3c, 0xca, 0x98, 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, + 0x60, 0x32, 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43, + 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9, 0xa9, 0xfb, + 0x0d, 0x5f, 0xfc, 0xae, 0x58, 0x0a, 0x03, 0x51, 0xa7, 0xf5, + 0x56, 0x04, 0xf2, 0xa0, 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, + 0xba, 0xe8, 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42, + 0x02, 0x50, 0xa6, 0xf4, 0x57, 0x05, 0xf3, 0xa1, 0xa8, 0xfa, + 0x0c, 0x5e, 
0xfd, 0xaf, 0x59, 0x0b, 0xd9, 0x8b, 0x7d, 0x2f, + 0x8c, 0xde, 0x28, 0x7a, 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, + 0x82, 0xd0, 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33, + 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99, 0x00, 0x53, + 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, + 0xf3, 0xa0, 0x55, 0x06, 0x59, 0x0a, 0xff, 0xac, 0x08, 0x5b, + 0xae, 0xfd, 0xfb, 0xa8, 0x5d, 0x0e, 0xaa, 0xf9, 0x0c, 0x5f, + 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16, 0x10, 0x43, + 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4, 0xeb, 0xb8, 0x4d, 0x1e, + 0xba, 0xe9, 0x1c, 0x4f, 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, + 0xbe, 0xed, 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd, + 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f, 0x20, 0x73, + 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84, 0x82, 0xd1, 0x24, 0x77, + 0xd3, 0x80, 0x75, 0x26, 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, + 0x3c, 0x6f, 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd, + 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36, 0x30, 0x63, + 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94, 0xf2, 0xa1, 0x54, 0x07, + 0xa3, 0xf0, 0x05, 0x56, 0x50, 0x03, 0xf6, 0xa5, 0x01, 0x52, + 0xa7, 0xf4, 0xab, 0xf8, 0x0d, 0x5e, 0xfa, 0xa9, 0x5c, 0x0f, + 0x09, 0x5a, 0xaf, 0xfc, 0x58, 0x0b, 0xfe, 0xad, 0x40, 0x13, + 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4, 0xe2, 0xb1, 0x44, 0x17, + 0xb3, 0xe0, 0x15, 0x46, 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, + 0xee, 0xbd, 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f, + 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f, 0x29, 0x7a, + 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d, 0xd2, 0x81, 0x74, 0x27, + 0x83, 0xd0, 0x25, 0x76, 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, + 0x87, 0xd4, 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d, + 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f, 0x60, 0x33, + 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4, 0xc2, 0x91, 0x64, 0x37, + 0x93, 0xc0, 0x35, 0x66, 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, + 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b, + 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98, 0xb3, 0xe7, + 0x1b, 0x4f, 0xfe, 0xaa, 
0x56, 0x02, 0x52, 0x06, 0xfa, 0xae, + 0x1f, 0x4b, 0xb7, 0xe3, 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, + 0x2d, 0x79, 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca, + 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x04, 0x50, 0xa4, 0xf0, + 0x0c, 0x58, 0xe9, 0xbd, 0x41, 0x15, 0x3e, 0x6a, 0x96, 0xc2, + 0x73, 0x27, 0xdb, 0x8f, 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, + 0x68, 0x3c, 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0x0e, 0xf2, 0xa6, + 0xf6, 0xa2, 0x5e, 0x0a, 0xbb, 0xef, 0x13, 0x47, 0x6c, 0x38, + 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd, 0xdf, 0x8b, 0x77, 0x23, + 0x92, 0xc6, 0x3a, 0x6e, 0x45, 0x11, 0xed, 0xb9, 0x08, 0x5c, + 0xa0, 0xf4, 0x55, 0x01, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4, + 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e, 0x7c, 0x28, + 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd, 0xe6, 0xb2, 0x4e, 0x1a, + 0xab, 0xff, 0x03, 0x57, 0x07, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, + 0xe2, 0xb6, 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c, + 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f, 0xb4, 0xe0, + 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x05, 0xf1, 0xa5, 0x59, 0x0d, + 0xbc, 0xe8, 0x14, 0x40, 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, + 0x8e, 0xda, 0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69, + 0x42, 0x16, 0xea, 0xbe, 0x0f, 0x5b, 0xa7, 0xf3, 0xa3, 0xf7, + 0x0b, 0x5f, 0xee, 0xba, 0x46, 0x12, 0x39, 0x6d, 0x91, 0xc5, + 0x74, 0x20, 0xdc, 0x88, 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, + 0x6f, 0x3b, 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x09, 0xf5, 0xa1, + 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, + 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24, 0x39, 0x6c, 0x93, 0xc6, + 0x70, 0x25, 0xda, 0x8f, 0xab, 0xfe, 0x01, 0x54, 0xe2, 0xb7, + 0x48, 0x1d, 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4, + 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x03, 0x56, 0x4b, 0x1e, + 0xe1, 0xb4, 0x02, 0x57, 0xa8, 0xfd, 0xd9, 0x8c, 0x73, 0x26, + 0x90, 0xc5, 0x3a, 0x6f, 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, + 0x07, 0x52, 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0, + 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b, 0x4f, 0x1a, + 0xe5, 0xb0, 0x06, 0x53, 0xac, 0xf9, 
0x96, 0xc3, 0x3c, 0x69, + 0xdf, 0x8a, 0x75, 0x20, 0x04, 0x51, 0xae, 0xfb, 0x4d, 0x18, + 0xe7, 0xb2, 0xaf, 0xfa, 0x05, 0x50, 0xe6, 0xb3, 0x4c, 0x19, + 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b, 0xd5, 0x80, + 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63, 0x47, 0x12, 0xed, 0xb8, + 0x0e, 0x5b, 0xa4, 0xf1, 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, + 0x0f, 0x5a, 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8, + 0xa7, 0xf2, 0x0d, 0x58, 0xee, 0xbb, 0x44, 0x11, 0x35, 0x60, + 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83, 0x9e, 0xcb, 0x34, 0x61, + 0xd7, 0x82, 0x7d, 0x28, 0x0c, 0x59, 0xa6, 0xf3, 0x45, 0x10, + 0xef, 0xba, 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87, + 0xa3, 0xf6, 0x09, 0x5c, 0xea, 0xbf, 0x40, 0x15, 0x08, 0x5d, + 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe, 0x9a, 0xcf, 0x30, 0x65, + 0xd3, 0x86, 0x79, 0x2c, 0x43, 0x16, 0xe9, 0xbc, 0x0a, 0x5f, + 0xa0, 0xf5, 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67, + 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc, 0xe8, 0xbd, + 0x42, 0x17, 0xa1, 0xf4, 0x0b, 0x5e, 0x00, 0x56, 0xac, 0xfa, + 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, + 0x63, 0x35, 0x09, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6, + 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c, 0x12, 0x44, + 0xbe, 0xe8, 0x57, 0x01, 0xfb, 0xad, 0x98, 0xce, 0x34, 0x62, + 0xdd, 0x8b, 0x71, 0x27, 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x08, + 0xf2, 0xa4, 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e, + 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b, 0xae, 0xf8, + 0x02, 0x54, 0xeb, 0xbd, 0x47, 0x11, 0x2d, 0x7b, 0x81, 0xd7, + 0x68, 0x3e, 0xc4, 0x92, 0xa7, 0xf1, 0x0b, 0x5d, 0xe2, 0xb4, + 0x4e, 0x18, 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89, + 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x03, 0x3f, 0x69, + 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80, 0xb5, 0xe3, 0x19, 0x4f, + 0xf0, 0xa6, 0x5c, 0x0a, 0x48, 0x1e, 0xe4, 0xb2, 0x0d, 0x5b, + 0xa1, 0xf7, 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d, + 0x41, 0x17, 0xed, 0xbb, 0x04, 0x52, 0xa8, 0xfe, 0xcb, 0x9d, + 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74, 0x5a, 0x0c, 
0xf6, 0xa0, + 0x1f, 0x49, 0xb3, 0xe5, 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, + 0x39, 0x6f, 0x53, 0x05, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec, + 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66, 0x6c, 0x3a, + 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3, 0xe6, 0xb0, 0x4a, 0x1c, + 0xa3, 0xf5, 0x0f, 0x59, 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, + 0x8c, 0xda, 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x06, 0x50, + 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1, 0xf4, 0xa2, + 0x58, 0x0e, 0xb1, 0xe7, 0x1d, 0x4b, 0x77, 0x21, 0xdb, 0x8d, + 0x32, 0x64, 0x9e, 0xc8, 0xfd, 0xab, 0x51, 0x07, 0xb8, 0xee, + 0x14, 0x42, 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, + 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a, 0x19, 0x4e, + 0xb7, 0xe0, 0x58, 0x0f, 0xf6, 0xa1, 0x9b, 0xcc, 0x35, 0x62, + 0xda, 0x8d, 0x74, 0x23, 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, + 0xdd, 0x8a, 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x08, + 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93, 0xa9, 0xfe, + 0x07, 0x50, 0xe8, 0xbf, 0x46, 0x11, 0x64, 0x33, 0xca, 0x9d, + 0x25, 0x72, 0x8b, 0xdc, 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, + 0x09, 0x5e, 0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5, + 0xff, 0xa8, 0x51, 0x06, 0xbe, 0xe9, 0x10, 0x47, 0x56, 0x01, + 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee, 0xd4, 0x83, 0x7a, 0x2d, + 0x95, 0xc2, 0x3b, 0x6c, 0x4f, 0x18, 0xe1, 0xb6, 0x0e, 0x59, + 0xa0, 0xf7, 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75, + 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70, 0x4a, 0x1d, + 0xe4, 0xb3, 0x0b, 0x5c, 0xa5, 0xf2, 0xd1, 0x86, 0x7f, 0x28, + 0x90, 0xc7, 0x3e, 0x69, 0x53, 0x04, 0xfd, 0xaa, 0x12, 0x45, + 0xbc, 0xeb, 0xfa, 0xad, 0x54, 0x03, 0xbb, 0xec, 0x15, 0x42, + 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0, 0xe3, 0xb4, + 0x4d, 0x1a, 0xa2, 0xf5, 0x0c, 0x5b, 0x61, 0x36, 0xcf, 0x98, + 0x20, 0x77, 0x8e, 0xd9, 0xac, 0xfb, 0x02, 0x55, 0xed, 0xba, + 0x43, 0x14, 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96, + 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0x0d, 0x37, 0x60, + 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f, 0x9e, 0xc9, 0x30, 0x67, + 
0xdf, 0x88, 0x71, 0x26, 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0x0a, + 0xf3, 0xa4, 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f, + 0x05, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd, 0x00, 0x58, + 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, + 0x87, 0xdf, 0x37, 0x6f, 0xe9, 0xb1, 0x59, 0x01, 0x94, 0xcc, + 0x24, 0x7c, 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86, + 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x02, 0x5a, 0x35, 0x6d, + 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0, 0x26, 0x7e, 0x96, 0xce, + 0x5b, 0x03, 0xeb, 0xb3, 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, + 0x11, 0x49, 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16, + 0x79, 0x21, 0xc9, 0x91, 0x04, 0x5c, 0xb4, 0xec, 0x6a, 0x32, + 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff, 0x90, 0xc8, 0x20, 0x78, + 0xed, 0xb5, 0x5d, 0x05, 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, + 0x81, 0xd9, 0xb6, 0xee, 0x06, 0x5e, 0xcb, 0x93, 0x7b, 0x23, + 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30, 0x5f, 0x07, + 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca, 0x1b, 0x43, 0xab, 0xf3, + 0x66, 0x3e, 0xd6, 0x8e, 0xe1, 0xb9, 0x51, 0x09, 0x9c, 0xc4, + 0x2c, 0x74, 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67, + 0x08, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d, 0xd4, 0x8c, + 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41, 0x2e, 0x76, 0x9e, 0xc6, + 0x53, 0x0b, 0xe3, 0xbb, 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, + 0xf0, 0xa8, 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0x0a, 0x52, + 0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0x0d, 0x62, 0x3a, + 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7, 0x71, 0x29, 0xc1, 0x99, + 0x0c, 0x54, 0xbc, 0xe4, 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, + 0x46, 0x1e, 0x57, 0x0f, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2, + 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38, 0xbe, 0xe6, + 0x0e, 0x56, 0xc3, 0x9b, 0x73, 0x2b, 0x44, 0x1c, 0xf4, 0xac, + 0x39, 0x61, 0x89, 0xd1, 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, + 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60, + 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b, 0x0b, 0x52, + 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99, 0xef, 0xb6, 0x5d, 0x04, + 0x96, 0xcf, 
0x24, 0x7d, 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, + 0xd6, 0x8f, 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84, + 0xe4, 0xbd, 0x56, 0x0f, 0x9d, 0xc4, 0x2f, 0x76, 0xc3, 0x9a, + 0x71, 0x28, 0xba, 0xe3, 0x08, 0x51, 0x31, 0x68, 0x83, 0xda, + 0x48, 0x11, 0xfa, 0xa3, 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, + 0xf1, 0xa8, 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x03, 0x5a, + 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0x0c, 0xe7, 0xbe, 0xde, 0x87, + 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c, 0xd5, 0x8c, 0x67, 0x3e, + 0xac, 0xf5, 0x1e, 0x47, 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x07, + 0xec, 0xb5, 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x09, + 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb, 0x62, 0x3b, + 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0, 0x90, 0xc9, 0x22, 0x7b, + 0xe9, 0xb0, 0x5b, 0x02, 0x74, 0x2d, 0xc6, 0x9f, 0x0d, 0x54, + 0xbf, 0xe6, 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14, + 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f, 0x7f, 0x26, + 0xcd, 0x94, 0x06, 0x5f, 0xb4, 0xed, 0x58, 0x01, 0xea, 0xb3, + 0x21, 0x78, 0x93, 0xca, 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, + 0x61, 0x38, 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33, + 0x53, 0x0a, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1, 0xb7, 0xee, + 0x05, 0x5c, 0xce, 0x97, 0x7c, 0x25, 0x45, 0x1c, 0xf7, 0xae, + 0x3c, 0x65, 0x8e, 0xd7, 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, + 0x85, 0xdc, 0xbc, 0xe5, 0x0e, 0x57, 0xc5, 0x9c, 0x77, 0x2e, + 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, + 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71, 0xc9, 0x93, 0x7d, 0x27, + 0xbc, 0xe6, 0x08, 0x52, 0x23, 0x79, 0x97, 0xcd, 0x56, 0x0c, + 0xe2, 0xb8, 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14, + 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe, 0x46, 0x1c, + 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd, 0xac, 0xf6, 0x18, 0x42, + 0xd9, 0x83, 0x6d, 0x37, 0x03, 0x59, 0xb7, 0xed, 0x76, 0x2c, + 0xc2, 0x98, 0xe9, 0xb3, 0x5d, 0x07, 0x9c, 0xc6, 0x28, 0x72, + 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0x0b, 0x51, 0x20, 0x7a, + 0x94, 0xce, 0x55, 0x0f, 0xe1, 0xbb, 0x8c, 0xd6, 0x38, 0x62, + 0xf9, 0xa3, 0x4d, 0x17, 
0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, + 0xa7, 0xfd, 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde, + 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34, 0x06, 0x5c, + 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d, 0xec, 0xb6, 0x58, 0x02, + 0x99, 0xc3, 0x2d, 0x77, 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, + 0x0e, 0x54, 0x25, 0x7f, 0x91, 0xcb, 0x50, 0x0a, 0xe4, 0xbe, + 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12, 0x63, 0x39, + 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8, 0x40, 0x1a, 0xf4, 0xae, + 0x35, 0x6f, 0x81, 0xdb, 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, + 0x6b, 0x31, 0x05, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e, + 0xef, 0xb5, 0x5b, 0x01, 0x9a, 0xc0, 0x2e, 0x74, 0xcc, 0x96, + 0x78, 0x22, 0xb9, 0xe3, 0x0d, 0x57, 0x26, 0x7c, 0x92, 0xc8, + 0x53, 0x09, 0xe7, 0xbd, 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, + 0x4b, 0x11, 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb, + 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8, 0xa9, 0xf3, + 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32, 0x00, 0x5b, 0xb6, 0xed, + 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, + 0x25, 0x7e, 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45, + 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7, 0xaf, 0xf4, + 0x19, 0x42, 0xde, 0x85, 0x68, 0x33, 0x4d, 0x16, 0xfb, 0xa0, + 0x3c, 0x67, 0x8a, 0xd1, 0x76, 0x2d, 0xc0, 0x9b, 0x07, 0x5c, + 0xb1, 0xea, 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x08, + 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf, 0xa1, 0xfa, + 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d, 0x9a, 0xc1, 0x2c, 0x77, + 0xeb, 0xb0, 0x5d, 0x06, 0x78, 0x23, 0xce, 0x95, 0x09, 0x52, + 0xbf, 0xe4, 0xec, 0xb7, 0x5a, 0x01, 0x9d, 0xc6, 0x2b, 0x70, + 0x0e, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92, 0x35, 0x6e, + 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9, 0xd7, 0x8c, 0x61, 0x3a, + 0xa6, 0xfd, 0x10, 0x4b, 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, + 0x41, 0x1a, 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8, + 0x5f, 0x04, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3, 0xbd, 0xe6, + 0x0b, 0x50, 0xcc, 0x97, 0x7a, 0x21, 0x29, 0x72, 0x9f, 0xc4, + 0x58, 0x03, 0xee, 0xb5, 0xcb, 0x90, 
0x7d, 0x26, 0xba, 0xe1, + 0x0c, 0x57, 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c, + 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e, 0xc5, 0x9e, + 0x73, 0x28, 0xb4, 0xef, 0x02, 0x59, 0x27, 0x7c, 0x91, 0xca, + 0x56, 0x0d, 0xe0, 0xbb, 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, + 0xdb, 0x80, 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62, + 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6, 0x88, 0xd3, + 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14, 0xb3, 0xe8, 0x05, 0x5e, + 0xc2, 0x99, 0x74, 0x2f, 0x51, 0x0a, 0xe7, 0xbc, 0x20, 0x7b, + 0x96, 0xcd, 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, + 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53, 0xa9, 0xf5, + 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20, 0x73, 0x2f, 0xcb, 0x97, + 0x1e, 0x42, 0xa6, 0xfa, 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, + 0x9a, 0xc6, 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c, + 0xe6, 0xba, 0x5e, 0x02, 0x8b, 0xd7, 0x33, 0x6f, 0x3c, 0x60, + 0x84, 0xd8, 0x51, 0x0d, 0xe9, 0xb5, 0x9e, 0xc2, 0x26, 0x7a, + 0xf3, 0xaf, 0x4b, 0x17, 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, + 0x91, 0xcd, 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x06, 0xe2, 0xbe, + 0xed, 0xb1, 0x55, 0x09, 0x80, 0xdc, 0x38, 0x64, 0xd1, 0x8d, + 0x69, 0x35, 0xbc, 0xe0, 0x04, 0x58, 0x0b, 0x57, 0xb3, 0xef, + 0x66, 0x3a, 0xde, 0x82, 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, + 0xad, 0xf1, 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b, + 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8, 0xfb, 0xa7, + 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72, 0x88, 0xd4, 0x30, 0x6c, + 0xe5, 0xb9, 0x5d, 0x01, 0x52, 0x0e, 0xea, 0xb6, 0x3f, 0x63, + 0x87, 0xdb, 0x6e, 0x32, 0xd6, 0x8a, 0x03, 0x5f, 0xbb, 0xe7, + 0xb4, 0xe8, 0x0c, 0x50, 0xd9, 0x85, 0x61, 0x3d, 0xc7, 0x9b, + 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e, 0x1d, 0x41, 0xa5, 0xf9, + 0x70, 0x2c, 0xc8, 0x94, 0xbf, 0xe3, 0x07, 0x5b, 0xd2, 0x8e, + 0x6a, 0x36, 0x65, 0x39, 0xdd, 0x81, 0x08, 0x54, 0xb0, 0xec, + 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f, 0xcc, 0x90, + 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45, 0xf0, 0xac, 0x48, 0x14, + 0x9d, 0xc1, 0x25, 0x79, 0x2a, 0x76, 0x92, 0xce, 
0x47, 0x1b, + 0xff, 0xa3, 0x59, 0x05, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0, + 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0x0a, 0x00, 0x5d, + 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, + 0xbb, 0xe6, 0x01, 0x5c, 0xb9, 0xe4, 0x03, 0x5e, 0xd0, 0x8d, + 0x6a, 0x37, 0x6b, 0x36, 0xd1, 0x8c, 0x02, 0x5f, 0xb8, 0xe5, + 0x6f, 0x32, 0xd5, 0x88, 0x06, 0x5b, 0xbc, 0xe1, 0xbd, 0xe0, + 0x07, 0x5a, 0xd4, 0x89, 0x6e, 0x33, 0xd6, 0x8b, 0x6c, 0x31, + 0xbf, 0xe2, 0x05, 0x58, 0x04, 0x59, 0xbe, 0xe3, 0x6d, 0x30, + 0xd7, 0x8a, 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0x0d, 0x50, + 0x0c, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82, 0x67, 0x3a, + 0xdd, 0x80, 0x0e, 0x53, 0xb4, 0xe9, 0xb5, 0xe8, 0x0f, 0x52, + 0xdc, 0x81, 0x66, 0x3b, 0xb1, 0xec, 0x0b, 0x56, 0xd8, 0x85, + 0x62, 0x3f, 0x63, 0x3e, 0xd9, 0x84, 0x0a, 0x57, 0xb0, 0xed, + 0x08, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86, 0xda, 0x87, + 0x60, 0x3d, 0xb3, 0xee, 0x09, 0x54, 0xa1, 0xfc, 0x1b, 0x46, + 0xc8, 0x95, 0x72, 0x2f, 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, + 0xa0, 0xfd, 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96, + 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44, 0xce, 0x93, + 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40, 0x1c, 0x41, 0xa6, 0xfb, + 0x75, 0x28, 0xcf, 0x92, 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, + 0xa4, 0xf9, 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b, + 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1, 0xad, 0xf0, + 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23, 0xc6, 0x9b, 0x7c, 0x21, + 0xaf, 0xf2, 0x15, 0x48, 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, + 0xc7, 0x9a, 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e, + 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c, 0xa9, 0xf4, + 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27, 0x7b, 0x26, 0xc1, 0x9c, + 0x12, 0x4f, 0xa8, 0xf5, 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, + 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d, + 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0x0e, 0x43, 0x1d, + 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4, 0x0f, 0x51, 0xb3, 0xed, + 0x6a, 0x34, 0xd6, 0x88, 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, + 
0x1c, 0x42, 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x01, + 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb, 0x1e, 0x40, + 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99, 0xd4, 0x8a, 0x68, 0x36, + 0xb1, 0xef, 0x0d, 0x53, 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, + 0x4e, 0x10, 0x5d, 0x03, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda, + 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96, 0xdb, 0x85, + 0x67, 0x39, 0xbe, 0xe0, 0x02, 0x5c, 0x98, 0xc6, 0x24, 0x7a, + 0xfd, 0xa3, 0x41, 0x1f, 0x52, 0x0c, 0xee, 0xb0, 0x37, 0x69, + 0x8b, 0xd5, 0x3c, 0x62, 0x80, 0xde, 0x59, 0x07, 0xe5, 0xbb, + 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71, 0xb5, 0xeb, + 0x09, 0x57, 0xd0, 0x8e, 0x6c, 0x32, 0x7f, 0x21, 0xc3, 0x9d, + 0x1a, 0x44, 0xa6, 0xf8, 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x08, + 0xea, 0xb4, 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e, + 0xba, 0xe4, 0x06, 0x58, 0xdf, 0x81, 0x63, 0x3d, 0x70, 0x2e, + 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7, 0x22, 0x7c, 0x9e, 0xc0, + 0x47, 0x19, 0xfb, 0xa5, 0xe8, 0xb6, 0x54, 0x0a, 0x8d, 0xd3, + 0x31, 0x6f, 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c, + 0x61, 0x3f, 0xdd, 0x83, 0x04, 0x5a, 0xb8, 0xe6, 0x2d, 0x73, + 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa, 0xe7, 0xb9, 0x5b, 0x05, + 0x82, 0xdc, 0x3e, 0x60, 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, + 0x7d, 0x23, 0x6e, 0x30, 0xd2, 0x8c, 0x0b, 0x55, 0xb7, 0xe9, + 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, + 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42, 0x99, 0xc6, 0x27, 0x78, + 0xf8, 0xa7, 0x46, 0x19, 0x5b, 0x04, 0xe5, 0xba, 0x3a, 0x65, + 0x84, 0xdb, 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf, + 0xed, 0xb2, 0x53, 0x0c, 0x8c, 0xd3, 0x32, 0x6d, 0xb6, 0xe9, + 0x08, 0x57, 0xd7, 0x88, 0x69, 0x36, 0x74, 0x2b, 0xca, 0x95, + 0x15, 0x4a, 0xab, 0xf4, 0x5e, 0x01, 0xe0, 0xbf, 0x3f, 0x60, + 0x81, 0xde, 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c, + 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47, 0x05, 0x5a, + 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85, 0x71, 0x2e, 0xcf, 0x90, + 0x10, 0x4f, 0xae, 0xf1, 0xb3, 0xec, 0x0d, 0x52, 0xd2, 0x8d, + 0x6c, 0x33, 
0xe8, 0xb7, 0x56, 0x09, 0x89, 0xd6, 0x37, 0x68, + 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa, 0xbc, 0xe3, + 0x02, 0x5d, 0xdd, 0x82, 0x63, 0x3c, 0x7e, 0x21, 0xc0, 0x9f, + 0x1f, 0x40, 0xa1, 0xfe, 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, + 0xfa, 0xa5, 0xe7, 0xb8, 0x59, 0x06, 0x86, 0xd9, 0x38, 0x67, + 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13, 0x51, 0x0e, + 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1, 0x0a, 0x55, 0xb4, 0xeb, + 0x6b, 0x34, 0xd5, 0x8a, 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, + 0x17, 0x48, 0xe2, 0xbd, 0x5c, 0x03, 0x83, 0xdc, 0x3d, 0x62, + 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0, 0x7b, 0x24, + 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb, 0xb9, 0xe6, 0x07, 0x58, + 0xd8, 0x87, 0x66, 0x39, 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, + 0x12, 0x4d, 0x0f, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f, + 0x54, 0x0b, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4, 0x96, 0xc9, + 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16, 0x00, 0x60, 0xc0, 0xa0, + 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, + 0x7a, 0x1a, 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73, + 0x69, 0x09, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54, 0x9c, 0xfc, + 0x5c, 0x3c, 0x01, 0x61, 0xc1, 0xa1, 0xbb, 0xdb, 0x7b, 0x1b, + 0x26, 0x46, 0xe6, 0x86, 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, + 0x8f, 0xef, 0xf5, 0x95, 0x35, 0x55, 0x68, 0x08, 0xa8, 0xc8, + 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18, 0x02, 0x62, + 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f, 0x6b, 0x0b, 0xab, 0xcb, + 0xf6, 0x96, 0x36, 0x56, 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, + 0x11, 0x71, 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84, + 0x9e, 0xfe, 0x5e, 0x3e, 0x03, 0x63, 0xc3, 0xa3, 0xf7, 0x97, + 0x37, 0x57, 0x6a, 0x0a, 0xaa, 0xca, 0xd0, 0xb0, 0x10, 0x70, + 0x4d, 0x2d, 0x8d, 0xed, 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, + 0x17, 0x77, 0x6d, 0x0d, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50, + 0x04, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39, 0x23, 0x43, + 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e, 0xd6, 0xb6, 0x16, 0x76, + 0x4b, 0x2b, 0x8b, 0xeb, 0xf1, 0x91, 0x31, 0x51, 0x6c, 0x0c, + 0xac, 0xcc, 0x98, 0xf8, 
0x58, 0x38, 0x05, 0x65, 0xc5, 0xa5, + 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82, 0x6f, 0x0f, + 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52, 0x48, 0x28, 0x88, 0xe8, + 0xd5, 0xb5, 0x15, 0x75, 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, + 0x7c, 0x1c, 0x06, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b, + 0xf3, 0x93, 0x33, 0x53, 0x6e, 0x0e, 0xae, 0xce, 0xd4, 0xb4, + 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9, 0xbd, 0xdd, 0x7d, 0x1d, + 0x20, 0x40, 0xe0, 0x80, 0x9a, 0xfa, 0x5a, 0x3a, 0x07, 0x67, + 0xc7, 0xa7, 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, + 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15, 0x5e, 0x3f, + 0x9c, 0xfd, 0xc7, 0xa6, 0x05, 0x64, 0x71, 0x10, 0xb3, 0xd2, + 0xe8, 0x89, 0x2a, 0x4b, 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, + 0xe7, 0x86, 0x93, 0xf2, 0x51, 0x30, 0x0a, 0x6b, 0xc8, 0xa9, + 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8, 0xcd, 0xac, + 0x0f, 0x6e, 0x54, 0x35, 0x96, 0xf7, 0x65, 0x04, 0xa7, 0xc6, + 0xfc, 0x9d, 0x3e, 0x5f, 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, + 0x11, 0x70, 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x01, + 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e, 0xd9, 0xb8, + 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3, 0xf6, 0x97, 0x34, 0x55, + 0x6f, 0x0e, 0xad, 0xcc, 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, + 0xdc, 0xbd, 0xa8, 0xc9, 0x6a, 0x0b, 0x31, 0x50, 0xf3, 0x92, + 0xca, 0xab, 0x08, 0x69, 0x53, 0x32, 0x91, 0xf0, 0xe5, 0x84, + 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf, 0x94, 0xf5, 0x56, 0x37, + 0x0d, 0x6c, 0xcf, 0xae, 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, + 0xe0, 0x81, 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c, + 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x02, 0x63, 0x28, 0x49, + 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12, 0x07, 0x66, 0xc5, 0xa4, + 0x9e, 0xff, 0x5c, 0x3d, 0xaf, 0xce, 0x6d, 0x0c, 0x36, 0x57, + 0xf4, 0x95, 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba, + 0xf1, 0x90, 0x33, 0x52, 0x68, 0x09, 0xaa, 0xcb, 0xde, 0xbf, + 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4, 0x13, 0x72, 0xd1, 0xb0, + 0x8a, 0xeb, 0x48, 0x29, 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, + 0x67, 0x06, 0x4d, 0x2c, 0x8f, 0xee, 
0xd4, 0xb5, 0x16, 0x77, + 0x62, 0x03, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58, 0x00, 0x62, + 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, + 0xa2, 0xc0, 0x66, 0x04, 0x6e, 0x0c, 0xaa, 0xc8, 0xfb, 0x99, + 0x3f, 0x5d, 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x08, 0x6a, + 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef, 0xeb, 0x89, + 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8, 0xb2, 0xd0, 0x76, 0x14, + 0x27, 0x45, 0xe3, 0x81, 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, + 0xd4, 0xb6, 0xa5, 0xc7, 0x61, 0x03, 0x30, 0x52, 0xf4, 0x96, + 0x92, 0xf0, 0x56, 0x34, 0x07, 0x65, 0xc3, 0xa1, 0xcb, 0xa9, + 0x0f, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8, 0xfc, 0x9e, 0x38, 0x5a, + 0x69, 0x0b, 0xad, 0xcf, 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, + 0x28, 0x4a, 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d, + 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24, 0x20, 0x42, + 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13, 0x57, 0x35, 0x93, 0xf1, + 0xc2, 0xa0, 0x06, 0x64, 0x60, 0x02, 0xa4, 0xc6, 0xf5, 0x97, + 0x31, 0x53, 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0x0a, + 0x0e, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d, 0x8b, 0xe9, + 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8, 0xbc, 0xde, 0x78, 0x1a, + 0x29, 0x4b, 0xed, 0x8f, 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, + 0xb4, 0xd6, 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1, + 0xf2, 0x90, 0x36, 0x54, 0x67, 0x05, 0xa3, 0xc1, 0xc5, 0xa7, + 0x01, 0x63, 0x50, 0x32, 0x94, 0xf6, 0x9c, 0xfe, 0x58, 0x3a, + 0x09, 0x6b, 0xcd, 0xaf, 0xab, 0xc9, 0x6f, 0x0d, 0x3e, 0x5c, + 0xfa, 0x98, 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d, + 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a, 0x40, 0x22, + 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73, 0x77, 0x15, 0xb3, 0xd1, + 0xe2, 0x80, 0x26, 0x44, 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, + 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b, + 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a, 0x41, 0x22, + 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75, 0xfc, 0x9f, 0x3a, 0x59, + 0x6d, 0x0e, 0xab, 0xc8, 0xc3, 0xa0, 0x05, 0x66, 0x52, 0x31, + 0x94, 0xf7, 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 
0xd5, 0xb6, + 0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89, 0xe5, 0x86, + 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1, 0xda, 0xb9, 0x1c, 0x7f, + 0x4b, 0x28, 0x8d, 0xee, 0x9b, 0xf8, 0x5d, 0x3e, 0x0a, 0x69, + 0xcc, 0xaf, 0xa4, 0xc7, 0x62, 0x01, 0x35, 0x56, 0xf3, 0x90, + 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d, 0x26, 0x45, + 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12, 0x67, 0x04, 0xa1, 0xc2, + 0xf6, 0x95, 0x30, 0x53, 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, + 0x0f, 0x6c, 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3, + 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc, 0xa9, 0xca, + 0x6f, 0x0c, 0x38, 0x5b, 0xfe, 0x9d, 0x96, 0xf5, 0x50, 0x33, + 0x07, 0x64, 0xc1, 0xa2, 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, + 0x7c, 0x1f, 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20, + 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x02, 0x61, 0x6a, 0x09, + 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e, 0x32, 0x51, 0xf4, 0x97, + 0xa3, 0xc0, 0x65, 0x06, 0x0d, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, + 0x5a, 0x39, 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78, + 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47, 0xce, 0xad, + 0x08, 0x6b, 0x5f, 0x3c, 0x99, 0xfa, 0xf1, 0x92, 0x37, 0x54, + 0x60, 0x03, 0xa6, 0xc5, 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, + 0xe7, 0x84, 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb, + 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, + 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26, 0x0e, 0x6a, 0xc6, 0xa2, + 0x83, 0xe7, 0x4b, 0x2f, 0x09, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, + 0x4c, 0x28, 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d, + 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a, 0x12, 0x76, + 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33, 0x15, 0x71, 0xdd, 0xb9, + 0x98, 0xfc, 0x50, 0x34, 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, + 0x7d, 0x19, 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e, + 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17, 0x31, 0x55, + 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10, 0x24, 0x40, 0xec, 0x88, + 0xa9, 0xcd, 0x61, 0x05, 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, + 0x66, 0x02, 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0x0b, + 
0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0x0c, 0x70, 0x14, + 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51, 0x77, 0x13, 0xbf, 0xdb, + 0xfa, 0x9e, 0x32, 0x56, 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, + 0x3b, 0x5f, 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58, + 0x6c, 0x08, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d, 0x6b, 0x0f, + 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a, 0x62, 0x06, 0xaa, 0xce, + 0xef, 0x8b, 0x27, 0x43, 0x65, 0x01, 0xad, 0xc9, 0xe8, 0x8c, + 0x20, 0x44, 0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0x0d, 0x69, + 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0x0a, 0x6e, 0x46, 0x22, + 0x8e, 0xea, 0xcb, 0xaf, 0x03, 0x67, 0x41, 0x25, 0x89, 0xed, + 0xcc, 0xa8, 0x04, 0x60, 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, + 0x11, 0x75, 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72, + 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b, 0x5d, 0x39, + 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c, 0x00, 0x65, 0xca, 0xaf, + 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, + 0x4c, 0x29, 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38, + 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37, 0x3c, 0x59, + 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a, 0x33, 0x56, 0xf9, 0x9c, + 0xba, 0xdf, 0x70, 0x15, 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, + 0x61, 0x04, 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0x0b, + 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e, 0x77, 0x12, + 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51, 0x66, 0x03, 0xac, 0xc9, + 0xef, 0x8a, 0x25, 0x40, 0x69, 0x0c, 0xa3, 0xc6, 0xe0, 0x85, + 0x2a, 0x4f, 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x07, 0x62, + 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x08, 0x6d, 0x5a, 0x3f, + 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c, 0x55, 0x30, 0x9f, 0xfa, + 0xdc, 0xb9, 0x16, 0x73, 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, + 0xb3, 0xd6, 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9, + 0xee, 0x8b, 0x24, 0x41, 0x67, 0x02, 0xad, 0xc8, 0xe1, 0x84, + 0x2b, 0x4e, 0x68, 0x0d, 0xa2, 0xc7, 0xcc, 0xa9, 0x06, 0x63, + 0x45, 0x20, 0x8f, 0xea, 0xc3, 0xa6, 0x09, 0x6c, 0x4a, 0x2f, + 0x80, 0xe5, 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4, + 0xdd, 0xb8, 
0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb, 0x88, 0xed, + 0x42, 0x27, 0x01, 0x64, 0xcb, 0xae, 0x87, 0xe2, 0x4d, 0x28, + 0x0e, 0x6b, 0xc4, 0xa1, 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, + 0xd5, 0xb0, 0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf, + 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92, 0xbb, 0xde, + 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d, 0xaa, 0xcf, 0x60, 0x05, + 0x23, 0x46, 0xe9, 0x8c, 0xa5, 0xc0, 0x6f, 0x0a, 0x2c, 0x49, + 0xe6, 0x83, 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, + 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38, 0x2e, 0x48, + 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x01, 0x39, 0x5f, 0xf5, 0x93, + 0xbc, 0xda, 0x70, 0x16, 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, + 0x15, 0x73, 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x02, 0x64, + 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d, 0x65, 0x03, + 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a, 0xb8, 0xde, 0x74, 0x12, + 0x3d, 0x5b, 0xf1, 0x97, 0xaf, 0xc9, 0x63, 0x05, 0x2a, 0x4c, + 0xe6, 0x80, 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9, + 0x81, 0xe7, 0x4d, 0x2b, 0x04, 0x62, 0xc8, 0xae, 0xe4, 0x82, + 0x28, 0x4e, 0x61, 0x07, 0xad, 0xcb, 0xf3, 0x95, 0x3f, 0x59, + 0x76, 0x10, 0xba, 0xdc, 0xca, 0xac, 0x06, 0x60, 0x4f, 0x29, + 0x83, 0xe5, 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2, + 0x6d, 0x0b, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42, 0x7a, 0x1c, + 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55, 0x43, 0x25, 0x8f, 0xe9, + 0xc6, 0xa0, 0x0a, 0x6c, 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, + 0x1d, 0x7b, 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e, + 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x09, 0x1f, 0x79, + 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30, 0x08, 0x6e, 0xc4, 0xa2, + 0x8d, 0xeb, 0x41, 0x27, 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, + 0x9c, 0xfa, 0xc2, 0xa4, 0x0e, 0x68, 0x47, 0x21, 0x8b, 0xed, + 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4, 0xec, 0x8a, + 0x20, 0x46, 0x69, 0x0f, 0xa5, 0xc3, 0x89, 0xef, 0x45, 0x23, + 0x0c, 0x6a, 0xc0, 0xa6, 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, + 0xd7, 0xb1, 0xa7, 0xc1, 0x6b, 0x0d, 0x22, 0x44, 0xee, 0x88, + 0xb0, 0xd6, 0x7c, 0x1a, 
0x35, 0x53, 0xf9, 0x9f, 0x00, 0x67, + 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, + 0x9e, 0xf9, 0x50, 0x37, 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, + 0x71, 0x16, 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x09, + 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54, 0x63, 0x04, + 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b, 0x42, 0x25, 0x8c, 0xeb, + 0xc3, 0xa4, 0x0d, 0x6a, 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, + 0x12, 0x75, 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0, + 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x01, 0xa8, 0xcf, 0xc6, 0xa1, + 0x08, 0x6f, 0x47, 0x20, 0x89, 0xee, 0xd9, 0xbe, 0x17, 0x70, + 0x58, 0x3f, 0x96, 0xf1, 0x84, 0xe3, 0x4a, 0x2d, 0x05, 0x62, + 0xcb, 0xac, 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3, + 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92, 0xa5, 0xc2, + 0x6b, 0x0c, 0x24, 0x43, 0xea, 0x8d, 0xed, 0x8a, 0x23, 0x44, + 0x6c, 0x0b, 0xa2, 0xc5, 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, + 0xbd, 0xda, 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb, + 0xcc, 0xab, 0x02, 0x65, 0x4d, 0x2a, 0x83, 0xe4, 0x91, 0xf6, + 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9, 0x8e, 0xe9, 0x40, 0x27, + 0x0f, 0x68, 0xc1, 0xa6, 0xaf, 0xc8, 0x61, 0x06, 0x2e, 0x49, + 0xe0, 0x87, 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98, + 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d, 0x0a, 0x6d, + 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22, 0x2b, 0x4c, 0xe5, 0x82, + 0xaa, 0xcd, 0x64, 0x03, 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, + 0x7b, 0x1c, 0x69, 0x0e, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41, + 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e, 0x57, 0x30, + 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f, 0x48, 0x2f, 0x86, 0xe1, + 0xc9, 0xae, 0x07, 0x60, 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, + 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62, + 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb, 0xa9, 0xc1, + 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac, 0x81, 0xe9, 0x51, 0x39, + 0x3c, 0x54, 0xec, 0x84, 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, + 0x8b, 0xe3, 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a, + 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 
0x45, 0x2d, 0x1f, 0x77, + 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a, 0x78, 0x10, 0xa8, 0xc0, + 0xc5, 0xad, 0x15, 0x7d, 0xd1, 0xb9, 0x01, 0x69, 0x6c, 0x04, + 0xbc, 0xd4, 0xb6, 0xde, 0x66, 0x0e, 0x0b, 0x63, 0xdb, 0xb3, + 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b, 0xf9, 0x91, + 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc, 0x50, 0x38, 0x80, 0xe8, + 0xed, 0x85, 0x3d, 0x55, 0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2, + 0x5a, 0x32, 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b, + 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c, 0xf0, 0x98, + 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5, 0x97, 0xff, 0x47, 0x2f, + 0x2a, 0x42, 0xfa, 0x92, 0xbf, 0xd7, 0x6f, 0x07, 0x02, 0x6a, + 0xd2, 0xba, 0xd8, 0xb0, 0x08, 0x60, 0x65, 0x0d, 0xb5, 0xdd, + 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74, 0x16, 0x7e, + 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13, 0x21, 0x49, 0xf1, 0x99, + 0x9c, 0xf4, 0x4c, 0x24, 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, + 0x2b, 0x43, 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea, + 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d, 0xa0, 0xc8, + 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5, 0xc7, 0xaf, 0x17, 0x7f, + 0x7a, 0x12, 0xaa, 0xc2, 0x6e, 0x06, 0xbe, 0xd6, 0xd3, 0xbb, + 0x03, 0x6b, 0x09, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0x0c, + 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, + 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d, 0xde, 0xb7, 0x0c, 0x65, + 0x67, 0x0e, 0xb5, 0xdc, 0xb1, 0xd8, 0x63, 0x0a, 0x08, 0x61, + 0xda, 0xb3, 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3, + 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc, 0x7f, 0x16, + 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d, 0x10, 0x79, 0xc2, 0xab, + 0xa9, 0xc0, 0x7b, 0x12, 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, + 0x34, 0x5d, 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32, + 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83, 0xee, 0x87, + 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec, 0xfe, 0x97, 0x2c, 0x45, + 0x47, 0x2e, 0x95, 0xfc, 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, + 0xfa, 0x93, 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22, + 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d, 
0xbe, 0xd7, + 0x6c, 0x05, 0x07, 0x6e, 0xd5, 0xbc, 0xd1, 0xb8, 0x03, 0x6a, + 0x68, 0x01, 0xba, 0xd3, 0x60, 0x09, 0xb2, 0xdb, 0xd9, 0xb0, + 0x0b, 0x62, 0x0f, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0x0d, + 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d, 0x70, 0x19, + 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72, 0xc1, 0xa8, 0x13, 0x7a, + 0x78, 0x11, 0xaa, 0xc3, 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, + 0xc5, 0xac, 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3, + 0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c, 0x3f, 0x56, + 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d, 0x50, 0x39, 0x82, 0xeb, + 0xe9, 0x80, 0x3b, 0x52, 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, + 0x2b, 0x42, 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d, + 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c, 0xf1, 0x98, + 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3, 0x00, 0x6a, 0xd4, 0xbe, + 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, + 0x16, 0x7c, 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5, + 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92, 0xc1, 0xab, + 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca, 0xb6, 0xdc, 0x62, 0x08, + 0x03, 0x69, 0xd7, 0xbd, 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, + 0x4e, 0x24, 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53, + 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94, 0xe8, 0x82, + 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3, 0x71, 0x1b, 0xa5, 0xcf, + 0xc4, 0xae, 0x10, 0x7a, 0x06, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, + 0x67, 0x0d, 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55, + 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22, 0xb0, 0xda, + 0x64, 0x0e, 0x05, 0x6f, 0xd1, 0xbb, 0xc7, 0xad, 0x13, 0x79, + 0x72, 0x18, 0xa6, 0xcc, 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, + 0x42, 0x28, 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f, + 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6, 0xba, 0xd0, + 0x6e, 0x04, 0x0f, 0x65, 0xdb, 0xb1, 0xe2, 0x88, 0x36, 0x5c, + 0x57, 0x3d, 0x83, 0xe9, 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, + 0xf4, 0x9e, 0x0c, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x07, + 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70, 0xbc, 0xd6, + 
0x68, 0x02, 0x09, 0x63, 0xdd, 0xb7, 0xcb, 0xa1, 0x1f, 0x75, + 0x7e, 0x14, 0xaa, 0xc0, 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, + 0x33, 0x59, 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e, + 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76, 0x0a, 0x60, + 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x01, 0x93, 0xf9, 0x47, 0x2d, + 0x26, 0x4c, 0xf2, 0x98, 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, + 0x85, 0xef, 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, + 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73, 0xfe, 0x95, + 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2, 0x81, 0xea, 0x57, 0x3c, + 0x30, 0x5b, 0xe6, 0x8d, 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, + 0x86, 0xed, 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92, + 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13, 0x60, 0x0b, + 0xb6, 0xdd, 0xd1, 0xba, 0x07, 0x6c, 0xdf, 0xb4, 0x09, 0x62, + 0x6e, 0x05, 0xb8, 0xd3, 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, + 0xc7, 0xac, 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d, + 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52, 0x3e, 0x55, + 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32, 0x41, 0x2a, 0x97, 0xfc, + 0xf0, 0x9b, 0x26, 0x4d, 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, + 0xa7, 0xcc, 0xbf, 0xd4, 0x69, 0x02, 0x0e, 0x65, 0xd8, 0xb3, + 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf, 0xdc, 0xb7, + 0x0a, 0x61, 0x6d, 0x06, 0xbb, 0xd0, 0x5d, 0x36, 0x8b, 0xe0, + 0xec, 0x87, 0x3a, 0x51, 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, + 0x45, 0x2e, 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e, + 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31, 0xbc, 0xd7, + 0x6a, 0x01, 0x0d, 0x66, 0xdb, 0xb0, 0xc3, 0xa8, 0x15, 0x7e, + 0x72, 0x19, 0xa4, 0xcf, 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, + 0x1b, 0x70, 0x03, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0x0f, + 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e, 0xfd, 0x96, + 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1, 0x9d, 0xf6, 0x4b, 0x20, + 0x2c, 0x47, 0xfa, 0x91, 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, + 0x85, 0xee, 0x63, 0x08, 0xb5, 0xde, 0xd2, 0xb9, 0x04, 0x6f, + 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10, 0x00, 0x6c, + 0xd8, 0xb4, 
0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, + 0xea, 0x86, 0x32, 0x5e, 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, + 0xfb, 0x97, 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x08, 0xbc, 0xd0, + 0x01, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18, 0x46, 0x2a, + 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f, 0x8f, 0xe3, 0x57, 0x3b, + 0x22, 0x4e, 0xfa, 0x96, 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x09, + 0xbd, 0xd1, 0x02, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b, + 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c, 0x8c, 0xe0, + 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95, 0xcb, 0xa7, 0x13, 0x7f, + 0x66, 0x0a, 0xbe, 0xd2, 0x03, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, + 0x76, 0x1a, 0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d, + 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94, 0xca, 0xa6, + 0x12, 0x7e, 0x67, 0x0b, 0xbf, 0xd3, 0x04, 0x68, 0xdc, 0xb0, + 0xa9, 0xc5, 0x71, 0x1d, 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, + 0x36, 0x5a, 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93, + 0xcd, 0xa1, 0x15, 0x79, 0x60, 0x0c, 0xb8, 0xd4, 0x05, 0x69, + 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c, 0x42, 0x2e, 0x9a, 0xf6, + 0xef, 0x83, 0x37, 0x5b, 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, + 0xfe, 0x92, 0xcc, 0xa0, 0x14, 0x78, 0x61, 0x0d, 0xb9, 0xd5, + 0x06, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f, 0x41, 0x2d, + 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58, 0x88, 0xe4, 0x50, 0x3c, + 0x25, 0x49, 0xfd, 0x91, 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0x0e, + 0xba, 0xd6, 0x07, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e, + 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59, 0x89, 0xe5, + 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90, 0xce, 0xa2, 0x16, 0x7a, + 0x63, 0x0f, 0xbb, 0xd7, 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, + 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51, + 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80, 0xd1, 0xbc, + 0x0b, 0x66, 0x78, 0x15, 0xa2, 0xcf, 0x21, 0x4c, 0xfb, 0x96, + 0x88, 0xe5, 0x52, 0x3f, 0x6e, 0x03, 0xb4, 0xd9, 0xc7, 0xaa, + 0x1d, 0x70, 0xbf, 0xd2, 0x65, 0x08, 0x16, 0x7b, 0xcc, 0xa1, + 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee, 0x42, 0x2f, + 0x98, 0xf5, 0xeb, 0x86, 
0x31, 0x5c, 0x0d, 0x60, 0xd7, 0xba, + 0xa4, 0xc9, 0x7e, 0x13, 0xdc, 0xb1, 0x06, 0x6b, 0x75, 0x18, + 0xaf, 0xc2, 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d, + 0x63, 0x0e, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d, 0x2c, 0x41, + 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32, 0xfd, 0x90, 0x27, 0x4a, + 0x54, 0x39, 0x8e, 0xe3, 0xb2, 0xdf, 0x68, 0x05, 0x1b, 0x76, + 0xc1, 0xac, 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a, + 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0x0f, 0xb8, 0xd5, 0x1a, 0x77, + 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x04, 0x55, 0x38, 0x8f, 0xe2, + 0xfc, 0x91, 0x26, 0x4b, 0xa5, 0xc8, 0x7f, 0x12, 0x0c, 0x61, + 0xd6, 0xbb, 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4, + 0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25, 0x74, 0x19, + 0xae, 0xc3, 0xdd, 0xb0, 0x07, 0x6a, 0xc6, 0xab, 0x1c, 0x71, + 0x6f, 0x02, 0xb5, 0xd8, 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, + 0xfa, 0x97, 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46, + 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x09, 0xe7, 0x8a, + 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9, 0xa8, 0xc5, 0x72, 0x1f, + 0x01, 0x6c, 0xdb, 0xb6, 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, + 0x0a, 0x67, 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28, + 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, + 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40, 0xae, 0xc0, 0x72, 0x1c, + 0x0b, 0x65, 0xd7, 0xb9, 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, + 0x80, 0xee, 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56, + 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x01, 0xef, 0x81, + 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8, 0xb8, 0xd6, 0x64, 0x0a, + 0x1d, 0x73, 0xc1, 0xaf, 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, + 0xfb, 0x95, 0xd5, 0xbb, 0x09, 0x67, 0x70, 0x1e, 0xac, 0xc2, + 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b, 0x7b, 0x15, + 0xa7, 0xc9, 0xde, 0xb0, 0x02, 0x6c, 0xc3, 0xad, 0x1f, 0x71, + 0x66, 0x08, 0xba, 0xd4, 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, + 0xed, 0x83, 0x6d, 0x03, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a, + 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d, 0x19, 0x77, + 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0x0e, 
0x4e, 0x20, 0x92, 0xfc, + 0xeb, 0x85, 0x37, 0x59, 0xb7, 0xd9, 0x6b, 0x05, 0x12, 0x7c, + 0xce, 0xa0, 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7, + 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f, 0x0f, 0x61, + 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18, 0xf6, 0x98, 0x2a, 0x44, + 0x53, 0x3d, 0x8f, 0xe1, 0xa1, 0xcf, 0x7d, 0x13, 0x04, 0x6a, + 0xd8, 0xb6, 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c, + 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x07, 0xb5, 0xdb, 0x35, 0x5b, + 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22, 0x62, 0x0c, 0xbe, 0xd0, + 0xc7, 0xa9, 0x1b, 0x75, 0xda, 0xb4, 0x06, 0x68, 0x7f, 0x11, + 0xa3, 0xcd, 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a, + 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0x0d, 0x63, 0x23, 0x4d, + 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34, 0x00, 0x6f, 0xde, 0xb1, + 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, + 0x20, 0x4f, 0xbe, 0xd1, 0x60, 0x0f, 0x1f, 0x70, 0xc1, 0xae, + 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1, 0x61, 0x0e, + 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71, 0x3e, 0x51, 0xe0, 0x8f, + 0x9f, 0xf0, 0x41, 0x2e, 0xdf, 0xb0, 0x01, 0x6e, 0x7e, 0x11, + 0xa0, 0xcf, 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90, + 0xc2, 0xad, 0x1c, 0x73, 0x63, 0x0c, 0xbd, 0xd2, 0x9d, 0xf2, + 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d, 0x7c, 0x13, 0xa2, 0xcd, + 0xdd, 0xb2, 0x03, 0x6c, 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, + 0x5c, 0x33, 0xa3, 0xcc, 0x7d, 0x12, 0x02, 0x6d, 0xdc, 0xb3, + 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec, 0x1d, 0x72, + 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0x0d, 0x42, 0x2d, 0x9c, 0xf3, + 0xe3, 0x8c, 0x3d, 0x52, 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, + 0xe6, 0x89, 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x08, 0xb9, 0xd6, + 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37, 0x78, 0x17, + 0xa6, 0xc9, 0xd9, 0xb6, 0x07, 0x68, 0xf8, 0x97, 0x26, 0x49, + 0x59, 0x36, 0x87, 0xe8, 0xa7, 0xc8, 0x79, 0x16, 0x06, 0x69, + 0xd8, 0xb7, 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56, + 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x09, 0x5b, 0x34, + 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b, 0x04, 0x6b, 
0xda, 0xb5, + 0xa5, 0xca, 0x7b, 0x14, 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, + 0x9a, 0xf5, 0xba, 0xd5, 0x64, 0x0b, 0x1b, 0x74, 0xc5, 0xaa, + 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a, 0x65, 0x0a, + 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75, 0x84, 0xeb, 0x5a, 0x35, + 0x25, 0x4a, 0xfb, 0x94, 0xdb, 0xb4, 0x05, 0x6a, 0x7a, 0x15, + 0xa4, 0xcb, 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, + 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea, 0x53, 0x23, + 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e, 0xf4, 0x84, 0x14, 0x64, + 0x29, 0x59, 0xc9, 0xb9, 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0x0b, + 0x9b, 0xeb, 0x01, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c, + 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8, 0x52, 0x22, + 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f, 0x51, 0x21, 0xb1, 0xc1, + 0x8c, 0xfc, 0x6c, 0x1c, 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, + 0xcb, 0xbb, 0x02, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f, + 0xa5, 0xd5, 0x45, 0x35, 0x78, 0x08, 0x98, 0xe8, 0xf7, 0x87, + 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba, 0x50, 0x20, 0xb0, 0xc0, + 0x8d, 0xfd, 0x6d, 0x1d, 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x09, + 0x99, 0xe9, 0x03, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e, + 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0x0f, 0x9f, 0xef, 0x05, 0x75, + 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48, 0xf1, 0x81, 0x11, 0x61, + 0x2c, 0x5c, 0xcc, 0xbc, 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, + 0x6b, 0x1b, 0x04, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49, + 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0x0e, 0x9e, 0xee, 0x57, 0x27, + 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a, 0xf0, 0x80, 0x10, 0x60, + 0x2d, 0x5d, 0xcd, 0xbd, 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, + 0xce, 0xbe, 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19, + 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0x0d, 0x9d, 0xed, 0x07, 0x77, + 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a, 0x55, 0x25, 0xb5, 0xc5, + 0x88, 0xf8, 0x68, 0x18, 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, + 0xcf, 0xbf, 0x06, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b, + 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0x0c, 0x9c, 0xec, 0x00, 0x71, + 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, + 
0x76, 0x07, 0x94, 0xe5, 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, + 0x78, 0x09, 0xec, 0x9d, 0x0e, 0x7f, 0x35, 0x44, 0xd7, 0xa6, + 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc, 0x29, 0x58, + 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63, 0xc5, 0xb4, 0x27, 0x56, + 0x1c, 0x6d, 0xfe, 0x8f, 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, + 0x51, 0x20, 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b, + 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4, 0x52, 0x23, + 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18, 0xfd, 0x8c, 0x1f, 0x6e, + 0x24, 0x55, 0xc6, 0xb7, 0x97, 0xe6, 0x75, 0x04, 0x4e, 0x3f, + 0xac, 0xdd, 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x03, 0x72, + 0xd4, 0xa5, 0x36, 0x47, 0x0d, 0x7c, 0xef, 0x9e, 0x7b, 0x0a, + 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31, 0x22, 0x53, 0xc0, 0xb1, + 0xfb, 0x8a, 0x19, 0x68, 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, + 0xb6, 0xc7, 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b, + 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84, 0xa4, 0xd5, + 0x46, 0x37, 0x7d, 0x0c, 0x9f, 0xee, 0x0b, 0x7a, 0xe9, 0x98, + 0xd2, 0xa3, 0x30, 0x41, 0xe7, 0x96, 0x05, 0x74, 0x3e, 0x4f, + 0xdc, 0xad, 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x02, + 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x08, 0x79, 0x9c, 0xed, + 0x7e, 0x0f, 0x45, 0x34, 0xa7, 0xd6, 0x70, 0x01, 0x92, 0xe3, + 0xa9, 0xd8, 0x4b, 0x3a, 0xdf, 0xae, 0x3d, 0x4c, 0x06, 0x77, + 0xe4, 0x95, 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff, + 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50, 0xf6, 0x87, + 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc, 0x59, 0x28, 0xbb, 0xca, + 0x80, 0xf1, 0x62, 0x13, 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, + 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4, + 0x73, 0x01, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30, 0xc4, 0xb6, + 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87, 0xe6, 0x94, 0x02, 0x70, + 0x33, 0x41, 0xd7, 0xa5, 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, + 0x60, 0x12, 0x95, 0xe7, 0x71, 0x03, 0x40, 0x32, 0xa4, 0xd6, + 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61, 0xd1, 0xa3, + 0x35, 0x47, 0x04, 0x76, 0xe0, 0x92, 0x66, 0x14, 0x82, 0xf0, + 0xb3, 0xc1, 
0x57, 0x25, 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x05, + 0x93, 0xe1, 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56, + 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x06, 0x74, 0x80, 0xf2, + 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3, 0x44, 0x36, 0xa0, 0xd2, + 0x91, 0xe3, 0x75, 0x07, 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, + 0xc2, 0xb0, 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc, + 0x08, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b, 0xcc, 0xbe, + 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f, 0x7b, 0x09, 0x9f, 0xed, + 0xae, 0xdc, 0x4a, 0x38, 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, + 0x68, 0x1a, 0xee, 0x9c, 0x0a, 0x78, 0x3b, 0x49, 0xdf, 0xad, + 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69, 0x9d, 0xef, + 0x79, 0x0b, 0x48, 0x3a, 0xac, 0xde, 0x6e, 0x1c, 0x8a, 0xf8, + 0xbb, 0xc9, 0x5f, 0x2d, 0xd9, 0xab, 0x3d, 0x4f, 0x0c, 0x7e, + 0xe8, 0x9a, 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e, + 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0x0d, 0x9b, 0xe9, 0x88, 0xfa, + 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb, 0x3f, 0x4d, 0xdb, 0xa9, + 0xea, 0x98, 0x0e, 0x7c, 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, + 0xca, 0xb8, 0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0x0f, + 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, + 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb, 0x63, 0x10, 0x85, 0xf6, + 0xb2, 0xc1, 0x54, 0x27, 0xdc, 0xaf, 0x3a, 0x49, 0x0d, 0x7e, + 0xeb, 0x98, 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82, + 0x79, 0x0a, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d, 0xa5, 0xd6, + 0x43, 0x30, 0x74, 0x07, 0x92, 0xe1, 0x1a, 0x69, 0xfc, 0x8f, + 0xcb, 0xb8, 0x2d, 0x5e, 0x91, 0xe2, 0x77, 0x04, 0x40, 0x33, + 0xa6, 0xd5, 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a, + 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6, 0x4d, 0x3e, + 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x09, 0x57, 0x24, 0xb1, 0xc2, + 0x86, 0xf5, 0x60, 0x13, 0xe8, 0x9b, 0x0e, 0x7d, 0x39, 0x4a, + 0xdf, 0xac, 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x03, 0x70, + 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf, 0x3f, 0x4c, + 0xd9, 0xaa, 0xee, 0x9d, 0x08, 0x7b, 0x80, 0xf3, 0x66, 0x15, + 0x51, 0x22, 0xb7, 0xc4, 
0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, + 0x6b, 0x18, 0xe3, 0x90, 0x05, 0x76, 0x32, 0x41, 0xd4, 0xa7, + 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd, 0x46, 0x35, + 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x02, 0x9a, 0xe9, 0x7c, 0x0f, + 0x4b, 0x38, 0xad, 0xde, 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, + 0x12, 0x61, 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0x0c, 0x99, 0xea, + 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55, 0xcd, 0xbe, + 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89, 0x72, 0x01, 0x94, 0xe7, + 0xa3, 0xd0, 0x45, 0x36, 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, + 0x5f, 0x2c, 0xd7, 0xa4, 0x31, 0x42, 0x06, 0x75, 0xe0, 0x93, + 0x0b, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f, 0xb4, 0xc7, + 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0, 0x00, 0x74, 0xe8, 0x9c, + 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, + 0xa2, 0xd6, 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42, + 0x94, 0xe0, 0x7c, 0x08, 0x59, 0x2d, 0xb1, 0xc5, 0x26, 0x52, + 0xce, 0xba, 0xeb, 0x9f, 0x03, 0x77, 0xa1, 0xd5, 0x49, 0x3d, + 0x6c, 0x18, 0x84, 0xf0, 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, + 0x10, 0x64, 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0x0b, 0x97, 0xe3, + 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d, 0xcb, 0xbf, + 0x23, 0x57, 0x06, 0x72, 0xee, 0x9a, 0x5f, 0x2b, 0xb7, 0xc3, + 0x92, 0xe6, 0x7a, 0x0e, 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, + 0xfd, 0x89, 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b, + 0xed, 0x99, 0x05, 0x71, 0x20, 0x54, 0xc8, 0xbc, 0x79, 0x0d, + 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28, 0xfe, 0x8a, 0x16, 0x62, + 0x33, 0x47, 0xdb, 0xaf, 0x98, 0xec, 0x70, 0x04, 0x55, 0x21, + 0xbd, 0xc9, 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e, + 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda, 0x0c, 0x78, + 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d, 0xbe, 0xca, 0x56, 0x22, + 0x73, 0x07, 0x9b, 0xef, 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, + 0x1c, 0x68, 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc, + 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0x0f, 0x7b, 0xd4, 0xa0, + 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85, 0x53, 0x27, 0xbb, 0xcf, + 0x9e, 0xea, 0x76, 0x02, 0xc7, 0xb3, 
0x2f, 0x5b, 0x0a, 0x7e, + 0xe2, 0x96, 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11, + 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3, 0x75, 0x01, + 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24, 0xe1, 0x95, 0x09, 0x7d, + 0x2c, 0x58, 0xc4, 0xb0, 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, + 0x43, 0x37, 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, + 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9, 0x03, 0x76, + 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55, 0x8c, 0xf9, 0x66, 0x13, + 0x45, 0x30, 0xaf, 0xda, 0x06, 0x73, 0xec, 0x99, 0xcf, 0xba, + 0x25, 0x50, 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf, + 0x05, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53, 0x8a, 0xff, + 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc, 0x0c, 0x79, 0xe6, 0x93, + 0xc5, 0xb0, 0x2f, 0x5a, 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, + 0xa0, 0xd5, 0x0f, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59, + 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6, 0x0a, 0x7f, + 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c, 0x85, 0xf0, 0x6f, 0x1a, + 0x4c, 0x39, 0xa6, 0xd3, 0x09, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, + 0x2a, 0x5f, 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0, + 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e, 0x97, 0xe2, + 0x7d, 0x08, 0x5e, 0x2b, 0xb4, 0xc1, 0x1b, 0x6e, 0xf1, 0x84, + 0xd2, 0xa7, 0x38, 0x4d, 0x94, 0xe1, 0x7e, 0x0b, 0x5d, 0x28, + 0xb7, 0xc2, 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48, + 0x91, 0xe4, 0x7b, 0x0e, 0x58, 0x2d, 0xb2, 0xc7, 0x1d, 0x68, + 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b, 0x92, 0xe7, 0x78, 0x0d, + 0x5b, 0x2e, 0xb1, 0xc4, 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, + 0x37, 0x42, 0x9b, 0xee, 0x71, 0x04, 0x52, 0x27, 0xb8, 0xcd, + 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41, 0x98, 0xed, + 0x72, 0x07, 0x51, 0x24, 0xbb, 0xce, 0x12, 0x67, 0xf8, 0x8d, + 0xdb, 0xae, 0x31, 0x44, 0x9d, 0xe8, 0x77, 0x02, 0x54, 0x21, + 0xbe, 0xcb, 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47, + 0x9e, 0xeb, 0x74, 0x01, 0x57, 0x22, 0xbd, 0xc8, 0x00, 0x76, + 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, + 0x52, 0x24, 0xbe, 0xc8, 0x33, 0x45, 0xdf, 0xa9, 
0xf6, 0x80, + 0x1a, 0x6c, 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb, + 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39, 0xf1, 0x87, + 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae, 0x55, 0x23, 0xb9, 0xcf, + 0x90, 0xe6, 0x7c, 0x0a, 0xc2, 0xb4, 0x2e, 0x58, 0x07, 0x71, + 0xeb, 0x9d, 0xcc, 0xba, 0x20, 0x56, 0x09, 0x7f, 0xe5, 0x93, + 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x04, 0xff, 0x89, + 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0, 0x68, 0x1e, 0x84, 0xf2, + 0xad, 0xdb, 0x41, 0x37, 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, + 0x83, 0xf5, 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62, + 0x99, 0xef, 0x75, 0x03, 0x5c, 0x2a, 0xb0, 0xc6, 0x0e, 0x78, + 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51, 0x85, 0xf3, 0x69, 0x1f, + 0x40, 0x36, 0xac, 0xda, 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, + 0x3b, 0x4d, 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x05, 0x9f, 0xe9, + 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x08, 0x7e, 0xe3, 0x95, + 0x0f, 0x79, 0x26, 0x50, 0xca, 0xbc, 0x74, 0x02, 0x98, 0xee, + 0xb1, 0xc7, 0x5d, 0x2b, 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, + 0xf9, 0x8f, 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18, + 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16, 0xde, 0xa8, + 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81, 0x7a, 0x0c, 0x96, 0xe0, + 0xbf, 0xc9, 0x53, 0x25, 0xed, 0x9b, 0x01, 0x77, 0x28, 0x5e, + 0xc4, 0xb2, 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x06, 0x70, + 0xb8, 0xce, 0x54, 0x22, 0x7d, 0x0b, 0x91, 0xe7, 0x1c, 0x6a, + 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43, 0x8b, 0xfd, 0x67, 0x11, + 0x4e, 0x38, 0xa2, 0xd4, 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, + 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7, + 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0x0c, 0x7b, 0xbc, 0xcb, + 0x52, 0x25, 0x7d, 0x0a, 0x93, 0xe4, 0x46, 0x31, 0xa8, 0xdf, + 0x87, 0xf0, 0x69, 0x1e, 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, + 0xf6, 0x81, 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d, + 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2, 0x8c, 0xfb, + 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4, 0x13, 0x64, 0xfd, 0x8a, + 0xd2, 0xa5, 0x3c, 0x4b, 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, + 
0x80, 0xf7, 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68, + 0xca, 0xbd, 0x24, 0x53, 0x0b, 0x7c, 0xe5, 0x92, 0x55, 0x22, + 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0x0d, 0xe9, 0x9e, 0x07, 0x70, + 0x28, 0x5f, 0xc6, 0xb1, 0x76, 0x01, 0x98, 0xef, 0xb7, 0xc0, + 0x59, 0x2e, 0x05, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d, + 0x9a, 0xed, 0x74, 0x03, 0x5b, 0x2c, 0xb5, 0xc2, 0x26, 0x51, + 0xc8, 0xbf, 0xe7, 0x90, 0x09, 0x7e, 0xb9, 0xce, 0x57, 0x20, + 0x78, 0x0f, 0x96, 0xe1, 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, + 0x6c, 0x1b, 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84, + 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38, 0xff, 0x88, + 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7, 0x89, 0xfe, 0x67, 0x10, + 0x48, 0x3f, 0xa6, 0xd1, 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, + 0x39, 0x4e, 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2, + 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d, 0xcf, 0xb8, + 0x21, 0x56, 0x0e, 0x79, 0xe0, 0x97, 0x50, 0x27, 0xbe, 0xc9, + 0x91, 0xe6, 0x7f, 0x08, 0xec, 0x9b, 0x02, 0x75, 0x2d, 0x5a, + 0xc3, 0xb4, 0x73, 0x04, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b, + 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, + 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92, 0xd3, 0xab, 0x23, 0x5b, + 0x2e, 0x56, 0xde, 0xa6, 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, + 0x39, 0x41, 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce, + 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29, 0x68, 0x10, + 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d, 0x8f, 0xf7, 0x7f, 0x07, + 0x72, 0x0a, 0x82, 0xfa, 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, + 0x66, 0x1e, 0x8c, 0xf4, 0x7c, 0x04, 0x71, 0x09, 0x81, 0xf9, + 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd, 0x5f, 0x27, + 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a, 0xd0, 0xa8, 0x20, 0x58, + 0x2d, 0x55, 0xdd, 0xa5, 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, + 0x3a, 0x42, 0x03, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0x0e, 0x76, + 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91, 0xd6, 0xae, + 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3, 0x31, 0x49, 0xc1, 0xb9, + 0xcc, 0xb4, 0x3c, 0x44, 0x05, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, + 0x08, 0x70, 
0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97, + 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18, 0x8a, 0xf2, + 0x7a, 0x02, 0x77, 0x0f, 0x87, 0xff, 0xbe, 0xc6, 0x4e, 0x36, + 0x43, 0x3b, 0xb3, 0xcb, 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, + 0x54, 0x2c, 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8, + 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f, 0x6e, 0x16, + 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b, 0x89, 0xf1, 0x79, 0x01, + 0x74, 0x0c, 0x84, 0xfc, 0x06, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, + 0x0b, 0x73, 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94, + 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0, 0x32, 0x4a, + 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47, 0x00, 0x79, 0xf2, 0x8b, + 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, + 0xe4, 0x9d, 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1, + 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e, 0x9b, 0xe2, + 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9, 0x74, 0x0d, 0x86, 0xff, + 0x8d, 0xf4, 0x7f, 0x06, 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, + 0x53, 0x2a, 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5, + 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59, 0xc4, 0xbd, + 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6, 0xe8, 0x91, 0x1a, 0x63, + 0x11, 0x68, 0xe3, 0x9a, 0x07, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, + 0x0c, 0x75, 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2, + 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d, 0x73, 0x0a, + 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x01, 0x9c, 0xe5, 0x6e, 0x17, + 0x65, 0x1c, 0x97, 0xee, 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, + 0x5d, 0x24, 0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb, + 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7, 0x7a, 0x03, + 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x08, 0xcd, 0xb4, 0x3f, 0x46, + 0x34, 0x4d, 0xc6, 0xbf, 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, + 0x29, 0x50, 0x0e, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x05, 0x7c, + 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93, 0x7d, 0x04, + 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0x0f, 0x92, 0xeb, 0x60, 0x19, + 0x6b, 0x12, 0x99, 0xe0, 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, + 0xb5, 0xcc, 0x51, 0x28, 
0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23, + 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94, 0x09, 0x70, + 0xfb, 0x82, 0xf0, 0x89, 0x02, 0x7b, 0x25, 0x5c, 0xd7, 0xae, + 0xdc, 0xa5, 0x2e, 0x57, 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, + 0xc1, 0xb8, 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, + 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c, 0xf3, 0x89, + 0x07, 0x7d, 0x06, 0x7c, 0xf2, 0x88, 0x04, 0x7e, 0xf0, 0x8a, + 0xf1, 0x8b, 0x05, 0x7f, 0xfb, 0x81, 0x0f, 0x75, 0x0e, 0x74, + 0xfa, 0x80, 0x0c, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0x0d, 0x77, + 0x08, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x09, 0x73, 0xff, 0x85, + 0x0b, 0x71, 0x0a, 0x70, 0xfe, 0x84, 0xeb, 0x91, 0x1f, 0x65, + 0x1e, 0x64, 0xea, 0x90, 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, + 0x1d, 0x67, 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63, + 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94, 0x10, 0x6a, + 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b, 0xe7, 0x9d, 0x13, 0x69, + 0x12, 0x68, 0xe6, 0x9c, 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, + 0xe2, 0x98, 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f, + 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0, 0x3c, 0x46, + 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47, 0x38, 0x42, 0xcc, 0xb6, + 0xcd, 0xb7, 0x39, 0x43, 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, + 0xce, 0xb4, 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b, + 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc, 0xc3, 0xb9, + 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8, 0x34, 0x4e, 0xc0, 0xba, + 0xc1, 0xbb, 0x35, 0x4f, 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, + 0x21, 0x5b, 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac, + 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8, 0x24, 0x5e, + 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f, 0xdb, 0xa1, 0x2f, 0x55, + 0x2e, 0x54, 0xda, 0xa0, 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, + 0x2d, 0x57, 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53, + 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4, 0x00, 0x7b, + 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, + 0x0e, 0x75, 0xf8, 0x83, 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, + 0xe4, 0x9f, 0x1c, 0x67, 0xea, 0x91, 
0xed, 0x96, 0x1b, 0x60, + 0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7, 0x24, 0x5f, + 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58, 0x38, 0x43, 0xce, 0xb5, + 0xc9, 0xb2, 0x3f, 0x44, 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, + 0xc0, 0xbb, 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7, + 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28, 0x48, 0x33, + 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34, 0xb7, 0xcc, 0x41, 0x3a, + 0x46, 0x3d, 0xb0, 0xcb, 0x70, 0x0b, 0x86, 0xfd, 0x81, 0xfa, + 0x77, 0x0c, 0x8f, 0xf4, 0x79, 0x02, 0x7e, 0x05, 0x88, 0xf3, + 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef, 0x6c, 0x17, + 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10, 0x4b, 0x30, 0xbd, 0xc6, + 0xba, 0xc1, 0x4c, 0x37, 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, + 0xb3, 0xc8, 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4, + 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b, 0x90, 0xeb, + 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec, 0x6f, 0x14, 0x99, 0xe2, + 0x9e, 0xe5, 0x68, 0x13, 0x73, 0x08, 0x85, 0xfe, 0x82, 0xf9, + 0x74, 0x0f, 0x8c, 0xf7, 0x7a, 0x01, 0x7d, 0x06, 0x8b, 0xf0, + 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c, 0x1f, 0x64, + 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63, 0x03, 0x78, 0xf5, 0x8e, + 0xf2, 0x89, 0x04, 0x7f, 0xfc, 0x87, 0x0a, 0x71, 0x0d, 0x76, + 0xfb, 0x80, 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47, + 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8, 0xd8, 0xa3, + 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4, 0x27, 0x5c, 0xd1, 0xaa, + 0xd6, 0xad, 0x20, 0x5b, 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, + 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae, + 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x02, 0x86, 0xfa, 0x54, 0x28, + 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d, 0x3b, 0x47, 0xc3, 0xbf, + 0xd6, 0xaa, 0x2e, 0x52, 0xfc, 0x80, 0x04, 0x78, 0x11, 0x6d, + 0xe9, 0x95, 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1, + 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x06, 0x76, 0x0a, + 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f, 0xb1, 0xcd, 0x49, 0x35, + 0x5c, 0x20, 0xa4, 0xd8, 0xe5, 0x99, 0x1d, 0x61, 0x08, 0x74, + 0xf0, 0x8c, 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 
0x37, 0x4b, + 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24, 0x8a, 0xf6, + 0x72, 0x0e, 0x67, 0x1b, 0x9f, 0xe3, 0xde, 0xa2, 0x26, 0x5a, + 0x33, 0x4f, 0xcb, 0xb7, 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, + 0x0c, 0x70, 0xec, 0x90, 0x14, 0x68, 0x01, 0x7d, 0xf9, 0x85, + 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42, 0x7f, 0x03, + 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16, 0xb8, 0xc4, 0x40, 0x3c, + 0x55, 0x29, 0xad, 0xd1, 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, + 0xc2, 0xbe, 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x05, 0x79, + 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d, 0x83, 0xff, + 0x7b, 0x07, 0x6e, 0x12, 0x96, 0xea, 0x9a, 0xe6, 0x62, 0x1e, + 0x77, 0x0b, 0x8f, 0xf3, 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, + 0x48, 0x34, 0x09, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60, + 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7, 0xa1, 0xdd, + 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8, 0x66, 0x1a, 0x9e, 0xe2, + 0x8b, 0xf7, 0x73, 0x0f, 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, + 0x27, 0x5b, 0xf5, 0x89, 0x0d, 0x71, 0x18, 0x64, 0xe0, 0x9c, + 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, + 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1, 0x83, 0xfe, 0x79, 0x04, + 0x6a, 0x17, 0x90, 0xed, 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, + 0x5f, 0x22, 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x08, 0x75, + 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba, 0x98, 0xe5, + 0x62, 0x1f, 0x71, 0x0c, 0x8b, 0xf6, 0x57, 0x2a, 0xad, 0xd0, + 0xbe, 0xc3, 0x44, 0x39, 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, + 0x25, 0x58, 0xf9, 0x84, 0x03, 0x7e, 0x10, 0x6d, 0xea, 0x97, + 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb, 0x7a, 0x07, + 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14, 0x2d, 0x50, 0xd7, 0xaa, + 0xc4, 0xb9, 0x3e, 0x43, 0xe2, 0x9f, 0x18, 0x65, 0x0b, 0x76, + 0xf1, 0x8c, 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0, + 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0x0f, 0x6c, 0x11, + 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x02, 0xa3, 0xde, 0x59, 0x24, + 0x4a, 0x37, 0xb0, 0xcd, 0xef, 0x92, 0x15, 0x68, 0x06, 0x7b, + 0xfc, 0x81, 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e, + 
0x77, 0x0a, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19, 0xb8, 0xc5, + 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6, 0xf4, 0x89, 0x0e, 0x73, + 0x1d, 0x60, 0xe7, 0x9a, 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, + 0x28, 0x55, 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34, + 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x01, 0x86, 0xfb, 0xd9, 0xa4, + 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7, 0x16, 0x6b, 0xec, 0x91, + 0xff, 0x82, 0x05, 0x78, 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, + 0x52, 0x2f, 0x8e, 0xf3, 0x74, 0x09, 0x67, 0x1a, 0x9d, 0xe0, + 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac, 0x0d, 0x70, + 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63, 0x00, 0x7e, 0xfc, 0x82, + 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, + 0xce, 0xb0, 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4, + 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x03, 0x7b, 0x05, + 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c, 0xac, 0xd2, 0x50, 0x2e, + 0x49, 0x37, 0xb5, 0xcb, 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, + 0xd1, 0xaf, 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x06, 0x78, + 0xf6, 0x88, 0x0a, 0x74, 0x13, 0x6d, 0xef, 0x91, 0x21, 0x5f, + 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46, 0x45, 0x3b, 0xb9, 0xc7, + 0xa0, 0xde, 0x5c, 0x22, 0x92, 0xec, 0x6e, 0x10, 0x77, 0x09, + 0x8b, 0xf5, 0x8d, 0xf3, 0x71, 0x0f, 0x68, 0x16, 0x94, 0xea, + 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d, 0x3e, 0x40, + 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59, 0xe9, 0x97, 0x15, 0x6b, + 0x0c, 0x72, 0xf0, 0x8e, 0xf1, 0x8f, 0x0d, 0x73, 0x14, 0x6a, + 0xe8, 0x96, 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41, + 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25, 0x95, 0xeb, + 0x69, 0x17, 0x70, 0x0e, 0x8c, 0xf2, 0x8a, 0xf4, 0x76, 0x08, + 0x6f, 0x11, 0x93, 0xed, 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, + 0x44, 0x3a, 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e, + 0xee, 0x90, 0x12, 0x6c, 0x0b, 0x75, 0xf7, 0x89, 0x07, 0x79, + 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60, 0xd0, 0xae, 0x2c, 0x52, + 0x35, 0x4b, 0xc9, 0xb7, 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, + 0xad, 0xd3, 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x04, + 0x7c, 0x02, 
0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b, 0xab, 0xd5, + 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc, 0xcf, 0xb1, 0x33, 0x4d, + 0x2a, 0x54, 0xd6, 0xa8, 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, + 0x01, 0x7f, 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, + 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf, 0xa3, 0xdc, + 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3, 0x7c, 0x03, 0x82, 0xfd, + 0x9d, 0xe2, 0x63, 0x1c, 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, + 0x44, 0x3b, 0x84, 0xfb, 0x7a, 0x05, 0x65, 0x1a, 0x9b, 0xe4, + 0xf8, 0x87, 0x06, 0x79, 0x19, 0x66, 0xe7, 0x98, 0x27, 0x58, + 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47, 0xb6, 0xc9, 0x48, 0x37, + 0x57, 0x28, 0xa9, 0xd6, 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, + 0x76, 0x09, 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0x0a, 0x75, + 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa, 0xed, 0x92, + 0x13, 0x6c, 0x0c, 0x73, 0xf2, 0x8d, 0x32, 0x4d, 0xcc, 0xb3, + 0xd3, 0xac, 0x2d, 0x52, 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, + 0x51, 0x2e, 0x91, 0xee, 0x6f, 0x10, 0x70, 0x0f, 0x8e, 0xf1, + 0x71, 0x0e, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11, 0xae, 0xd1, + 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce, 0xd2, 0xad, 0x2c, 0x53, + 0x33, 0x4c, 0xcd, 0xb2, 0x0d, 0x72, 0xf3, 0x8c, 0xec, 0x93, + 0x12, 0x6d, 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a, + 0xf5, 0x8a, 0x0b, 0x74, 0x14, 0x6b, 0xea, 0x95, 0x89, 0xf6, + 0x77, 0x08, 0x68, 0x17, 0x96, 0xe9, 0x56, 0x29, 0xa8, 0xd7, + 0xb7, 0xc8, 0x49, 0x36, 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, + 0xd8, 0xa7, 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x07, 0x78, + 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x04, 0xbb, 0xc4, + 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb, 0x9c, 0xe3, 0x62, 0x1d, + 0x7d, 0x02, 0x83, 0xfc, 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, + 0x5c, 0x23, 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f, + 0xe0, 0x9f, 0x1e, 0x61, 0x01, 0x7e, 0xff, 0x80, 0x00, 0x80, + 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, + 0x4e, 0xce, 0x53, 0xd3, 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, + 0xcf, 0x4f, 0x9c, 0x1c, 0x81, 0x01, 0xa6, 0x26, 0xbb, 0x3b, + 0xcd, 0x4d, 0xd0, 0x50, 
0xf7, 0x77, 0xea, 0x6a, 0xb9, 0x39, + 0xa4, 0x24, 0x83, 0x03, 0x9e, 0x1e, 0x25, 0xa5, 0x38, 0xb8, + 0x1f, 0x9f, 0x02, 0x82, 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, + 0x76, 0xf6, 0x87, 0x07, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20, + 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54, 0x6f, 0xef, + 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8, 0x1b, 0x9b, 0x06, 0x86, + 0x21, 0xa1, 0x3c, 0xbc, 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, + 0x6d, 0xed, 0x3e, 0xbe, 0x23, 0xa3, 0x04, 0x84, 0x19, 0x99, + 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x05, 0xd6, 0x56, + 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71, 0x13, 0x93, 0x0e, 0x8e, + 0x29, 0xa9, 0x34, 0xb4, 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, + 0x40, 0xc0, 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c, + 0x8f, 0x0f, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28, 0xde, 0x5e, + 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79, 0xaa, 0x2a, 0xb7, 0x37, + 0x90, 0x10, 0x8d, 0x0d, 0x36, 0xb6, 0x2b, 0xab, 0x0c, 0x8c, + 0x11, 0x91, 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5, + 0x94, 0x14, 0x89, 0x09, 0xae, 0x2e, 0xb3, 0x33, 0xe0, 0x60, + 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47, 0x7c, 0xfc, 0x61, 0xe1, + 0x46, 0xc6, 0x5b, 0xdb, 0x08, 0x88, 0x15, 0x95, 0x32, 0xb2, + 0x2f, 0xaf, 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe, + 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0x0a, 0x8a, 0xb1, 0x31, + 0xac, 0x2c, 0x8b, 0x0b, 0x96, 0x16, 0xc5, 0x45, 0xd8, 0x58, + 0xff, 0x7f, 0xe2, 0x62, 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, + 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc, + 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58, 0x84, 0x05, + 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24, 0xed, 0x6c, 0xf2, 0x73, + 0xd3, 0x52, 0xcc, 0x4d, 0x91, 0x10, 0x8e, 0x0f, 0xaf, 0x2e, + 0xb0, 0x31, 0x15, 0x94, 0x0a, 0x8b, 0x2b, 0xaa, 0x34, 0xb5, + 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9, 0xc7, 0x46, + 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67, 0xbb, 0x3a, 0xa4, 0x25, + 0x85, 0x04, 0x9a, 0x1b, 0x3f, 0xbe, 0x20, 0xa1, 0x01, 0x80, + 0x1e, 0x9f, 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3, + 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 
0x0b, 0x8a, 0x56, 0xd7, + 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6, 0xd2, 0x53, 0xcd, 0x4c, + 0xec, 0x6d, 0xf3, 0x72, 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, + 0x8f, 0x0e, 0x93, 0x12, 0x8c, 0x0d, 0xad, 0x2c, 0xb2, 0x33, + 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f, 0x6b, 0xea, + 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb, 0x17, 0x96, 0x08, 0x89, + 0x29, 0xa8, 0x36, 0xb7, 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, + 0x5f, 0xde, 0x02, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2, + 0x86, 0x07, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26, 0xfa, 0x7b, + 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a, 0x54, 0xd5, 0x4b, 0xca, + 0x6a, 0xeb, 0x75, 0xf4, 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, + 0x09, 0x88, 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0x0c, + 0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70, 0xb9, 0x38, + 0xa6, 0x27, 0x87, 0x06, 0x98, 0x19, 0xc5, 0x44, 0xda, 0x5b, + 0xfb, 0x7a, 0xe4, 0x65, 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, + 0x60, 0xe1, 0x3d, 0xbc, 0x22, 0xa3, 0x03, 0x82, 0x1c, 0x9d, + 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, + 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd, 0xc8, 0x4a, 0xd1, 0x53, + 0xfa, 0x78, 0xe3, 0x61, 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, + 0x87, 0x05, 0x8d, 0x0f, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24, + 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40, 0x45, 0xc7, + 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec, 0x21, 0xa3, 0x38, 0xba, + 0x13, 0x91, 0x0a, 0x88, 0x07, 0x85, 0x1e, 0x9c, 0x35, 0xb7, + 0x2c, 0xae, 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca, + 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66, 0xab, 0x29, + 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x02, 0x8a, 0x08, 0x93, 0x11, + 0xb8, 0x3a, 0xa1, 0x23, 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, + 0xc5, 0x47, 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb, + 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0x0d, 0x8f, 0x0e, 0x8c, + 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7, 0x6a, 0xe8, 0x73, 0xf1, + 0x58, 0xda, 0x41, 0xc3, 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, + 0xed, 0x6f, 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0x0b, + 0x83, 0x01, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a, 
0xe7, 0x65, + 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e, 0x4b, 0xc9, 0x52, 0xd0, + 0x79, 0xfb, 0x60, 0xe2, 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, + 0x04, 0x86, 0x09, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0, + 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4, 0xc1, 0x43, + 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68, 0xa5, 0x27, 0xbc, 0x3e, + 0x97, 0x15, 0x8e, 0x0c, 0x84, 0x06, 0x9d, 0x1f, 0xb6, 0x34, + 0xaf, 0x2d, 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49, + 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5, 0x28, 0xaa, + 0x31, 0xb3, 0x1a, 0x98, 0x03, 0x81, 0x00, 0x83, 0x1b, 0x98, + 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, + 0x41, 0xc2, 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76, + 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x01, 0x99, 0x1a, 0xad, 0x2e, + 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x03, 0xc1, 0x42, 0xda, 0x59, + 0xf7, 0x74, 0xec, 0x6f, 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, + 0x58, 0xdb, 0x19, 0x9a, 0x02, 0x81, 0x2f, 0xac, 0x34, 0xb7, + 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9, 0x2b, 0xa8, + 0x30, 0xb3, 0x1d, 0x9e, 0x06, 0x85, 0x9f, 0x1c, 0x84, 0x07, + 0xa9, 0x2a, 0xb2, 0x31, 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, + 0xde, 0x5d, 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44, + 0x86, 0x05, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28, 0x32, 0xb1, + 0x29, 0xaa, 0x04, 0x87, 0x1f, 0x9c, 0x5e, 0xdd, 0x45, 0xc6, + 0x68, 0xeb, 0x73, 0xf0, 0x8e, 0x0d, 0x95, 0x16, 0xb8, 0x3b, + 0xa3, 0x20, 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c, + 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8, 0x3a, 0xb9, + 0x21, 0xa2, 0x0c, 0x8f, 0x17, 0x94, 0x23, 0xa0, 0x38, 0xbb, + 0x15, 0x96, 0x0e, 0x8d, 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, + 0x62, 0xe1, 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55, + 0x97, 0x14, 0x8c, 0x0f, 0xa1, 0x22, 0xba, 0x39, 0xc9, 0x4a, + 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67, 0xa5, 0x26, 0xbe, 0x3d, + 0x93, 0x10, 0x88, 0x0b, 0x11, 0x92, 0x0a, 0x89, 0x27, 0xa4, + 0x3c, 0xbf, 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3, + 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca, 0x08, 0x8b, + 
0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6, 0xbc, 0x3f, 0xa7, 0x24, + 0x8a, 0x09, 0x91, 0x12, 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, + 0xfd, 0x7e, 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, + 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef, 0xa8, 0x2c, + 0xbd, 0x39, 0x82, 0x06, 0x97, 0x13, 0xfc, 0x78, 0xe9, 0x6d, + 0xd6, 0x52, 0xc3, 0x47, 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, + 0x72, 0xf6, 0x19, 0x9d, 0x0c, 0x88, 0x33, 0xb7, 0x26, 0xa2, + 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e, 0xb1, 0x35, + 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0x0a, 0x9a, 0x1e, 0x8f, 0x0b, + 0xb0, 0x34, 0xa5, 0x21, 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, + 0xf1, 0x75, 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0x0d, 0x89, + 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd, 0xd7, 0x53, + 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c, 0x83, 0x07, 0x96, 0x12, + 0xa9, 0x2d, 0xbc, 0x38, 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, + 0x40, 0xc4, 0x2b, 0xaf, 0x3e, 0xba, 0x01, 0x85, 0x14, 0x90, + 0x29, 0xad, 0x3c, 0xb8, 0x03, 0x87, 0x16, 0x92, 0x7d, 0xf9, + 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6, 0x81, 0x05, 0x94, 0x10, + 0xab, 0x2f, 0xbe, 0x3a, 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, + 0xea, 0x6e, 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf, + 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0x0f, 0x8b, 0xcc, 0x48, + 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77, 0x98, 0x1c, 0x8d, 0x09, + 0xb2, 0x36, 0xa7, 0x23, 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, + 0x8c, 0x08, 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c, + 0x1b, 0x9f, 0x0e, 0x8a, 0x31, 0xb5, 0x24, 0xa0, 0x4f, 0xcb, + 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4, 0xfe, 0x7a, 0xeb, 0x6f, + 0xd4, 0x50, 0xc1, 0x45, 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x04, + 0x95, 0x11, 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed, + 0x02, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9, 0x00, 0x85, + 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, + 0x72, 0xf7, 0x65, 0xe0, 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, + 0x81, 0x04, 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58, + 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1, 0x31, 0xb4, + 0x26, 0xa3, 
0x1f, 0x9a, 0x08, 0x8d, 0xd5, 0x50, 0xc2, 0x47, + 0xfb, 0x7e, 0xec, 0x69, 0x89, 0x0c, 0x9e, 0x1b, 0xa7, 0x22, + 0xb0, 0x35, 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66, + 0x86, 0x03, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a, 0x62, 0xe7, + 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde, 0x3e, 0xbb, 0x29, 0xac, + 0x10, 0x95, 0x07, 0x82, 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, + 0x8e, 0x0b, 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57, + 0x0f, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3, 0x53, 0xd6, + 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef, 0xa9, 0x2c, 0xbe, 0x3b, + 0x87, 0x02, 0x90, 0x15, 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, + 0xcc, 0x49, 0x11, 0x94, 0x06, 0x83, 0x3f, 0xba, 0x28, 0xad, + 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1, 0xc4, 0x41, + 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78, 0x98, 0x1d, 0x8f, 0x0a, + 0xb6, 0x33, 0xa1, 0x24, 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, + 0x45, 0xc0, 0x20, 0xa5, 0x37, 0xb2, 0x0e, 0x8b, 0x19, 0x9c, + 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf, 0x2f, 0xaa, + 0x38, 0xbd, 0x01, 0x84, 0x16, 0x93, 0xcb, 0x4e, 0xdc, 0x59, + 0xe5, 0x60, 0xf2, 0x77, 0x97, 0x12, 0x80, 0x05, 0xb9, 0x3c, + 0xae, 0x2b, 0x1e, 0x9b, 0x09, 0x8c, 0x30, 0xb5, 0x27, 0xa2, + 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe, 0xa6, 0x23, + 0xb1, 0x34, 0x88, 0x0d, 0x9f, 0x1a, 0xfa, 0x7f, 0xed, 0x68, + 0xd4, 0x51, 0xc3, 0x46, 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, + 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1, + 0x88, 0x0e, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d, 0xcc, 0x4a, + 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79, 0x0d, 0x8b, 0x1c, 0x9a, + 0x2f, 0xa9, 0x3e, 0xb8, 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, + 0x7a, 0xfc, 0x85, 0x03, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30, + 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74, 0x1a, 0x9c, + 0x0b, 0x8d, 0x38, 0xbe, 0x29, 0xaf, 0x5e, 0xd8, 0x4f, 0xc9, + 0x7c, 0xfa, 0x6d, 0xeb, 0x92, 0x14, 0x83, 0x05, 0xb0, 0x36, + 0xa1, 0x27, 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63, + 0x17, 0x91, 0x06, 0x80, 0x35, 0xb3, 0x24, 0xa2, 0x53, 0xd5, + 0x42, 0xc4, 0x71, 0xf7, 
0x60, 0xe6, 0x9f, 0x19, 0x8e, 0x08, + 0xbd, 0x3b, 0xac, 0x2a, 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, + 0xe8, 0x6e, 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x07, 0x81, + 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5, 0xbc, 0x3a, + 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x09, 0xf8, 0x7e, 0xe9, 0x6f, + 0xda, 0x5c, 0xcb, 0x4d, 0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d, + 0x0a, 0x8c, 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8, + 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x04, 0xf5, 0x73, + 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40, 0x2e, 0xa8, 0x3f, 0xb9, + 0x0c, 0x8a, 0x1d, 0x9b, 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, + 0x59, 0xdf, 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x02, 0x95, 0x13, + 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57, 0x23, 0xa5, + 0x32, 0xb4, 0x01, 0x87, 0x10, 0x96, 0x67, 0xe1, 0x76, 0xf0, + 0x45, 0xc3, 0x54, 0xd2, 0xab, 0x2d, 0xba, 0x3c, 0x89, 0x0f, + 0x98, 0x1e, 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a, + 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, + 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe, 0x98, 0x1f, 0x8b, 0x0c, + 0xbe, 0x39, 0xad, 0x2a, 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, + 0xe1, 0x66, 0x2d, 0xaa, 0x3e, 0xb9, 0x0b, 0x8c, 0x18, 0x9f, + 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3, 0xb5, 0x32, + 0xa6, 0x21, 0x93, 0x14, 0x80, 0x07, 0xf9, 0x7e, 0xea, 0x6d, + 0xdf, 0x58, 0xcc, 0x4b, 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, + 0x6f, 0xe8, 0x16, 0x91, 0x05, 0x82, 0x30, 0xb7, 0x23, 0xa4, + 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70, 0x8e, 0x09, + 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c, 0x77, 0xf0, 0x64, 0xe3, + 0x51, 0xd6, 0x42, 0xc5, 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, + 0x0e, 0x89, 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d, + 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x02, 0x96, 0x11, 0xb4, 0x33, + 0xa7, 0x20, 0x92, 0x15, 0x81, 0x06, 0xf8, 0x7f, 0xeb, 0x6c, + 0xde, 0x59, 0xcd, 0x4a, 0x2c, 0xab, 0x3f, 0xb8, 0x0a, 0x8d, + 0x19, 0x9e, 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2, + 0x99, 0x1e, 0x8a, 0x0d, 0xbf, 0x38, 0xac, 0x2b, 0xd5, 0x52, + 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67, 
0x01, 0x86, 0x12, 0x95, + 0x27, 0xa0, 0x34, 0xb3, 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, + 0x78, 0xff, 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c, + 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x03, 0x97, 0x10, 0x76, 0xf1, + 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4, 0x3a, 0xbd, 0x29, 0xae, + 0x1c, 0x9b, 0x0f, 0x88, 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, + 0xf6, 0x71, 0x8f, 0x08, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d, + 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9, 0x17, 0x90, + 0x04, 0x83, 0x31, 0xb6, 0x22, 0xa5, 0x00, 0x88, 0x0d, 0x85, + 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, + 0x23, 0xab, 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7, + 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3, 0xd0, 0x58, + 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f, 0xe4, 0x6c, 0xe9, 0x61, + 0xfe, 0x76, 0xf3, 0x7b, 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, + 0xaf, 0x27, 0x8c, 0x04, 0x81, 0x09, 0x96, 0x1e, 0x9b, 0x13, + 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22, 0x89, 0x01, + 0x84, 0x0c, 0x93, 0x1b, 0x9e, 0x16, 0xd5, 0x5d, 0xd8, 0x50, + 0xcf, 0x47, 0xc2, 0x4a, 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, + 0xf6, 0x7e, 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2, + 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6, 0x05, 0x8d, + 0x08, 0x80, 0x1f, 0x97, 0x12, 0x9a, 0x31, 0xb9, 0x3c, 0xb4, + 0x2b, 0xa3, 0x26, 0xae, 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, + 0x70, 0xf8, 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc, + 0x0f, 0x87, 0x02, 0x8a, 0x15, 0x9d, 0x18, 0x90, 0x3b, 0xb3, + 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4, 0xb7, 0x3f, 0xba, 0x32, + 0xad, 0x25, 0xa0, 0x28, 0x83, 0x0b, 0x8e, 0x06, 0x99, 0x11, + 0x94, 0x1c, 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40, + 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74, 0xda, 0x52, + 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45, 0xee, 0x66, 0xe3, 0x6b, + 0xf4, 0x7c, 0xf9, 0x71, 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, + 0xa5, 0x2d, 0x86, 0x0e, 0x8b, 0x03, 0x9c, 0x14, 0x91, 0x19, + 0x0a, 0x82, 0x07, 0x8f, 0x10, 0x98, 0x1d, 0x95, 0x3e, 0xb6, + 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1, 0x62, 0xea, 
0x6f, 0xe7, + 0x78, 0xf0, 0x75, 0xfd, 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, + 0x41, 0xc9, 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, + 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4, 0x78, 0xf1, + 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0, 0x44, 0xcd, 0x4b, 0xc2, + 0x5a, 0xd3, 0x55, 0xdc, 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, + 0xe1, 0x68, 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54, + 0x88, 0x01, 0x87, 0x0e, 0x96, 0x1f, 0x99, 0x10, 0xb4, 0x3d, + 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c, 0xfd, 0x74, 0xf2, 0x7b, + 0xe3, 0x6a, 0xec, 0x65, 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, + 0xd0, 0x59, 0x85, 0x0c, 0x8a, 0x03, 0x9b, 0x12, 0x94, 0x1d, + 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21, 0x0d, 0x84, + 0x02, 0x8b, 0x13, 0x9a, 0x1c, 0x95, 0x31, 0xb8, 0x3e, 0xb7, + 0x2f, 0xa6, 0x20, 0xa9, 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, + 0x64, 0xed, 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1, + 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f, 0xdb, 0x52, + 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43, 0x9f, 0x16, 0x90, 0x19, + 0x81, 0x08, 0x8e, 0x07, 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, + 0xb2, 0x3b, 0x17, 0x9e, 0x18, 0x91, 0x09, 0x80, 0x06, 0x8f, + 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3, 0x6f, 0xe6, + 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7, 0x53, 0xda, 0x5c, 0xd5, + 0x4d, 0xc4, 0x42, 0xcb, 0x1a, 0x93, 0x15, 0x9c, 0x04, 0x8d, + 0x0b, 0x82, 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe, + 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa, 0x5e, 0xd7, + 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6, 0xea, 0x63, 0xe5, 0x6c, + 0xf4, 0x7d, 0xfb, 0x72, 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, + 0xc7, 0x4e, 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x05, 0x83, 0x0a, + 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36, 0x00, 0x8a, + 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, + 0x36, 0xbc, 0x3f, 0xb5, 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, + 0x53, 0xd9, 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd, + 0x90, 0x1a, 0x99, 0x13, 0x82, 0x08, 0x8b, 0x01, 0xb4, 0x3e, + 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25, 0xd8, 0x52, 0xd1, 0x5b, + 
0xca, 0x40, 0xc3, 0x49, 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, + 0xe7, 0x6d, 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac, + 0x19, 0x93, 0x10, 0x9a, 0x0b, 0x81, 0x02, 0x88, 0x75, 0xff, + 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4, 0x51, 0xdb, 0x58, 0xd2, + 0x43, 0xc9, 0x4a, 0xc0, 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, + 0xb6, 0x3c, 0x89, 0x03, 0x80, 0x0a, 0x9b, 0x11, 0x92, 0x18, + 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74, 0xc1, 0x4b, + 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50, 0x7a, 0xf0, 0x73, 0xf9, + 0x68, 0xe2, 0x61, 0xeb, 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, + 0x45, 0xcf, 0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3, + 0x16, 0x9c, 0x1f, 0x95, 0x04, 0x8e, 0x0d, 0x87, 0xea, 0x60, + 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b, 0xce, 0x44, 0xc7, 0x4d, + 0xdc, 0x56, 0xd5, 0x5f, 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, + 0xb9, 0x33, 0x86, 0x0c, 0x8f, 0x05, 0x94, 0x1e, 0x9d, 0x17, + 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6, 0x63, 0xe9, + 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2, 0x0f, 0x85, 0x06, 0x8c, + 0x1d, 0x97, 0x14, 0x9e, 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, + 0x30, 0xba, 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46, + 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62, 0x9f, 0x15, + 0x96, 0x1c, 0x8d, 0x07, 0x84, 0x0e, 0xbb, 0x31, 0xb2, 0x38, + 0xa9, 0x23, 0xa0, 0x2a, 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, + 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba, + 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce, 0x74, 0xff, + 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2, 0xb0, 0x3b, 0xbb, 0x30, + 0xa6, 0x2d, 0xad, 0x26, 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x01, + 0x81, 0x0a, 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e, + 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52, 0x7d, 0xf6, + 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb, 0x51, 0xda, 0x5a, 0xd1, + 0x47, 0xcc, 0x4c, 0xc7, 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, + 0x38, 0xb3, 0x09, 0x82, 0x02, 0x89, 0x1f, 0x94, 0x14, 0x9f, + 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b, 0xe1, 0x6a, + 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77, 0x95, 0x1e, 0x9e, 0x15, + 0x83, 0x08, 
0x88, 0x03, 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, + 0xa4, 0x2f, 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c, + 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40, 0xa2, 0x29, + 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34, 0x8e, 0x05, 0x85, 0x0e, + 0x98, 0x13, 0x93, 0x18, 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, + 0x57, 0xdc, 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0, + 0x12, 0x99, 0x19, 0x92, 0x04, 0x8f, 0x0f, 0x84, 0x3e, 0xb5, + 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8, 0x87, 0x0c, 0x8c, 0x07, + 0x91, 0x1a, 0x9a, 0x11, 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, + 0xb6, 0x3d, 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49, + 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65, 0x37, 0xbc, + 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1, 0x1b, 0x90, 0x10, 0x9b, + 0x0d, 0x86, 0x06, 0x8d, 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, + 0x72, 0xf9, 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5, + 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, + 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97, 0x28, 0xa4, 0x2d, 0xa1, + 0x22, 0xae, 0x27, 0xab, 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, + 0x33, 0xbf, 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3, + 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7, 0x78, 0xf4, + 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb, 0x6c, 0xe0, 0x69, 0xe5, + 0x66, 0xea, 0x63, 0xef, 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, + 0xaf, 0x23, 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37, + 0x88, 0x04, 0x8d, 0x01, 0x82, 0x0e, 0x87, 0x0b, 0x9c, 0x10, + 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f, 0xf0, 0x7c, 0xf5, 0x79, + 0xfa, 0x76, 0xff, 0x73, 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, + 0xeb, 0x67, 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b, + 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f, 0x5d, 0xd1, + 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde, 0x49, 0xc5, 0x4c, 0xc0, + 0x43, 0xcf, 0x46, 0xca, 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, + 0x7a, 0xf6, 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2, + 0x0d, 0x81, 0x08, 0x84, 0x07, 0x8b, 0x02, 0x8e, 0x19, 0x95, + 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a, 0x25, 0xa9, 0x20, 0xac, + 0x2f, 0xa3, 0x2a, 0xa6, 
0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, + 0x3e, 0xb2, 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e, + 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a, 0xd5, 0x59, + 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56, 0xc1, 0x4d, 0xc4, 0x48, + 0xcb, 0x47, 0xce, 0x42, 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, + 0xa2, 0x2e, 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a, + 0x85, 0x09, 0x80, 0x0c, 0x8f, 0x03, 0x8a, 0x06, 0x91, 0x1d, + 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12, 0x00, 0x8d, 0x07, 0x8a, + 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, + 0x15, 0x98, 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc, + 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0, 0x70, 0xfd, + 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4, 0x6c, 0xe1, 0x6b, 0xe6, + 0x62, 0xef, 0x65, 0xe8, 0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb, + 0x41, 0xcc, 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0, + 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64, 0xfc, 0x71, + 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78, 0xd8, 0x55, 0xdf, 0x52, + 0xd6, 0x5b, 0xd1, 0x5c, 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, + 0xcd, 0x40, 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14, + 0x8c, 0x01, 0x8b, 0x06, 0x82, 0x0f, 0x85, 0x08, 0xa8, 0x25, + 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c, 0xb4, 0x39, 0xb3, 0x3e, + 0xba, 0x37, 0xbd, 0x30, 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, + 0xd4, 0x59, 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45, + 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61, 0xf9, 0x74, + 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d, 0xad, 0x20, 0xaa, 0x27, + 0xa3, 0x2e, 0xa4, 0x29, 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, + 0xb8, 0x35, 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11, + 0x89, 0x04, 0x8e, 0x03, 0x87, 0x0a, 0x80, 0x0d, 0x3d, 0xb0, + 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9, 0x21, 0xac, 0x26, 0xab, + 0x2f, 0xa2, 0x28, 0xa5, 0x05, 0x88, 0x02, 0x8f, 0x0b, 0x86, + 0x0c, 0x81, 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d, + 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9, 0x51, 0xdc, + 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5, 0x75, 0xf8, 0x72, 0xff, + 0x7b, 0xf6, 0x7c, 0xf1, 0x69, 0xe4, 
0x6e, 0xe3, 0x67, 0xea, + 0x60, 0xed, 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, + 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89, 0x08, 0x86, + 0x09, 0x87, 0x0a, 0x84, 0x0b, 0x85, 0x0c, 0x82, 0x0d, 0x83, + 0x0e, 0x80, 0x0f, 0x81, 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, + 0x13, 0x9d, 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99, + 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95, 0x1c, 0x92, + 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91, 0x20, 0xae, 0x21, 0xaf, + 0x22, 0xac, 0x23, 0xad, 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, + 0x27, 0xa9, 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5, + 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1, 0x30, 0xbe, + 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd, 0x34, 0xba, 0x35, 0xbb, + 0x36, 0xb8, 0x37, 0xb9, 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, + 0x3b, 0xb5, 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1, + 0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd, 0x44, 0xca, + 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9, 0x48, 0xc6, 0x49, 0xc7, + 0x4a, 0xc4, 0x4b, 0xc5, 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, + 0x4f, 0xc1, 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd, + 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9, 0x58, 0xd6, + 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5, 0x5c, 0xd2, 0x5d, 0xd3, + 0x5e, 0xd0, 0x5f, 0xd1, 0x60, 0xee, 0x61, 0xef, 0x62, 0xec, + 0x63, 0xed, 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9, + 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5, 0x6c, 0xe2, + 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1, 0x70, 0xfe, 0x71, 0xff, + 0x72, 0xfc, 0x73, 0xfd, 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, + 0x77, 0xf9, 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5, + 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1, 0x00, 0x8f, + 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, + 0x0a, 0x85, 0x09, 0x86, 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, + 0x1d, 0x92, 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e, + 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba, 0x3c, 0xb3, + 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6, 0x28, 0xa7, 0x2b, 0xa4, + 0x2e, 0xa1, 0x2d, 0xa2, 0x24, 0xab, 0x27, 0xa8, 
0x22, 0xad, + 0x21, 0xae, 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea, + 0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6, 0x78, 0xf7, + 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2, 0x74, 0xfb, 0x77, 0xf8, + 0x72, 0xfd, 0x71, 0xfe, 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, + 0x55, 0xda, 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6, + 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2, 0x44, 0xcb, + 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce, 0xc0, 0x4f, 0xc3, 0x4c, + 0xc6, 0x49, 0xc5, 0x4a, 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, + 0xc9, 0x46, 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52, + 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e, 0xf0, 0x7f, + 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a, 0xfc, 0x73, 0xff, 0x70, + 0xfa, 0x75, 0xf9, 0x76, 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, + 0xed, 0x62, 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e, + 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a, 0xac, 0x23, + 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26, 0xb8, 0x37, 0xbb, 0x34, + 0xbe, 0x31, 0xbd, 0x32, 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, + 0xb1, 0x3e, 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a, + 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16, 0x88, 0x07, + 0x8b, 0x04, 0x8e, 0x01, 0x8d, 0x02, 0x84, 0x0b, 0x87, 0x08, + 0x82, 0x0d, 0x81, 0x0e, 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, + 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23, + 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22, 0x01, 0x91, + 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6, 0xf7, 0x67, 0xca, 0x5a, + 0x8d, 0x1d, 0xb0, 0x20, 0x03, 0x93, 0x3e, 0xae, 0x79, 0xe9, + 0x44, 0xd4, 0x02, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5, + 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21, 0xf3, 0x63, + 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24, 0x07, 0x97, 0x3a, 0xaa, + 0x7d, 0xed, 0x40, 0xd0, 0x06, 0x96, 0x3b, 0xab, 0x7c, 0xec, + 0x41, 0xd1, 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25, + 0x04, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3, 0xf0, 0x60, + 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27, 0xf1, 0x61, 0xcc, 0x5c, + 0x8b, 0x1b, 0xb6, 0x26, 0x05, 0x95, 0x38, 0xa8, 0x7f, 0xef, + 
0x42, 0xd2, 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c, + 0x0f, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8, 0x0e, 0x9e, + 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9, 0xfa, 0x6a, 0xc7, 0x57, + 0x80, 0x10, 0xbd, 0x2d, 0x0c, 0x9c, 0x31, 0xa1, 0x76, 0xe6, + 0x4b, 0xdb, 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f, + 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e, 0x0d, 0x9d, + 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda, 0x08, 0x98, 0x35, 0xa5, + 0x72, 0xe2, 0x4f, 0xdf, 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, + 0xbb, 0x2b, 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a, + 0x09, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde, 0xff, 0x6f, + 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28, 0x0b, 0x9b, 0x36, 0xa6, + 0x71, 0xe1, 0x4c, 0xdc, 0x0a, 0x9a, 0x37, 0xa7, 0x70, 0xe0, + 0x4d, 0xdd, 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29, + 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, + 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c, 0xe5, 0x74, 0xda, 0x4b, + 0x9b, 0x0a, 0xa4, 0x35, 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, + 0x58, 0xc9, 0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x07, + 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb, 0x32, 0xa3, + 0x0d, 0x9c, 0x4c, 0xdd, 0x73, 0xe2, 0xce, 0x5f, 0xf1, 0x60, + 0xb0, 0x21, 0x8f, 0x1e, 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, + 0xf2, 0x63, 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0x0e, 0x9f, + 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86, 0xaa, 0x3b, + 0x95, 0x04, 0xd4, 0x45, 0xeb, 0x7a, 0x64, 0xf5, 0x5b, 0xca, + 0x1a, 0x8b, 0x25, 0xb4, 0x98, 0x09, 0xa7, 0x36, 0xe6, 0x77, + 0xd9, 0x48, 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51, + 0x7d, 0xec, 0x42, 0xd3, 0x03, 0x92, 0x3c, 0xad, 0x7b, 0xea, + 0x44, 0xd5, 0x05, 0x94, 0x3a, 0xab, 0x87, 0x16, 0xb8, 0x29, + 0xf9, 0x68, 0xc6, 0x57, 0x9e, 0x0f, 0xa1, 0x30, 0xe0, 0x71, + 0xdf, 0x4e, 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2, + 0xac, 0x3d, 0x93, 0x02, 0xd2, 0x43, 0xed, 0x7c, 0x50, 0xc1, + 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80, 0x49, 0xd8, 0x76, 0xe7, + 0x37, 0xa6, 0x08, 0x99, 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, + 0xf4, 0x65, 
0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18, + 0x34, 0xa5, 0x0b, 0x9a, 0x4a, 0xdb, 0x75, 0xe4, 0x2d, 0xbc, + 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd, 0xd1, 0x40, 0xee, 0x7f, + 0xaf, 0x3e, 0x90, 0x01, 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, + 0x5e, 0xcf, 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0x0c, 0xa2, 0x33, + 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a, 0x06, 0x97, + 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6, 0x00, 0x92, 0x39, 0xab, + 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, + 0xaf, 0x3d, 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0x0c, + 0x31, 0xa3, 0x08, 0x9a, 0x43, 0xd1, 0x7a, 0xe8, 0xb7, 0x25, + 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e, 0x53, 0xc1, 0x6a, 0xf8, + 0x21, 0xb3, 0x18, 0x8a, 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, + 0x29, 0xbb, 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f, + 0x73, 0xe1, 0x4a, 0xd8, 0x01, 0x93, 0x38, 0xaa, 0x97, 0x05, + 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e, 0xa6, 0x34, 0x9f, 0x0d, + 0xd4, 0x46, 0xed, 0x7f, 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, + 0x09, 0x9b, 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d, + 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9, 0x11, 0x83, + 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8, 0xf5, 0x67, 0xcc, 0x5e, + 0x87, 0x15, 0xbe, 0x2c, 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x06, + 0xad, 0x3f, 0x02, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb, + 0x33, 0xa1, 0x0a, 0x98, 0x41, 0xd3, 0x78, 0xea, 0xd7, 0x45, + 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0x0e, 0x51, 0xc3, 0x68, 0xfa, + 0x23, 0xb1, 0x1a, 0x88, 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, + 0xfe, 0x6c, 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d, + 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9, 0x95, 0x07, + 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c, 0x71, 0xe3, 0x48, 0xda, + 0x03, 0x91, 0x3a, 0xa8, 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, + 0x0b, 0x99, 0xa4, 0x36, 0x9d, 0x0f, 0xd6, 0x44, 0xef, 0x7d, + 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb, 0xc6, 0x54, + 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f, 0xf7, 0x65, 0xce, 0x5c, + 0x85, 0x17, 0xbc, 0x2e, 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, + 0x58, 0xca, 0x00, 0x93, 
0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, + 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32, 0xc5, 0x56, + 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b, 0x29, 0xba, 0x12, 0x81, + 0x5f, 0xcc, 0x64, 0xf7, 0x97, 0x04, 0xac, 0x3f, 0xe1, 0x72, + 0xda, 0x49, 0x7b, 0xe8, 0x40, 0xd3, 0x0d, 0x9e, 0x36, 0xa5, + 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c, 0xbe, 0x2d, + 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60, 0x33, 0xa0, 0x08, 0x9b, + 0x45, 0xd6, 0x7e, 0xed, 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, + 0x92, 0x01, 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28, + 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4, 0xa4, 0x37, + 0x9f, 0x0c, 0xd2, 0x41, 0xe9, 0x7a, 0x48, 0xdb, 0x73, 0xe0, + 0x3e, 0xad, 0x05, 0x96, 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, + 0x2c, 0xbf, 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53, + 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8, 0x8a, 0x19, + 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54, 0xa3, 0x30, 0x98, 0x0b, + 0xd5, 0x46, 0xee, 0x7d, 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, + 0x02, 0x91, 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f, + 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3, 0x34, 0xa7, + 0x0f, 0x9c, 0x42, 0xd1, 0x79, 0xea, 0xd8, 0x4b, 0xe3, 0x70, + 0xae, 0x3d, 0x95, 0x06, 0x55, 0xc6, 0x6e, 0xfd, 0x23, 0xb0, + 0x18, 0x8b, 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67, + 0x90, 0x03, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e, 0x7c, 0xef, + 0x47, 0xd4, 0x0a, 0x99, 0x31, 0xa2, 0xc2, 0x51, 0xf9, 0x6a, + 0xb4, 0x27, 0x8f, 0x1c, 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, + 0x63, 0xf0, 0x07, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9, + 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0x0e, 0xa6, 0x35, 0x00, 0x94, + 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, + 0xbe, 0x2a, 0x8b, 0x1f, 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, + 0xea, 0x7e, 0x61, 0xf5, 0x54, 0xc0, 0x0b, 0x9f, 0x3e, 0xaa, + 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc, 0xa3, 0x37, + 0x96, 0x02, 0xc9, 0x5d, 0xfc, 0x68, 0xc2, 0x56, 0xf7, 0x63, + 0xa8, 0x3c, 0x9d, 0x09, 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, + 0x49, 0xdd, 0xee, 0x7a, 0xdb, 0x4f, 
0x84, 0x10, 0xb1, 0x25, + 0x3a, 0xae, 0x0f, 0x9b, 0x50, 0xc4, 0x65, 0xf1, 0x5b, 0xcf, + 0x6e, 0xfa, 0x31, 0xa5, 0x04, 0x90, 0x8f, 0x1b, 0xba, 0x2e, + 0xe5, 0x71, 0xd0, 0x44, 0x99, 0x0d, 0xac, 0x38, 0xf3, 0x67, + 0xc6, 0x52, 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86, + 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7, 0xf8, 0x6c, + 0xcd, 0x59, 0x92, 0x06, 0xa7, 0x33, 0xc1, 0x55, 0xf4, 0x60, + 0xab, 0x3f, 0x9e, 0x0a, 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, + 0x4a, 0xde, 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf, + 0xa0, 0x34, 0x95, 0x01, 0xca, 0x5e, 0xff, 0x6b, 0xb6, 0x22, + 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d, 0x62, 0xf6, 0x57, 0xc3, + 0x08, 0x9c, 0x3d, 0xa9, 0x03, 0x97, 0x36, 0xa2, 0x69, 0xfd, + 0x5c, 0xc8, 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c, + 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4, 0xfb, 0x6f, + 0xce, 0x5a, 0x91, 0x05, 0xa4, 0x30, 0x9a, 0x0e, 0xaf, 0x3b, + 0xf0, 0x64, 0xc5, 0x51, 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, + 0x11, 0x85, 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x07, 0x93, + 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47, 0xed, 0x79, + 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26, 0x39, 0xad, 0x0c, 0x98, + 0x53, 0xc7, 0x66, 0xf2, 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, + 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10, + 0xa5, 0x30, 0x92, 0x07, 0xcb, 0x5e, 0xfc, 0x69, 0x79, 0xec, + 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5, 0x57, 0xc2, 0x60, 0xf5, + 0x39, 0xac, 0x0e, 0x9b, 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, + 0xd2, 0x47, 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x09, 0xab, 0x3e, + 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2, 0xae, 0x3b, + 0x99, 0x0c, 0xc0, 0x55, 0xf7, 0x62, 0x72, 0xe7, 0x45, 0xd0, + 0x1c, 0x89, 0x2b, 0xbe, 0x0b, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, + 0x52, 0xc7, 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b, + 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x02, 0xa0, 0x35, 0x25, 0xb0, + 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9, 0x5c, 0xc9, 0x6b, 0xfe, + 0x32, 0xa7, 0x05, 0x90, 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, + 0xd9, 0x4c, 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 
0x18, 0x8d, + 0x9d, 0x08, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51, 0xe4, 0x71, + 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28, 0x38, 0xad, 0x0f, 0x9a, + 0x56, 0xc3, 0x61, 0xf4, 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, + 0x4f, 0xda, 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x06, + 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f, 0x6f, 0xfa, + 0x58, 0xcd, 0x01, 0x94, 0x36, 0xa3, 0xef, 0x7a, 0xd8, 0x4d, + 0x81, 0x14, 0xb6, 0x23, 0x33, 0xa6, 0x04, 0x91, 0x5d, 0xc8, + 0x6a, 0xff, 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86, + 0x96, 0x03, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a, 0xb8, 0x2d, + 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74, 0x64, 0xf1, 0x53, 0xc6, + 0x0a, 0x9f, 0x3d, 0xa8, 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, + 0x44, 0xd1, 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0x0d, + 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, + 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01, 0x95, 0x03, 0xa4, 0x32, + 0xf7, 0x61, 0xc6, 0x50, 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, + 0x02, 0x94, 0x37, 0xa1, 0x06, 0x90, 0x55, 0xc3, 0x64, 0xf2, + 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x07, 0xa0, 0x36, 0xa2, 0x34, + 0x93, 0x05, 0xc0, 0x56, 0xf1, 0x67, 0x66, 0xf0, 0x57, 0xc1, + 0x04, 0x92, 0x35, 0xa3, 0x6e, 0xf8, 0x5f, 0xc9, 0x0c, 0x9a, + 0x3d, 0xab, 0xaa, 0x3c, 0x9b, 0x0d, 0xc8, 0x5e, 0xf9, 0x6f, + 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0x0f, 0xa8, 0x3e, 0x3f, 0xa9, + 0x0e, 0x98, 0x5d, 0xcb, 0x6c, 0xfa, 0x59, 0xcf, 0x68, 0xfe, + 0x3b, 0xad, 0x0a, 0x9c, 0x9d, 0x0b, 0xac, 0x3a, 0xff, 0x69, + 0xce, 0x58, 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x09, + 0x08, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd, 0xdc, 0x4a, + 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19, 0x18, 0x8e, 0x29, 0xbf, + 0x7a, 0xec, 0x4b, 0xdd, 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, + 0x1a, 0x8c, 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48, + 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e, 0x2f, 0xb9, + 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea, 0x7e, 0xe8, 0x4f, 0xd9, + 0x1c, 0x8a, 0x2d, 0xbb, 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, + 0xe9, 0x7f, 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77, + 
0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3, 0x27, 0xb1, + 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2, 0xe3, 0x75, 0xd2, 0x44, + 0x81, 0x17, 0xb0, 0x26, 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, + 0xd6, 0x40, 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84, + 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5, 0xd4, 0x42, + 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11, 0x00, 0x97, 0x33, 0xa4, + 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, + 0x99, 0x0e, 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47, + 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b, 0x17, 0x80, + 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5, 0xdb, 0x4c, 0xe8, 0x7f, + 0xbd, 0x2a, 0x8e, 0x19, 0x92, 0x05, 0xa1, 0x36, 0xf4, 0x63, + 0xc7, 0x50, 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0x0b, 0x9c, + 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec, 0xe2, 0x75, + 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20, 0xab, 0x3c, 0x98, 0x0f, + 0xcd, 0x5a, 0xfe, 0x69, 0x67, 0xf0, 0x54, 0xc3, 0x01, 0x96, + 0x32, 0xa5, 0x39, 0xae, 0x0a, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb, + 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x04, 0xa0, 0x37, 0xbc, 0x2b, + 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e, 0x70, 0xe7, 0x43, 0xd4, + 0x16, 0x81, 0x25, 0xb2, 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, + 0x09, 0x9e, 0x90, 0x07, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52, + 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b, 0x15, 0x82, + 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7, 0x4b, 0xdc, 0x78, 0xef, + 0x2d, 0xba, 0x1e, 0x89, 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, + 0xd2, 0x45, 0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0x0c, + 0x02, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0, 0x72, 0xe5, + 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0, 0xbe, 0x29, 0x8d, 0x1a, + 0xd8, 0x4f, 0xeb, 0x7c, 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x06, + 0xa2, 0x35, 0x3b, 0xac, 0x08, 0x9f, 0x5d, 0xca, 0x6e, 0xf9, + 0x65, 0xf2, 0x56, 0xc1, 0x03, 0x94, 0x30, 0xa7, 0xa9, 0x3e, + 0x9a, 0x0d, 0xcf, 0x58, 0xfc, 0x6b, 0xe0, 0x77, 0xd3, 0x44, + 0x86, 0x11, 0xb5, 0x22, 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, + 0x79, 0xee, 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, + 0xb4, 0x2c, 
0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b, 0x75, 0xed, + 0x58, 0xc0, 0x2f, 0xb7, 0x02, 0x9a, 0xc1, 0x59, 0xec, 0x74, + 0x9b, 0x03, 0xb6, 0x2e, 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, + 0x9d, 0x05, 0x5e, 0xc6, 0x73, 0xeb, 0x04, 0x9c, 0x29, 0xb1, + 0x9f, 0x07, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70, 0x2b, 0xb3, + 0x06, 0x9e, 0x71, 0xe9, 0x5c, 0xc4, 0xc9, 0x51, 0xe4, 0x7c, + 0x93, 0x0b, 0xbe, 0x26, 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, + 0x0a, 0x92, 0xbc, 0x24, 0x91, 0x09, 0xe6, 0x7e, 0xcb, 0x53, + 0x08, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7, 0x23, 0xbb, + 0x0e, 0x96, 0x79, 0xe1, 0x54, 0xcc, 0x97, 0x0f, 0xba, 0x22, + 0xcd, 0x55, 0xe0, 0x78, 0x56, 0xce, 0x7b, 0xe3, 0x0c, 0x94, + 0x21, 0xb9, 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0x0d, + 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60, 0x3b, 0xa3, + 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4, 0xfa, 0x62, 0xd7, 0x4f, + 0xa0, 0x38, 0x8d, 0x15, 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, + 0x39, 0xa1, 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a, + 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e, 0x10, 0x88, + 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff, 0xa4, 0x3c, 0x89, 0x11, + 0xfe, 0x66, 0xd3, 0x4b, 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, + 0x31, 0xa9, 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d, + 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc, 0x87, 0x1f, + 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68, 0xac, 0x34, 0x81, 0x19, + 0xf6, 0x6e, 0xdb, 0x43, 0x18, 0x80, 0x35, 0xad, 0x42, 0xda, + 0x6f, 0xf7, 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36, + 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82, 0x00, 0x99, + 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, + 0xe2, 0x7b, 0xcd, 0x54, 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, + 0x14, 0x8d, 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31, + 0xca, 0x53, 0xe5, 0x7c, 0x94, 0x0d, 0xbb, 0x22, 0x76, 0xef, + 0x59, 0xc0, 0x28, 0xb1, 0x07, 0x9e, 0xaf, 0x36, 0x80, 0x19, + 0xf1, 0x68, 0xde, 0x47, 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, + 0x62, 0xfb, 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61, + 0x35, 0xac, 0x1a, 0x83, 
0x6b, 0xf2, 0x44, 0xdd, 0xec, 0x75, + 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x04, 0x50, 0xc9, 0x7f, 0xe6, + 0x0e, 0x97, 0x21, 0xb8, 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, + 0x32, 0xab, 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17, + 0x26, 0xbf, 0x09, 0x90, 0x78, 0xe1, 0x57, 0xce, 0x9a, 0x03, + 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72, 0x0f, 0x96, 0x20, 0xb9, + 0x51, 0xc8, 0x7e, 0xe7, 0xb3, 0x2a, 0x9c, 0x05, 0xed, 0x74, + 0xc2, 0x5b, 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82, + 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e, 0xc5, 0x5c, + 0xea, 0x73, 0x9b, 0x02, 0xb4, 0x2d, 0x79, 0xe0, 0x56, 0xcf, + 0x27, 0xbe, 0x08, 0x91, 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, + 0xd1, 0x48, 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4, + 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e, 0x3a, 0xa3, + 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2, 0xe3, 0x7a, 0xcc, 0x55, + 0xbd, 0x24, 0x92, 0x0b, 0x5f, 0xc6, 0x70, 0xe9, 0x01, 0x98, + 0x2e, 0xb7, 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4, + 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18, 0x29, 0xb0, + 0x06, 0x9f, 0x77, 0xee, 0x58, 0xc1, 0x95, 0x0c, 0xba, 0x23, + 0xcb, 0x52, 0xe4, 0x7d, 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, + 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45, + 0x55, 0xcf, 0x7c, 0xe6, 0x07, 0x9d, 0x2e, 0xb4, 0xf1, 0x6b, + 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10, 0xaa, 0x30, 0x83, 0x19, + 0xf8, 0x62, 0xd1, 0x4b, 0x0e, 0x94, 0x27, 0xbd, 0x5c, 0xc6, + 0x75, 0xef, 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e, + 0x5b, 0xc1, 0x72, 0xe8, 0x09, 0x93, 0x20, 0xba, 0x49, 0xd3, + 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8, 0xed, 0x77, 0xc4, 0x5e, + 0xbf, 0x25, 0x96, 0x0c, 0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, + 0x67, 0xfd, 0xb8, 0x22, 0x91, 0x0b, 0xea, 0x70, 0xc3, 0x59, + 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x02, 0x47, 0xdd, + 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6, 0xb6, 0x2c, 0x9f, 0x05, + 0xe4, 0x7e, 0xcd, 0x57, 0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda, + 0x69, 0xf3, 0x92, 0x08, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73, + 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 
0x4d, 0xd7, 0xc7, 0x5d, + 0xee, 0x74, 0x95, 0x0f, 0xbc, 0x26, 0x63, 0xf9, 0x4a, 0xd0, + 0x31, 0xab, 0x18, 0x82, 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, + 0x43, 0xd9, 0x9c, 0x06, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d, + 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c, 0xc9, 0x53, + 0xe0, 0x7a, 0x9b, 0x01, 0xb2, 0x28, 0xdb, 0x41, 0xf2, 0x68, + 0x89, 0x13, 0xa0, 0x3a, 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, + 0x04, 0x9e, 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f, + 0x2a, 0xb0, 0x03, 0x99, 0x78, 0xe2, 0x51, 0xcb, 0x71, 0xeb, + 0x58, 0xc2, 0x23, 0xb9, 0x0a, 0x90, 0xd5, 0x4f, 0xfc, 0x66, + 0x87, 0x1d, 0xae, 0x34, 0x24, 0xbe, 0x0d, 0x97, 0x76, 0xec, + 0x5f, 0xc5, 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61, + 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, + 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a, 0x45, 0xde, 0x6e, 0xf5, + 0x13, 0x88, 0x38, 0xa3, 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, + 0x94, 0x0f, 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c, + 0x26, 0xbd, 0x0d, 0x96, 0x70, 0xeb, 0x5b, 0xc0, 0xcf, 0x54, + 0xe4, 0x7f, 0x99, 0x02, 0xb2, 0x29, 0x63, 0xf8, 0x48, 0xd3, + 0x35, 0xae, 0x1e, 0x85, 0x09, 0x92, 0x22, 0xb9, 0x5f, 0xc4, + 0x74, 0xef, 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43, + 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa, 0xe0, 0x7b, + 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x06, 0x83, 0x18, 0xa8, 0x33, + 0xd5, 0x4e, 0xfe, 0x65, 0x2f, 0xb4, 0x04, 0x9f, 0x79, 0xe2, + 0x52, 0xc9, 0xc6, 0x5d, 0xed, 0x76, 0x90, 0x0b, 0xbb, 0x20, + 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c, 0x12, 0x89, + 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4, 0xbe, 0x25, 0x95, 0x0e, + 0xe8, 0x73, 0xc3, 0x58, 0x57, 0xcc, 0x7c, 0xe7, 0x01, 0x9a, + 0x2a, 0xb1, 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d, + 0x98, 0x03, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e, 0x34, 0xaf, + 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2, 0xdd, 0x46, 0xf6, 0x6d, + 0x8b, 0x10, 0xa0, 0x3b, 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, + 0x0c, 0x97, 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd, + 0xb7, 0x2c, 0x9c, 0x07, 0xe1, 0x7a, 0xca, 0x51, 
0x5e, 0xc5, + 0x75, 0xee, 0x08, 0x93, 0x23, 0xb8, 0xf2, 0x69, 0xd9, 0x42, + 0xa4, 0x3f, 0x8f, 0x14, 0x91, 0x0a, 0xba, 0x21, 0xc7, 0x5c, + 0xec, 0x77, 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb, + 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32, 0x78, 0xe3, + 0x53, 0xc8, 0x2e, 0xb5, 0x05, 0x9e, 0x00, 0x9c, 0x25, 0xb9, + 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, + 0xfb, 0x67, 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6, + 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52, 0x6a, 0xf6, + 0x4f, 0xd3, 0x20, 0xbc, 0x05, 0x99, 0xfe, 0x62, 0xdb, 0x47, + 0xb4, 0x28, 0x91, 0x0d, 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, + 0x30, 0xac, 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38, + 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x02, 0xbb, 0x27, 0x40, 0xdc, + 0x65, 0xf9, 0x0a, 0x96, 0x2f, 0xb3, 0xe1, 0x7d, 0xc4, 0x58, + 0xab, 0x37, 0x8e, 0x12, 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, + 0x1a, 0x86, 0xbe, 0x22, 0x9b, 0x07, 0xf4, 0x68, 0xd1, 0x4d, + 0x2a, 0xb6, 0x0f, 0x93, 0x60, 0xfc, 0x45, 0xd9, 0x8b, 0x17, + 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78, 0x1f, 0x83, 0x3a, 0xa6, + 0x55, 0xc9, 0x70, 0xec, 0xb5, 0x29, 0x90, 0x0c, 0xff, 0x63, + 0xda, 0x46, 0x21, 0xbd, 0x04, 0x98, 0x6b, 0xf7, 0x4e, 0xd2, + 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73, 0x14, 0x88, + 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7, 0xdf, 0x43, 0xfa, 0x66, + 0x95, 0x09, 0xb0, 0x2c, 0x4b, 0xd7, 0x6e, 0xf2, 0x01, 0x9d, + 0x24, 0xb8, 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19, + 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d, 0x61, 0xfd, + 0x44, 0xd8, 0x2b, 0xb7, 0x0e, 0x92, 0xf5, 0x69, 0xd0, 0x4c, + 0xbf, 0x23, 0x9a, 0x06, 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, + 0x3b, 0xa7, 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33, + 0x0b, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8, 0x9f, 0x03, + 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c, 0x3e, 0xa2, 0x1b, 0x87, + 0x74, 0xe8, 0x51, 0xcd, 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, + 0xc5, 0x59, 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, + 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68, 0x25, 0xb8, + 
0x02, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1, 0xb9, 0x24, 0x9e, 0x03, + 0xf7, 0x6a, 0xd0, 0x4d, 0x4a, 0xd7, 0x6d, 0xf0, 0x04, 0x99, + 0x23, 0xbe, 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x05, 0xbf, 0x22, + 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x06, 0x9b, 0xf3, 0x6e, + 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x07, 0x94, 0x09, 0xb3, 0x2e, + 0xda, 0x47, 0xfd, 0x60, 0x08, 0x95, 0x2f, 0xb2, 0x46, 0xdb, + 0x61, 0xfc, 0xb1, 0x2c, 0x96, 0x0b, 0xff, 0x62, 0xd8, 0x45, + 0x2d, 0xb0, 0x0a, 0x97, 0x63, 0xfe, 0x44, 0xd9, 0xde, 0x43, + 0xf9, 0x64, 0x90, 0x0d, 0xb7, 0x2a, 0x42, 0xdf, 0x65, 0xf8, + 0x0c, 0x91, 0x2b, 0xb6, 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, + 0x92, 0x0f, 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0x0e, 0x93, + 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1, 0xa9, 0x34, + 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d, 0x10, 0x8d, 0x37, 0xaa, + 0x5e, 0xc3, 0x79, 0xe4, 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, + 0xe5, 0x78, 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b, + 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17, 0x5a, 0xc7, + 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae, 0xc6, 0x5b, 0xe1, 0x7c, + 0x88, 0x15, 0xaf, 0x32, 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, + 0xc8, 0x55, 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9, + 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70, 0x18, 0x85, + 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec, 0xeb, 0x76, 0xcc, 0x51, + 0xa5, 0x38, 0x82, 0x1f, 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, + 0x1e, 0x83, 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a, + 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6, 0x00, 0x9e, + 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, + 0xc6, 0x58, 0xe7, 0x79, 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, + 0x76, 0xe8, 0x91, 0x0f, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c, + 0x2a, 0xb4, 0x0b, 0x95, 0x68, 0xf6, 0x49, 0xd7, 0xae, 0x30, + 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53, 0x3f, 0xa1, 0x1e, 0x80, + 0x7d, 0xe3, 0x5c, 0xc2, 0xbb, 0x25, 0x9a, 0x04, 0xf9, 0x67, + 0xd8, 0x46, 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9, + 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0x0c, 0xb3, 0x2d, 0x41, 0xdf, + 0x60, 0xfe, 
0x03, 0x9d, 0x22, 0xbc, 0xc5, 0x5b, 0xe4, 0x7a, + 0x87, 0x19, 0xa6, 0x38, 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, + 0x1d, 0x83, 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x07, + 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x08, 0x96, 0xef, 0x71, + 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12, 0xa8, 0x36, 0x89, 0x17, + 0xea, 0x74, 0xcb, 0x55, 0x2c, 0xb2, 0x0d, 0x93, 0x6e, 0xf0, + 0x4f, 0xd1, 0xbd, 0x23, 0x9c, 0x02, 0xff, 0x61, 0xde, 0x40, + 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4, 0x82, 0x1c, + 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f, 0x06, 0x98, 0x27, 0xb9, + 0x44, 0xda, 0x65, 0xfb, 0x97, 0x09, 0xb6, 0x28, 0xd5, 0x4b, + 0xf4, 0x6a, 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee, + 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x01, 0x78, 0xe6, + 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85, 0xe9, 0x77, 0xc8, 0x56, + 0xab, 0x35, 0x8a, 0x14, 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, + 0x0e, 0x90, 0xd6, 0x48, 0xf7, 0x69, 0x94, 0x0a, 0xb5, 0x2b, + 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf, 0xc3, 0x5d, + 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e, 0x47, 0xd9, 0x66, 0xf8, + 0x05, 0x9b, 0x24, 0xba, 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, + 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76, + 0x05, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff, 0x89, 0x16, + 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73, 0x0a, 0x95, 0x29, 0xb6, + 0x4c, 0xd3, 0x6f, 0xf0, 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, + 0xe3, 0x7c, 0x0f, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5, + 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79, 0x14, 0x8b, + 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee, 0x98, 0x07, 0xbb, 0x24, + 0xde, 0x41, 0xfd, 0x62, 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, + 0x74, 0xeb, 0x9d, 0x02, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67, + 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4, 0x92, 0x0d, + 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68, 0x1b, 0x84, 0x38, 0xa7, + 0x5d, 0xc2, 0x7e, 0xe1, 0x97, 0x08, 0xb4, 0x2b, 0xd1, 0x4e, + 0xf2, 0x6d, 0x28, 0xb7, 0x0b, 0x94, 0x6e, 0xf1, 0x4d, 0xd2, + 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e, 0x2d, 0xb2, + 0x0e, 0x91, 0x6b, 0xf4, 
0x48, 0xd7, 0xa1, 0x3e, 0x82, 0x1d, + 0xe7, 0x78, 0xc4, 0x5b, 0x22, 0xbd, 0x01, 0x9e, 0x64, 0xfb, + 0x47, 0xd8, 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54, + 0x27, 0xb8, 0x04, 0x9b, 0x61, 0xfe, 0x42, 0xdd, 0xab, 0x34, + 0x88, 0x17, 0xed, 0x72, 0xce, 0x51, 0x3c, 0xa3, 0x1f, 0x80, + 0x7a, 0xe5, 0x59, 0xc6, 0xb0, 0x2f, 0x93, 0x0c, 0xf6, 0x69, + 0xd5, 0x4a, 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3, + 0xb5, 0x2a, 0x96, 0x09, 0xf3, 0x6c, 0xd0, 0x4f, 0x36, 0xa9, + 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc, 0xba, 0x25, 0x99, 0x06, + 0xfc, 0x63, 0xdf, 0x40, 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, + 0x56, 0xc9, 0xbf, 0x20, 0x9c, 0x03, 0xf9, 0x66, 0xda, 0x45, + 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, + 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e, 0xd2, 0x72, 0x8f, 0x2f, + 0x68, 0xc8, 0x35, 0x95, 0xbb, 0x1b, 0xe6, 0x46, 0x01, 0xa1, + 0x5c, 0xfc, 0xb9, 0x19, 0xe4, 0x44, 0x03, 0xa3, 0x5e, 0xfe, + 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97, 0x6b, 0xcb, + 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c, 0x02, 0xa2, 0x5f, 0xff, + 0xb8, 0x18, 0xe5, 0x45, 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, + 0x88, 0x28, 0x06, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41, + 0xbd, 0x1d, 0xe0, 0x40, 0x07, 0xa7, 0x5a, 0xfa, 0xd4, 0x74, + 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93, 0xd6, 0x76, 0x8b, 0x2b, + 0x6c, 0xcc, 0x31, 0x91, 0xbf, 0x1f, 0xe2, 0x42, 0x05, 0xa5, + 0x58, 0xf8, 0x04, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43, + 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a, 0xde, 0x7e, + 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99, 0xb7, 0x17, 0xea, 0x4a, + 0x0d, 0xad, 0x50, 0xf0, 0x0c, 0xac, 0x51, 0xf1, 0xb6, 0x16, + 0xeb, 0x4b, 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22, + 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20, 0x0e, 0xae, + 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49, 0xb5, 0x15, 0xe8, 0x48, + 0x0f, 0xaf, 0x52, 0xf2, 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, + 0x3b, 0x9b, 0xb1, 0x11, 0xec, 0x4c, 0x0b, 0xab, 0x56, 0xf6, + 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f, 0x63, 0xc3, + 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24, 
0x0a, 0xaa, 0x57, 0xf7, + 0xb0, 0x10, 0xed, 0x4d, 0x08, 0xa8, 0x55, 0xf5, 0xb2, 0x12, + 0xef, 0x4f, 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26, + 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d, 0xb3, 0x13, + 0xee, 0x4e, 0x09, 0xa9, 0x54, 0xf4, 0x00, 0xa1, 0x5f, 0xfe, + 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, + 0x80, 0x21, 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82, + 0xa3, 0x02, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3, 0x99, 0x38, + 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9, 0xf8, 0x59, 0xa7, 0x06, + 0x46, 0xe7, 0x19, 0xb8, 0x5b, 0xfa, 0x04, 0xa5, 0xe5, 0x44, + 0xba, 0x1b, 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a, + 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f, 0x4e, 0xef, + 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0x0e, 0xed, 0x4c, 0xb2, 0x13, + 0x53, 0xf2, 0x0c, 0xad, 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, + 0x6d, 0xcc, 0xb6, 0x17, 0xe9, 0x48, 0x08, 0xa9, 0x57, 0xf6, + 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97, 0x74, 0xd5, + 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34, 0x15, 0xb4, 0x4a, 0xeb, + 0xab, 0x0a, 0xf4, 0x55, 0x5e, 0xff, 0x01, 0xa0, 0xe0, 0x41, + 0xbf, 0x1e, 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f, + 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc, 0xfd, 0x5c, + 0xa2, 0x03, 0x43, 0xe2, 0x1c, 0xbd, 0xc7, 0x66, 0x98, 0x39, + 0x79, 0xd8, 0x26, 0x87, 0xa6, 0x07, 0xf9, 0x58, 0x18, 0xb9, + 0x47, 0xe6, 0x05, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45, + 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24, 0x71, 0xd0, + 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31, 0x10, 0xb1, 0x4f, 0xee, + 0xae, 0x0f, 0xf1, 0x50, 0xb3, 0x12, 0xec, 0x4d, 0x0d, 0xac, + 0x52, 0xf3, 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92, + 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x09, 0xa8, 0x89, 0x28, + 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9, 0x2a, 0x8b, 0x75, 0xd4, + 0x94, 0x35, 0xcb, 0x6a, 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, + 0xaa, 0x0b, 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, + 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30, 0xf2, 0x50, + 0xab, 0x09, 0x40, 0xe2, 0x19, 0xbb, 0x8b, 0x29, 
0xd2, 0x70, + 0x39, 0x9b, 0x60, 0xc2, 0xf9, 0x5b, 0xa0, 0x02, 0x4b, 0xe9, + 0x12, 0xb0, 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9, + 0x0b, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42, 0x72, 0xd0, + 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b, 0xef, 0x4d, 0xb6, 0x14, + 0x5d, 0xff, 0x04, 0xa6, 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, + 0x7d, 0xdf, 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0x0d, 0xf6, 0x54, + 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d, 0x16, 0xb4, + 0x4f, 0xed, 0xa4, 0x06, 0xfd, 0x5f, 0x6f, 0xcd, 0x36, 0x94, + 0xdd, 0x7f, 0x84, 0x26, 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, + 0x0f, 0xad, 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4, + 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a, 0xba, 0x18, + 0xe3, 0x41, 0x08, 0xaa, 0x51, 0xf3, 0x31, 0x93, 0x68, 0xca, + 0x83, 0x21, 0xda, 0x78, 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, + 0xa3, 0x01, 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73, + 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0x0a, 0xc8, 0x6a, + 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81, 0xb1, 0x13, 0xe8, 0x4a, + 0x03, 0xa1, 0x5a, 0xf8, 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, + 0xc7, 0x65, 0x55, 0xf7, 0x0c, 0xae, 0xe7, 0x45, 0xbe, 0x1c, + 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97, 0xa7, 0x05, + 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee, 0xd5, 0x77, 0x8c, 0x2e, + 0x67, 0xc5, 0x3e, 0x9c, 0xac, 0x0e, 0xf5, 0x57, 0x1e, 0xbc, + 0x47, 0xe5, 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e, + 0x5e, 0xfc, 0x07, 0xa5, 0xec, 0x4e, 0xb5, 0x17, 0x00, 0xa3, + 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, + 0xc7, 0x64, 0x9c, 0x3f, 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, + 0x0f, 0xac, 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd, + 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97, 0xa8, 0x0b, + 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6, 0x3b, 0x98, 0x60, 0xc3, + 0x8d, 0x2e, 0xd6, 0x75, 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, + 0xa7, 0x04, 0xaf, 0x0c, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1, + 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90, 0x4d, 0xee, + 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x03, 0x3c, 0x9f, 0x67, 0xc4, + 
0x8a, 0x29, 0xd1, 0x72, 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, + 0x9b, 0x38, 0x07, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49, + 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda, 0xe5, 0x46, + 0xbe, 0x1d, 0x53, 0xf0, 0x08, 0xab, 0x43, 0xe0, 0x18, 0xbb, + 0xf5, 0x56, 0xae, 0x0d, 0x32, 0x91, 0x69, 0xca, 0x84, 0x27, + 0xdf, 0x7c, 0xa1, 0x02, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef, + 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e, 0x9a, 0x39, + 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4, 0xeb, 0x48, 0xb0, 0x13, + 0x5d, 0xfe, 0x06, 0xa5, 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, + 0x95, 0x36, 0x09, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47, + 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x01, 0xa2, 0x9d, 0x3e, + 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3, 0x0e, 0xad, 0x55, 0xf6, + 0xb8, 0x1b, 0xe3, 0x40, 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, + 0x92, 0x31, 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b, + 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0x0a, 0xd7, 0x74, + 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99, 0xa6, 0x05, 0xfd, 0x5e, + 0x10, 0xb3, 0x4b, 0xe8, 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, + 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12, + 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9, 0xdb, 0x7f, + 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80, 0x39, 0x9d, 0x6c, 0xc8, + 0x93, 0x37, 0xc6, 0x62, 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, + 0x8f, 0x2b, 0xab, 0x0f, 0xfe, 0x5a, 0x01, 0xa5, 0x54, 0xf0, + 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9, 0x72, 0xd6, + 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29, 0x3b, 0x9f, 0x6e, 0xca, + 0x91, 0x35, 0xc4, 0x60, 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, + 0x1f, 0xbb, 0xa9, 0x0d, 0xfc, 0x58, 0x03, 0xa7, 0x56, 0xf2, + 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10, 0x02, 0xa6, + 0x57, 0xf3, 0xa8, 0x0c, 0xfd, 0x59, 0xd9, 0x7d, 0x8c, 0x28, + 0x73, 0xd7, 0x26, 0x82, 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, + 0x6f, 0xcb, 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf, + 0xad, 0x09, 0xf8, 0x5c, 0x07, 0xa3, 0x52, 0xf6, 0x76, 0xd2, + 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d, 0x3f, 0x9b, 0x6a, 0xce, + 0x95, 0x31, 
0xc0, 0x64, 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, + 0x22, 0x86, 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf, + 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14, 0x06, 0xa2, + 0x53, 0xf7, 0xac, 0x08, 0xf9, 0x5d, 0x96, 0x32, 0xc3, 0x67, + 0x3c, 0x98, 0x69, 0xcd, 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, + 0x20, 0x84, 0x04, 0xa0, 0x51, 0xf5, 0xae, 0x0a, 0xfb, 0x5f, + 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16, 0xaf, 0x0b, + 0xfa, 0x5e, 0x05, 0xa1, 0x50, 0xf4, 0xe6, 0x42, 0xb3, 0x17, + 0x4c, 0xe8, 0x19, 0xbd, 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, + 0xc2, 0x66, 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f, + 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, + 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d, 0x82, 0x27, 0xd5, 0x70, + 0x2c, 0x89, 0x7b, 0xde, 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, + 0x3a, 0x9f, 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45, + 0x58, 0xfd, 0x0f, 0xaa, 0xf6, 0x53, 0xa1, 0x04, 0x9b, 0x3e, + 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7, 0xda, 0x7f, 0x8d, 0x28, + 0x74, 0xd1, 0x23, 0x86, 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, + 0xcb, 0x6e, 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f, + 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec, 0xf1, 0x54, + 0xa6, 0x03, 0x5f, 0xfa, 0x08, 0xad, 0x2b, 0x8e, 0x7c, 0xd9, + 0x85, 0x20, 0xd2, 0x77, 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, + 0x93, 0x36, 0xa9, 0x0c, 0xfe, 0x5b, 0x07, 0xa2, 0x50, 0xf5, + 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4, 0x64, 0xc1, + 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38, 0x25, 0x80, 0x72, 0xd7, + 0x8b, 0x2e, 0xdc, 0x79, 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, + 0x1f, 0xba, 0xa7, 0x02, 0xf0, 0x55, 0x09, 0xac, 0x5e, 0xfb, + 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21, 0x3c, 0x99, + 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60, 0xff, 0x5a, 0xa8, 0x0d, + 0x51, 0xf4, 0x06, 0xa3, 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, + 0x47, 0xe2, 0x56, 0xf3, 0x01, 0xa4, 0xf8, 0x5d, 0xaf, 0x0a, + 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b, 0xd4, 0x71, + 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88, 0x95, 0x30, 0xc2, 0x67, + 0x3b, 0x9e, 0x6c, 0xc9, 
0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, + 0xb6, 0x13, 0x0e, 0xab, 0x59, 0xfc, 0xa0, 0x05, 0xf7, 0x52, + 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91, 0x8c, 0x29, + 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0, 0x00, 0xa6, 0x51, 0xf7, + 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, + 0xaa, 0x0c, 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7, + 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe, 0x79, 0xdf, + 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c, 0x20, 0x86, 0x71, 0xd7, + 0x82, 0x24, 0xd3, 0x75, 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, + 0x38, 0x9e, 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7, + 0xf2, 0x54, 0xa3, 0x05, 0x50, 0xf6, 0x01, 0xa7, 0xab, 0x0d, + 0xfa, 0x5c, 0x09, 0xaf, 0x58, 0xfe, 0x40, 0xe6, 0x11, 0xb7, + 0xe2, 0x44, 0xb3, 0x15, 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, + 0xea, 0x4c, 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde, + 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87, 0x39, 0x9f, + 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c, 0x60, 0xc6, 0x31, 0x97, + 0xc2, 0x64, 0x93, 0x35, 0xf9, 0x5f, 0xa8, 0x0e, 0x5b, 0xfd, + 0x0a, 0xac, 0xa0, 0x06, 0xf1, 0x57, 0x02, 0xa4, 0x53, 0xf5, + 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e, 0x12, 0xb4, + 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47, 0x80, 0x26, 0xd1, 0x77, + 0x22, 0x84, 0x73, 0xd5, 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, + 0x2a, 0x8c, 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67, + 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e, 0x0b, 0xad, + 0x5a, 0xfc, 0xa9, 0x0f, 0xf8, 0x5e, 0x52, 0xf4, 0x03, 0xa5, + 0xf0, 0x56, 0xa1, 0x07, 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, + 0x4a, 0xec, 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5, + 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27, 0x2b, 0x8d, + 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e, 0xc0, 0x66, 0x91, 0x37, + 0x62, 0xc4, 0x33, 0x95, 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, + 0x6a, 0xcc, 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, + 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03, 0xa2, 0x05, + 0xf1, 0x56, 0x04, 0xa3, 0x57, 0xf0, 0xf3, 0x54, 0xa0, 0x07, + 0x55, 0xf2, 0x06, 0xa1, 0x59, 0xfe, 
0x0a, 0xad, 0xff, 0x58, + 0xac, 0x0b, 0x08, 0xaf, 0x5b, 0xfc, 0xae, 0x09, 0xfd, 0x5a, + 0xfb, 0x5c, 0xa8, 0x0f, 0x5d, 0xfa, 0x0e, 0xa9, 0xaa, 0x0d, + 0xf9, 0x5e, 0x0c, 0xab, 0x5f, 0xf8, 0xb2, 0x15, 0xe1, 0x46, + 0x14, 0xb3, 0x47, 0xe0, 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, + 0x16, 0xb1, 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42, + 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13, 0xeb, 0x4c, + 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9, 0xba, 0x1d, 0xe9, 0x4e, + 0x1c, 0xbb, 0x4f, 0xe8, 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, + 0xbc, 0x1b, 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a, + 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b, 0x28, 0x8f, + 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a, 0xdb, 0x7c, 0x88, 0x2f, + 0x7d, 0xda, 0x2e, 0x89, 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, + 0x7f, 0xd8, 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72, + 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23, 0x82, 0x25, + 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0, 0xd3, 0x74, 0x80, 0x27, + 0x75, 0xd2, 0x26, 0x81, 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, + 0x3e, 0x99, 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8, + 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b, 0x38, 0x9f, + 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a, 0x92, 0x35, 0xc1, 0x66, + 0x34, 0x93, 0x67, 0xc0, 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, + 0x36, 0x91, 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62, + 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33, 0x00, 0xa8, + 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, + 0xb3, 0x1b, 0xfe, 0x56, 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, + 0x85, 0x2d, 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x04, + 0xa4, 0x0c, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb, 0x8d, 0x25, + 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2, 0xf6, 0x5e, 0xbb, 0x13, + 0x6c, 0xc4, 0x21, 0x89, 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, + 0x08, 0xa0, 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a, + 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x03, 0x07, 0xaf, + 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78, 0x2e, 0x86, 0x63, 0xcb, + 0xb4, 0x1c, 0xf9, 0x51, 0xf1, 0x59, 0xbc, 0x14, 
0x6b, 0xc3, + 0x26, 0x8e, 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0x0f, 0xa7, + 0xa3, 0x0b, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc, 0x8a, 0x22, + 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5, 0xaa, 0x02, 0xe7, 0x4f, + 0x30, 0x98, 0x7d, 0xd5, 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, + 0x54, 0xfc, 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87, + 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x06, 0xae, 0x0e, 0xa6, + 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71, 0x27, 0x8f, 0x6a, 0xc2, + 0xbd, 0x15, 0xf0, 0x58, 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, + 0x8b, 0x23, 0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0x0a, + 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80, 0xd6, 0x7e, + 0x9b, 0x33, 0x4c, 0xe4, 0x01, 0xa9, 0xad, 0x05, 0xe0, 0x48, + 0x37, 0x9f, 0x7a, 0xd2, 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, + 0x53, 0xfb, 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24, + 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0x0d, 0x09, 0xa1, + 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76, 0x20, 0x88, 0x6d, 0xc5, + 0xba, 0x12, 0xf7, 0x5f, 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, + 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59, + 0x42, 0xeb, 0x0d, 0xa4, 0xdc, 0x75, 0x93, 0x3a, 0x63, 0xca, + 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b, 0x84, 0x2d, 0xcb, 0x62, + 0x1a, 0xb3, 0x55, 0xfc, 0xa5, 0x0c, 0xea, 0x43, 0x3b, 0x92, + 0x74, 0xdd, 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe, + 0xe7, 0x4e, 0xa8, 0x01, 0x79, 0xd0, 0x36, 0x9f, 0x15, 0xbc, + 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d, 0x34, 0x9d, 0x7b, 0xd2, + 0xaa, 0x03, 0xe5, 0x4c, 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, + 0x86, 0x2f, 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0x0e, + 0x91, 0x38, 0xde, 0x77, 0x0f, 0xa6, 0x40, 0xe9, 0xb0, 0x19, + 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8, 0xd3, 0x7a, 0x9c, 0x35, + 0x4d, 0xe4, 0x02, 0xab, 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, + 0x23, 0x8a, 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52, + 0x0b, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73, 0x68, 0xc1, + 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10, 0x49, 0xe0, 0x06, 0xaf, + 0xd7, 0x7e, 0x98, 0x31, 0xae, 0x07, 0xe1, 0x48, 0x30, 0x99, + 
0x7f, 0xd6, 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7, + 0xec, 0x45, 0xa3, 0x0a, 0x72, 0xdb, 0x3d, 0x94, 0xcd, 0x64, + 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5, 0x3f, 0x96, 0x70, 0xd9, + 0xa1, 0x08, 0xee, 0x47, 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, + 0xcf, 0x66, 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x05, + 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24, 0xbb, 0x12, + 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3, 0x9a, 0x33, 0xd5, 0x7c, + 0x04, 0xad, 0x4b, 0xe2, 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, + 0x28, 0x81, 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x09, 0xa0, + 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, + 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48, 0x72, 0xd8, 0x3b, 0x91, + 0xe0, 0x4a, 0xa9, 0x03, 0x4b, 0xe1, 0x02, 0xa8, 0xd9, 0x73, + 0x90, 0x3a, 0xe4, 0x4e, 0xad, 0x07, 0x76, 0xdc, 0x3f, 0x95, + 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x06, 0xac, 0x96, 0x3c, + 0xdf, 0x75, 0x04, 0xae, 0x4d, 0xe7, 0xaf, 0x05, 0xe6, 0x4c, + 0x3d, 0x97, 0x74, 0xde, 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, + 0x0e, 0xa4, 0xec, 0x46, 0xa5, 0x0f, 0x7e, 0xd4, 0x37, 0x9d, + 0xa7, 0x0d, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6, 0x9e, 0x34, + 0xd7, 0x7d, 0x0c, 0xa6, 0x45, 0xef, 0x31, 0x9b, 0x78, 0xd2, + 0xa3, 0x09, 0xea, 0x40, 0x08, 0xa2, 0x41, 0xeb, 0x9a, 0x30, + 0xd3, 0x79, 0x43, 0xe9, 0x0a, 0xa0, 0xd1, 0x7b, 0x98, 0x32, + 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0x0b, 0xb7, 0x1d, + 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6, 0x8e, 0x24, 0xc7, 0x6d, + 0x1c, 0xb6, 0x55, 0xff, 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, + 0x1e, 0xb4, 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d, + 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22, 0x6a, 0xc0, + 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b, 0x21, 0x8b, 0x68, 0xc2, + 0xb3, 0x19, 0xfa, 0x50, 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, + 0xc3, 0x69, 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13, + 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a, 0x10, 0xba, + 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61, 0x29, 0x83, 0x60, 0xca, + 0xbb, 0x11, 0xf2, 0x58, 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, + 0x5d, 0xf7, 
0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce, + 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85, 0xcd, 0x67, + 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc, 0x00, 0xab, 0x4b, 0xe0, + 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, + 0xec, 0x47, 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14, + 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25, 0xc4, 0x6f, + 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2, 0xf5, 0x5e, 0xbe, 0x15, + 0x63, 0xc8, 0x28, 0x83, 0xa6, 0x0d, 0xed, 0x46, 0x30, 0x9b, + 0x7b, 0xd0, 0x97, 0x3c, 0xdc, 0x77, 0x01, 0xaa, 0x4a, 0xe1, + 0x95, 0x3e, 0xde, 0x75, 0x03, 0xa8, 0x48, 0xe3, 0xa4, 0x0f, + 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2, 0xf7, 0x5c, 0xbc, 0x17, + 0x61, 0xca, 0x2a, 0x81, 0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb, + 0x1b, 0xb0, 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27, + 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16, 0x33, 0x98, + 0x78, 0xd3, 0xa5, 0x0e, 0xee, 0x45, 0x02, 0xa9, 0x49, 0xe2, + 0x94, 0x3f, 0xdf, 0x74, 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0x0a, + 0xea, 0x41, 0x06, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70, + 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23, 0x64, 0xcf, + 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12, 0xf3, 0x58, 0xb8, 0x13, + 0x65, 0xce, 0x2e, 0x85, 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, + 0x1f, 0xb4, 0x91, 0x3a, 0xda, 0x71, 0x07, 0xac, 0x4c, 0xe7, + 0xa0, 0x0b, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6, 0xa2, 0x09, + 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4, 0x93, 0x38, 0xd8, 0x73, + 0x05, 0xae, 0x4e, 0xe5, 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, + 0x1d, 0xb6, 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87, + 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10, 0x57, 0xfc, + 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21, 0x04, 0xaf, 0x4f, 0xe4, + 0x92, 0x39, 0xd9, 0x72, 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x08, + 0xe8, 0x43, 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, + 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a, 0x12, 0xbe, + 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71, 0x1b, 0xb7, 0x5e, 0xf2, + 0x91, 0x3d, 0xd4, 0x78, 0x24, 0x88, 0x61, 0xcd, 0xae, 0x02, + 0xeb, 0x47, 0x2d, 0x81, 
0x68, 0xc4, 0xa7, 0x0b, 0xe2, 0x4e, + 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55, 0x3f, 0x93, + 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c, 0x48, 0xe4, 0x0d, 0xa1, + 0xc2, 0x6e, 0x87, 0x2b, 0x41, 0xed, 0x04, 0xa8, 0xcb, 0x67, + 0x8e, 0x22, 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39, + 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30, 0x6c, 0xc0, + 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0x0f, 0x65, 0xc9, 0x20, 0x8c, + 0xef, 0x43, 0xaa, 0x06, 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, + 0xb1, 0x1d, 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14, + 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3, 0x99, 0x35, + 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa, 0x82, 0x2e, 0xc7, 0x6b, + 0x08, 0xa4, 0x4d, 0xe1, 0x8b, 0x27, 0xce, 0x62, 0x01, 0xad, + 0x44, 0xe8, 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7, + 0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde, 0xa6, 0x0a, + 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5, 0xaf, 0x03, 0xea, 0x46, + 0x25, 0x89, 0x60, 0xcc, 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, + 0x17, 0xbb, 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2, + 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x05, 0xa9, 0xc3, 0x6f, + 0x86, 0x2a, 0x49, 0xe5, 0x0c, 0xa0, 0xfc, 0x50, 0xb9, 0x15, + 0x76, 0xda, 0x33, 0x9f, 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, + 0x3a, 0x96, 0xee, 0x42, 0xab, 0x07, 0x64, 0xc8, 0x21, 0x8d, + 0xe7, 0x4b, 0xa2, 0x0e, 0x6d, 0xc1, 0x28, 0x84, 0x00, 0xad, + 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, + 0x8f, 0x22, 0xc8, 0x65, 0x02, 0xaf, 0x45, 0xe8, 0x8c, 0x21, + 0xcb, 0x66, 0x03, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67, + 0x04, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60, 0x05, 0xa8, + 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61, 0x06, 0xab, 0x41, 0xec, + 0x88, 0x25, 0xcf, 0x62, 0x07, 0xaa, 0x40, 0xed, 0x89, 0x24, + 0xce, 0x63, 0x08, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c, + 0x09, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d, 0x0a, 0xa7, + 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e, 0x0b, 0xa6, 0x4c, 0xe1, + 0x85, 0x28, 0xc2, 0x6f, 0x0c, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, + 0xc5, 0x68, 0x0d, 0xa0, 0x4a, 0xe7, 
0x83, 0x2e, 0xc4, 0x69, + 0x0e, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a, 0x0f, 0xa2, + 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b, 0x10, 0xbd, 0x57, 0xfa, + 0x9e, 0x33, 0xd9, 0x74, 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, + 0xd8, 0x75, 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76, + 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77, 0x14, 0xb9, + 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70, 0x15, 0xb8, 0x52, 0xff, + 0x9b, 0x36, 0xdc, 0x71, 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, + 0xdf, 0x72, 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73, + 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c, 0x19, 0xb4, + 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d, 0x1a, 0xb7, 0x5d, 0xf0, + 0x94, 0x39, 0xd3, 0x7e, 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, + 0xd2, 0x7f, 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78, + 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79, 0x1e, 0xb3, + 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a, 0x1f, 0xb2, 0x58, 0xf5, + 0x91, 0x3c, 0xd6, 0x7b, 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, + 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74, + 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f, 0x2b, 0x85, + 0x6a, 0xc4, 0xa9, 0x07, 0xe8, 0x46, 0x64, 0xca, 0x25, 0x8b, + 0xe6, 0x48, 0xa7, 0x09, 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, + 0xbe, 0x10, 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b, + 0x4f, 0xe1, 0x0e, 0xa0, 0xcd, 0x63, 0x8c, 0x22, 0xc8, 0x66, + 0x89, 0x27, 0x4a, 0xe4, 0x0b, 0xa5, 0xd1, 0x7f, 0x90, 0x3e, + 0x53, 0xfd, 0x12, 0xbc, 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, + 0x39, 0x97, 0xe3, 0x4d, 0xa2, 0x0c, 0x61, 0xcf, 0x20, 0x8e, + 0xac, 0x02, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1, 0xb5, 0x1b, + 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8, 0x9e, 0x30, 0xdf, 0x71, + 0x1c, 0xb2, 0x5d, 0xf3, 0x87, 0x29, 0xc6, 0x68, 0x05, 0xab, + 0x44, 0xea, 0x8d, 0x23, 0xcc, 0x62, 0x0f, 0xa1, 0x4e, 0xe0, + 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9, 0xbf, 0x11, + 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2, 0xa6, 0x08, 0xe7, 0x49, + 0x24, 0x8a, 0x65, 0xcb, 0xe9, 0x47, 0xa8, 0x06, 0x6b, 0xc5, + 0x2a, 0x84, 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 
0x33, 0x9d, + 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6, 0xc2, 0x6c, + 0x83, 0x2d, 0x40, 0xee, 0x01, 0xaf, 0x45, 0xeb, 0x04, 0xaa, + 0xc7, 0x69, 0x86, 0x28, 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, + 0x9f, 0x31, 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a, + 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x03, 0x21, 0x8f, + 0x60, 0xce, 0xa3, 0x0d, 0xe2, 0x4c, 0x38, 0x96, 0x79, 0xd7, + 0xba, 0x14, 0xfb, 0x55, 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, + 0xd0, 0x7e, 0x0a, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67, + 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, + 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b, 0x22, 0x8d, 0x61, 0xce, + 0xa4, 0x0b, 0xe7, 0x48, 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, + 0xf6, 0x59, 0x44, 0xeb, 0x07, 0xa8, 0xc2, 0x6d, 0x81, 0x2e, + 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f, 0x66, 0xc9, + 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0x0c, 0x77, 0xd8, 0x34, 0x9b, + 0xf1, 0x5e, 0xb2, 0x1d, 0x88, 0x27, 0xcb, 0x64, 0x0e, 0xa1, + 0x4d, 0xe2, 0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3, + 0xaa, 0x05, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0, 0xbb, 0x14, + 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1, 0xcc, 0x63, 0x8f, 0x20, + 0x4a, 0xe5, 0x09, 0xa6, 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, + 0x18, 0xb7, 0xee, 0x41, 0xad, 0x02, 0x68, 0xc7, 0x2b, 0x84, + 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95, 0x0d, 0xa2, + 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67, 0x1c, 0xb3, 0x5f, 0xf0, + 0x9a, 0x35, 0xd9, 0x76, 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x06, + 0xea, 0x45, 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54, + 0x49, 0xe6, 0x0a, 0xa5, 0xcf, 0x60, 0x8c, 0x23, 0x58, 0xf7, + 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32, 0x6b, 0xc4, 0x28, 0x87, + 0xed, 0x42, 0xae, 0x01, 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, + 0xbf, 0x10, 0x85, 0x2a, 0xc6, 0x69, 0x03, 0xac, 0x40, 0xef, + 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe, 0xa7, 0x08, + 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd, 0xb6, 0x19, 0xf5, 0x5a, + 0x30, 0x9f, 0x73, 0xdc, 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, + 0x04, 0xab, 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba, + 
0xe3, 0x4c, 0xa0, 0x0f, 0x65, 0xca, 0x26, 0x89, 0xf2, 0x5d, + 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98, 0x00, 0xb0, 0x7d, 0xcd, + 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, + 0x6e, 0xde, 0xcf, 0x7f, 0xb2, 0x02, 0x35, 0x85, 0x48, 0xf8, + 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11, 0x83, 0x33, + 0xfe, 0x4e, 0x79, 0xc9, 0x04, 0xb4, 0x6a, 0xda, 0x17, 0xa7, + 0x90, 0x20, 0xed, 0x5d, 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x06, + 0xcb, 0x7b, 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92, + 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c, 0xf2, 0x42, + 0x8f, 0x3f, 0x08, 0xb8, 0x75, 0xc5, 0xd4, 0x64, 0xa9, 0x19, + 0x2e, 0x9e, 0x53, 0xe3, 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, + 0xba, 0x0a, 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf, + 0x71, 0xc1, 0x0c, 0xbc, 0x8b, 0x3b, 0xf6, 0x46, 0x57, 0xe7, + 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60, 0xbe, 0x0e, 0xc3, 0x73, + 0x44, 0xf4, 0x39, 0x89, 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, + 0xb1, 0x01, 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8, + 0xf9, 0x49, 0x84, 0x34, 0x03, 0xb3, 0x7e, 0xce, 0x10, 0xa0, + 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27, 0xb5, 0x05, 0xc8, 0x78, + 0x4f, 0xff, 0x32, 0x82, 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, + 0xdb, 0x6b, 0x7a, 0xca, 0x07, 0xb7, 0x80, 0x30, 0xfd, 0x4d, + 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4, 0x2d, 0x9d, + 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a, 0xc4, 0x74, 0xb9, 0x09, + 0x3e, 0x8e, 0x43, 0xf3, 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, + 0x65, 0xd5, 0x0b, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c, + 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99, 0x47, 0xf7, + 0x3a, 0x8a, 0xbd, 0x0d, 0xc0, 0x70, 0x61, 0xd1, 0x1c, 0xac, + 0x9b, 0x2b, 0xe6, 0x56, 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, + 0x0f, 0xbf, 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, + 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1, 0xdf, 0x6e, + 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef, 0x3e, 0x8f, 0x41, 0xf0, + 0xc0, 0x71, 0xbf, 0x0e, 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, + 0x22, 0x93, 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0x0d, 0xc3, 0x72, + 0x7c, 0xcd, 
0x03, 0xb2, 0x82, 0x33, 0xfd, 0x4c, 0x9d, 0x2c, + 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad, 0x5b, 0xea, 0x24, 0x95, + 0xa5, 0x14, 0xda, 0x6b, 0xba, 0x0b, 0xc5, 0x74, 0x44, 0xf5, + 0x3b, 0x8a, 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x05, 0xb4, + 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55, 0xf8, 0x49, + 0x87, 0x36, 0x06, 0xb7, 0x79, 0xc8, 0x19, 0xa8, 0x66, 0xd7, + 0xe7, 0x56, 0x98, 0x29, 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, + 0xa6, 0x17, 0xc6, 0x77, 0xb9, 0x08, 0x38, 0x89, 0x47, 0xf6, + 0xb6, 0x07, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86, 0x57, 0xe6, + 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67, 0x69, 0xd8, 0x16, 0xa7, + 0x97, 0x26, 0xe8, 0x59, 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, + 0x09, 0xb8, 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25, + 0xf4, 0x45, 0x8b, 0x3a, 0x0a, 0xbb, 0x75, 0xc4, 0xca, 0x7b, + 0xb5, 0x04, 0x34, 0x85, 0x4b, 0xfa, 0x2b, 0x9a, 0x54, 0xe5, + 0xd5, 0x64, 0xaa, 0x1b, 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, + 0x6c, 0xdd, 0x0c, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c, + 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x02, 0xd3, 0x62, + 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3, 0x4e, 0xff, 0x31, 0x80, + 0xb0, 0x01, 0xcf, 0x7e, 0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0, + 0x2e, 0x9f, 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1, + 0x70, 0xc1, 0x0f, 0xbe, 0x8e, 0x3f, 0xf1, 0x40, 0x00, 0xb2, + 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, + 0x0b, 0xb9, 0x72, 0xc0, 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, + 0x64, 0xd6, 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f, + 0xc3, 0x71, 0xba, 0x08, 0x31, 0x83, 0x48, 0xfa, 0x3a, 0x88, + 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x03, 0x2c, 0x9e, 0x55, 0xe7, + 0xde, 0x6c, 0xa7, 0x15, 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, + 0x5e, 0xec, 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2, + 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b, 0x74, 0xc6, + 0x0d, 0xbf, 0x86, 0x34, 0xff, 0x4d, 0x8d, 0x3f, 0xf4, 0x46, + 0x7f, 0xcd, 0x06, 0xb4, 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, + 0xd3, 0x61, 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98, + 0xb7, 0x05, 0xce, 0x7c, 
0x45, 0xf7, 0x3c, 0x8e, 0x4e, 0xfc, + 0x37, 0x85, 0xbc, 0x0e, 0xc5, 0x77, 0x2b, 0x99, 0x52, 0xe0, + 0xd9, 0x6b, 0xa0, 0x12, 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, + 0x59, 0xeb, 0xc4, 0x76, 0xbd, 0x0f, 0x36, 0x84, 0x4f, 0xfd, + 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x04, 0xe8, 0x5a, + 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1, 0x11, 0xa3, 0x68, 0xda, + 0xe3, 0x51, 0x9a, 0x28, 0x07, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, + 0x8c, 0x3e, 0xfe, 0x4c, 0x87, 0x35, 0x0c, 0xbe, 0x75, 0xc7, + 0xb0, 0x02, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89, 0x49, 0xfb, + 0x30, 0x82, 0xbb, 0x09, 0xc2, 0x70, 0x5f, 0xed, 0x26, 0x94, + 0xad, 0x1f, 0xd4, 0x66, 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, + 0x2d, 0x9f, 0x73, 0xc1, 0x0a, 0xb8, 0x81, 0x33, 0xf8, 0x4a, + 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x01, 0xb3, 0x9c, 0x2e, + 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5, 0x65, 0xd7, 0x1c, 0xae, + 0x97, 0x25, 0xee, 0x5c, 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, + 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf, + 0xff, 0x4c, 0x84, 0x37, 0x09, 0xba, 0x72, 0xc1, 0x0e, 0xbd, + 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30, 0xe3, 0x50, 0x98, 0x2b, + 0x15, 0xa6, 0x6e, 0xdd, 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, + 0x9f, 0x2c, 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22, + 0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3, 0xdb, 0x68, + 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5, 0x2a, 0x99, 0x51, 0xe2, + 0xdc, 0x6f, 0xa7, 0x14, 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, + 0xa9, 0x1a, 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb, + 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x06, 0xc9, 0x7a, + 0xb2, 0x01, 0x3f, 0x8c, 0x44, 0xf7, 0xc7, 0x74, 0xbc, 0x0f, + 0x31, 0x82, 0x4a, 0xf9, 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, + 0xbb, 0x08, 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95, + 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64, 0x54, 0xe7, + 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a, 0xa5, 0x16, 0xde, 0x6d, + 0x53, 0xe0, 0x28, 0x9b, 0x48, 0xfb, 0x33, 0x80, 0xbe, 0x0d, + 0xc5, 0x76, 0xb9, 0x0a, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87, + 0xb7, 0x04, 0xcc, 0x7f, 0x41, 0xf2, 
0x3a, 0x89, 0x46, 0xf5, + 0x3d, 0x8e, 0xb0, 0x03, 0xcb, 0x78, 0x70, 0xc3, 0x0b, 0xb8, + 0x86, 0x35, 0xfd, 0x4e, 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, + 0x0c, 0xbf, 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x02, 0xb1, + 0x7e, 0xcd, 0x05, 0xb6, 0x88, 0x3b, 0xf3, 0x40, 0x93, 0x20, + 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad, 0x62, 0xd1, 0x19, 0xaa, + 0x94, 0x27, 0xef, 0x5c, 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, + 0xe1, 0x52, 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3, + 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, + 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2, 0x8f, 0x3b, 0xfa, 0x4e, + 0x65, 0xd1, 0x10, 0xa4, 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, + 0xd9, 0x6d, 0x03, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28, + 0xca, 0x7e, 0xbf, 0x0b, 0x20, 0x94, 0x55, 0xe1, 0x8c, 0x38, + 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7, 0x45, 0xf1, 0x30, 0x84, + 0xaf, 0x1b, 0xda, 0x6e, 0x06, 0xb2, 0x73, 0xc7, 0xec, 0x58, + 0x99, 0x2d, 0xcf, 0x7b, 0xba, 0x0e, 0x25, 0x91, 0x50, 0xe4, + 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2, 0x40, 0xf4, + 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b, 0x05, 0xb1, 0x70, 0xc4, + 0xef, 0x5b, 0x9a, 0x2e, 0xcc, 0x78, 0xb9, 0x0d, 0x26, 0x92, + 0x53, 0xe7, 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1, + 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68, 0x0c, 0xb8, + 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27, 0xc5, 0x71, 0xb0, 0x04, + 0x2f, 0x9b, 0x5a, 0xee, 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, + 0x1c, 0xa8, 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61, + 0x0f, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24, 0xc6, 0x72, + 0xb3, 0x07, 0x2c, 0x98, 0x59, 0xed, 0x80, 0x34, 0xf5, 0x41, + 0x6a, 0xde, 0x1f, 0xab, 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, + 0xd6, 0x62, 0x0a, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21, + 0xc3, 0x77, 0xb6, 0x02, 0x29, 0x9d, 0x5c, 0xe8, 0x85, 0x31, + 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae, 0x4c, 0xf8, 0x39, 0x8d, + 0xa6, 0x12, 0xd3, 0x67, 0x09, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, + 0x96, 0x22, 0xc0, 0x74, 0xb5, 0x01, 0x2a, 0x9e, 0x5f, 0xeb, + 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad, 
0x4f, 0xfb, + 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64, 0x00, 0xb5, 0x77, 0xc2, + 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, + 0x58, 0xed, 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x06, 0xb3, + 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x05, 0xc7, 0x72, 0x23, 0x96, + 0x54, 0xe1, 0xcd, 0x78, 0xba, 0x0f, 0xe2, 0x57, 0x95, 0x20, + 0x0c, 0xb9, 0x7b, 0xce, 0xbc, 0x09, 0xcb, 0x7e, 0x52, 0xe7, + 0x25, 0x90, 0x7d, 0xc8, 0x0a, 0xbf, 0x93, 0x26, 0xe4, 0x51, + 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a, 0x87, 0x32, + 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab, 0xd9, 0x6c, 0xae, 0x1b, + 0x37, 0x82, 0x40, 0xf5, 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, + 0x81, 0x34, 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49, + 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88, 0xfa, 0x4f, + 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6, 0x3b, 0x8e, 0x4c, 0xf9, + 0xd5, 0x60, 0xa2, 0x17, 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, + 0x15, 0xa0, 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61, + 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f, 0xd2, 0x67, + 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe, 0xaf, 0x1a, 0xd8, 0x6d, + 0x41, 0xf4, 0x36, 0x83, 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, + 0xf7, 0x42, 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c, + 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd, 0xca, 0x7f, + 0xbd, 0x08, 0x24, 0x91, 0x53, 0xe6, 0x0b, 0xbe, 0x7c, 0xc9, + 0xe5, 0x50, 0x92, 0x27, 0x55, 0xe0, 0x22, 0x97, 0xbb, 0x0e, + 0xcc, 0x79, 0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0x0d, 0xb8, + 0xe9, 0x5c, 0x9e, 0x2b, 0x07, 0xb2, 0x70, 0xc5, 0x28, 0x9d, + 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x04, 0x76, 0xc3, 0x01, 0xb4, + 0x98, 0x2d, 0xef, 0x5a, 0xb7, 0x02, 0xc0, 0x75, 0x59, 0xec, + 0x2e, 0x9b, 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, + 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc, 0xaf, 0x19, + 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a, 0x76, 0xc0, 0x07, 0xb1, + 0x94, 0x22, 0xe5, 0x53, 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, + 0xd0, 0x66, 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x09, 0xbf, + 0xec, 0x5a, 0x9d, 0x2b, 0x0e, 0xb8, 0x7f, 0xc9, 0x35, 0x83, + 
0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10, 0x86, 0x30, 0xf7, 0x41, + 0x64, 0xd2, 0x15, 0xa3, 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0x0b, + 0xcc, 0x7a, 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0x0c, + 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5, 0xc5, 0x73, + 0xb4, 0x02, 0x27, 0x91, 0x56, 0xe0, 0x1c, 0xaa, 0x6d, 0xdb, + 0xfe, 0x48, 0x8f, 0x39, 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, + 0xf9, 0x4f, 0xb3, 0x05, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96, + 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34, 0xc8, 0x7e, + 0xb9, 0x0f, 0x2a, 0x9c, 0x5b, 0xed, 0xbe, 0x08, 0xcf, 0x79, + 0x5c, 0xea, 0x2d, 0x9b, 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, + 0xf4, 0x42, 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x06, 0xc1, 0x77, + 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae, 0xfd, 0x4b, + 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8, 0x24, 0x92, 0x55, 0xe3, + 0xc6, 0x70, 0xb7, 0x01, 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, + 0x04, 0xb2, 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b, + 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d, 0xe1, 0x57, + 0x90, 0x26, 0x03, 0xb5, 0x72, 0xc4, 0xd4, 0x62, 0xa5, 0x13, + 0x36, 0x80, 0x47, 0xf1, 0x0d, 0xbb, 0x7c, 0xca, 0xef, 0x59, + 0x9e, 0x28, 0x7b, 0xcd, 0x0a, 0xbc, 0x99, 0x2f, 0xe8, 0x5e, + 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87, 0x00, 0xb7, + 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, + 0x37, 0x80, 0x44, 0xf3, 0xbf, 0x08, 0xcc, 0x7b, 0x59, 0xee, + 0x2a, 0x9d, 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c, + 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41, 0xb2, 0x05, + 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90, 0xdc, 0x6b, 0xaf, 0x18, + 0x3a, 0x8d, 0x49, 0xfe, 0x0d, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, + 0x98, 0x2f, 0xc6, 0x71, 0xb5, 0x02, 0x20, 0x97, 0x53, 0xe4, + 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35, 0x79, 0xce, + 0x0a, 0xbd, 0x9f, 0x28, 0xec, 0x5b, 0xa8, 0x1f, 0xdb, 0x6c, + 0x4e, 0xf9, 0x3d, 0x8a, 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, + 0x30, 0x87, 0x74, 0xc3, 0x07, 0xb0, 0x92, 0x25, 0xe1, 0x56, + 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38, 0xcb, 0x7c, + 0xb8, 0x0f, 
0x2d, 0x9a, 0x5e, 0xe9, 0x91, 0x26, 0xe2, 0x55, + 0x77, 0xc0, 0x04, 0xb3, 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, + 0xd5, 0x62, 0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0x0c, + 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd, 0xf2, 0x45, + 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0, 0x23, 0x94, 0x50, 0xe7, + 0xc5, 0x72, 0xb6, 0x01, 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, + 0xd8, 0x6f, 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x09, 0xbe, + 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x06, 0xc2, 0x75, 0x86, 0x31, + 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4, 0xe8, 0x5f, 0x9b, 0x2c, + 0x0e, 0xb9, 0x7d, 0xca, 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, + 0xac, 0x1b, 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16, + 0xe5, 0x52, 0x96, 0x21, 0x03, 0xb4, 0x70, 0xc7, 0x8b, 0x3c, + 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9, 0x5a, 0xed, 0x29, 0x9e, + 0xbc, 0x0b, 0xcf, 0x78, 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, + 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6, + 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40, 0xe6, 0x5e, + 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9, 0x9e, 0x26, 0xf3, 0x4b, + 0x44, 0xfc, 0x29, 0x91, 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, + 0x80, 0x38, 0xd1, 0x69, 0xbc, 0x04, 0x0b, 0xb3, 0x66, 0xde, + 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77, 0x21, 0x99, + 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e, 0x88, 0x30, 0xe5, 0x5d, + 0x52, 0xea, 0x3f, 0x87, 0x6e, 0xd6, 0x03, 0xbb, 0xb4, 0x0c, + 0xd9, 0x61, 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8, + 0xbf, 0x07, 0xd2, 0x6a, 0x65, 0xdd, 0x08, 0xb0, 0x16, 0xae, + 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19, 0xf0, 0x48, 0x9d, 0x25, + 0x2a, 0x92, 0x47, 0xff, 0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, + 0xee, 0x56, 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d, + 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4, 0x0d, 0xb5, + 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x02, 0xa4, 0x1c, 0xc9, 0x71, + 0x7e, 0xc6, 0x13, 0xab, 0xdc, 0x64, 0xb1, 0x09, 0x06, 0xbe, + 0x6b, 0xd3, 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a, + 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c, 0x3a, 0x82, + 0x57, 0xef, 0xe0, 0x58, 
0x8d, 0x35, 0x63, 0xdb, 0x0e, 0xb6, + 0xb9, 0x01, 0xd4, 0x6c, 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, + 0x7d, 0xc5, 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23, + 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a, 0xfd, 0x45, + 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2, 0x54, 0xec, 0x39, 0x81, + 0x8e, 0x36, 0xe3, 0x5b, 0xb2, 0x0a, 0xdf, 0x67, 0x68, 0xd0, + 0x05, 0xbd, 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14, + 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, + 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9, 0x5f, 0xe6, 0x30, 0x89, + 0x81, 0x38, 0xee, 0x57, 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, + 0x4f, 0xf6, 0xbe, 0x07, 0xd1, 0x68, 0x60, 0xd9, 0x0f, 0xb6, + 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17, 0xe1, 0x58, + 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9, 0x40, 0xf9, 0x2f, 0x96, + 0x9e, 0x27, 0xf1, 0x48, 0x61, 0xd8, 0x0e, 0xb7, 0xbf, 0x06, + 0xd0, 0x69, 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8, + 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36, 0x9f, 0x26, + 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97, 0xdf, 0x66, 0xb0, 0x09, + 0x01, 0xb8, 0x6e, 0xd7, 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, + 0xcf, 0x76, 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88, + 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29, 0xc2, 0x7b, + 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca, 0x63, 0xda, 0x0c, 0xb5, + 0xbd, 0x04, 0xd2, 0x6b, 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, + 0x2c, 0x95, 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34, + 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74, 0xdd, 0x64, + 0xb2, 0x0b, 0x03, 0xba, 0x6c, 0xd5, 0x23, 0x9a, 0x4c, 0xf5, + 0xfd, 0x44, 0x92, 0x2b, 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, + 0x33, 0x8a, 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab, + 0x02, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0x0a, 0xfc, 0x45, + 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4, 0x5d, 0xe4, 0x32, 0x8b, + 0x83, 0x3a, 0xec, 0x55, 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, + 0xac, 0x15, 0xbc, 0x05, 0xd3, 0x6a, 0x62, 0xdb, 0x0d, 0xb4, + 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a, 0xe3, 0x5a, + 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb, 
0x00, 0xba, 0x69, 0xd3, + 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, + 0x02, 0xb8, 0x6f, 0xd5, 0x06, 0xbc, 0xbd, 0x07, 0xd4, 0x6e, + 0xd6, 0x6c, 0xbf, 0x05, 0x04, 0xbe, 0x6d, 0xd7, 0xde, 0x64, + 0xb7, 0x0d, 0x0c, 0xb6, 0x65, 0xdf, 0x67, 0xdd, 0x0e, 0xb4, + 0xb5, 0x0f, 0xdc, 0x66, 0xb1, 0x0b, 0xd8, 0x62, 0x63, 0xd9, + 0x0a, 0xb0, 0x08, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x09, + 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0, 0x18, 0xa2, + 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19, 0xce, 0x74, 0xa7, 0x1d, + 0x1c, 0xa6, 0x75, 0xcf, 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, + 0xcc, 0x76, 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e, + 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7, 0x10, 0xaa, + 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11, 0xa9, 0x13, 0xc0, 0x7a, + 0x7b, 0xc1, 0x12, 0xa8, 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, + 0xe4, 0x5e, 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7, + 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31, 0x89, 0x33, + 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88, 0x81, 0x3b, 0xe8, 0x52, + 0x53, 0xe9, 0x3a, 0x80, 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, + 0x83, 0x39, 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef, + 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56, 0xfe, 0x44, + 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff, 0x47, 0xfd, 0x2e, 0x94, + 0x95, 0x2f, 0xfc, 0x46, 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, + 0x2a, 0x90, 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29, + 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21, 0x99, 0x23, + 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98, 0x4f, 0xf5, 0x26, 0x9c, + 0x9d, 0x27, 0xf4, 0x4e, 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, + 0x4d, 0xf7, 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, + 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7, 0x7f, 0xc4, + 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79, 0xce, 0x75, 0xa5, 0x1e, + 0x18, 0xa3, 0x73, 0xc8, 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, + 0x43, 0xf8, 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49, + 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87, 0x30, 0x8b, + 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36, 0xe1, 0x5a, 
0x8a, 0x31, + 0x37, 0x8c, 0x5c, 0xe7, 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, + 0xed, 0x56, 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98, + 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29, 0x1f, 0xa4, + 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19, 0xae, 0x15, 0xc5, 0x7e, + 0x78, 0xc3, 0x13, 0xa8, 0x60, 0xdb, 0x0b, 0xb0, 0xb6, 0x0d, + 0xdd, 0x66, 0xd1, 0x6a, 0xba, 0x01, 0x07, 0xbc, 0x6c, 0xd7, + 0xdf, 0x64, 0xb4, 0x0f, 0x09, 0xb2, 0x62, 0xd9, 0x6e, 0xd5, + 0x05, 0xbe, 0xb8, 0x03, 0xd3, 0x68, 0xa0, 0x1b, 0xcb, 0x70, + 0x76, 0xcd, 0x1d, 0xa6, 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, + 0xac, 0x17, 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27, + 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96, 0x5e, 0xe5, + 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58, 0xef, 0x54, 0x84, 0x3f, + 0x39, 0x82, 0x52, 0xe9, 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, + 0x83, 0x38, 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89, + 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47, 0xf0, 0x4b, + 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6, 0xc0, 0x7b, 0xab, 0x10, + 0x16, 0xad, 0x7d, 0xc6, 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, + 0xcc, 0x77, 0xbf, 0x04, 0xd4, 0x6f, 0x69, 0xd2, 0x02, 0xb9, + 0x0e, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x08, 0x00, 0xbc, + 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, + 0x43, 0xff, 0x26, 0x9a, 0x0f, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, + 0xa0, 0x1c, 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95, + 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0x0d, 0x97, 0x2b, + 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84, 0x11, 0xad, 0x74, 0xc8, + 0xdb, 0x67, 0xbe, 0x02, 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, + 0x37, 0x8b, 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f, + 0xb5, 0x09, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6, 0x33, 0x8f, + 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20, 0xba, 0x06, 0xdf, 0x63, + 0x70, 0xcc, 0x15, 0xa9, 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, + 0x8d, 0x31, 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x04, 0xb8, + 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e, 0xa4, 0x18, + 0xc1, 0x7d, 0x6e, 0xd2, 0x0b, 0xb7, 0x78, 0xc4, 0x1d, 0xa1, + 
0xb2, 0x0e, 0xd7, 0x6b, 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, + 0x5e, 0xe2, 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x01, 0xd8, 0x64, + 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed, 0x66, 0xda, + 0x03, 0xbf, 0xac, 0x10, 0xc9, 0x75, 0xef, 0x53, 0x8a, 0x36, + 0x25, 0x99, 0x40, 0xfc, 0x69, 0xd5, 0x0c, 0xb0, 0xa3, 0x1f, + 0xc6, 0x7a, 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3, + 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57, 0xcd, 0x71, + 0xa8, 0x14, 0x07, 0xbb, 0x62, 0xde, 0x4b, 0xf7, 0x2e, 0x92, + 0x81, 0x3d, 0xe4, 0x58, 0xc2, 0x7e, 0xa7, 0x1b, 0x08, 0xb4, + 0x6d, 0xd1, 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49, + 0xd3, 0x6f, 0xb6, 0x0a, 0x19, 0xa5, 0x7c, 0xc0, 0x55, 0xe9, + 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46, 0xdc, 0x60, 0xb9, 0x05, + 0x16, 0xaa, 0x73, 0xcf, 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, + 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95, + 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0x0b, 0x9e, 0x23, + 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a, 0x3e, 0x83, 0x59, 0xe4, + 0xf0, 0x4d, 0x97, 0x2a, 0xbf, 0x02, 0xd8, 0x65, 0x71, 0xcc, + 0x16, 0xab, 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35, + 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x09, 0xb4, 0x7c, 0xc1, + 0x1b, 0xa6, 0xb2, 0x0f, 0xd5, 0x68, 0xfd, 0x40, 0x9a, 0x27, + 0x33, 0x8e, 0x54, 0xe9, 0x63, 0xde, 0x04, 0xb9, 0xad, 0x10, + 0xca, 0x77, 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6, + 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56, 0xc3, 0x7e, + 0xa4, 0x19, 0x0d, 0xb0, 0x6a, 0xd7, 0x5d, 0xe0, 0x3a, 0x87, + 0x93, 0x2e, 0xf4, 0x49, 0xdc, 0x61, 0xbb, 0x06, 0x12, 0xaf, + 0x75, 0xc8, 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec, + 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0x0a, 0xd0, 0x6d, 0xe7, 0x5a, + 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3, 0x66, 0xdb, 0x01, 0xbc, + 0xa8, 0x15, 0xcf, 0x72, 0xc6, 0x7b, 0xa1, 0x1c, 0x08, 0xb5, + 0x6f, 0xd2, 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53, + 0xd9, 0x64, 0xbe, 0x03, 0x17, 0xaa, 0x70, 0xcd, 0x58, 0xe5, + 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c, 0x84, 0x39, 0xe3, 0x5e, + 0x4a, 0xf7, 
0x2d, 0x90, 0x05, 0xb8, 0x62, 0xdf, 0xcb, 0x76, + 0xac, 0x11, 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f, + 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0x0e, 0xba, 0x07, + 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae, 0x3b, 0x86, 0x5c, 0xe1, + 0xf5, 0x48, 0x92, 0x2f, 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, + 0x0c, 0xb1, 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30, + 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, + 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84, 0x2f, 0x91, 0x4e, 0xf0, + 0xed, 0x53, 0x8c, 0x32, 0xb6, 0x08, 0xd7, 0x69, 0x74, 0xca, + 0x15, 0xab, 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43, + 0xc7, 0x79, 0xa6, 0x18, 0x05, 0xbb, 0x64, 0xda, 0x71, 0xcf, + 0x10, 0xae, 0xb3, 0x0d, 0xd2, 0x6c, 0xe8, 0x56, 0x89, 0x37, + 0x2a, 0x94, 0x4b, 0xf5, 0xbc, 0x02, 0xdd, 0x63, 0x7e, 0xc0, + 0x1f, 0xa1, 0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38, + 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e, 0x0a, 0xb4, + 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17, 0xe2, 0x5c, 0x83, 0x3d, + 0x20, 0x9e, 0x41, 0xff, 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x07, + 0xd8, 0x66, 0xcd, 0x73, 0xac, 0x12, 0x0f, 0xb1, 0x6e, 0xd0, + 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49, 0x65, 0xdb, + 0x04, 0xba, 0xa7, 0x19, 0xc6, 0x78, 0xfc, 0x42, 0x9d, 0x23, + 0x3e, 0x80, 0x5f, 0xe1, 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, + 0xe9, 0x57, 0xd3, 0x6d, 0xb2, 0x0c, 0x11, 0xaf, 0x70, 0xce, + 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26, 0xa2, 0x1c, + 0xc3, 0x7d, 0x60, 0xde, 0x01, 0xbf, 0x14, 0xaa, 0x75, 0xcb, + 0xd6, 0x68, 0xb7, 0x09, 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, + 0x2e, 0x90, 0xd9, 0x67, 0xb8, 0x06, 0x1b, 0xa5, 0x7a, 0xc4, + 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d, 0xf6, 0x48, + 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb, 0x6f, 0xd1, 0x0e, 0xb0, + 0xad, 0x13, 0xcc, 0x72, 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, + 0x24, 0x9a, 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x03, + 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0x0b, 0xb5, 0x31, 0x8f, + 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c, 0x00, 0xbf, 0x63, 0xdc, + 0xc6, 0x79, 0xa5, 0x1a, 
0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, + 0x34, 0x8b, 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25, + 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0x0b, 0xb4, 0x7e, 0xc1, + 0x1d, 0xa2, 0xb8, 0x07, 0xdb, 0x64, 0xef, 0x50, 0x8c, 0x33, + 0x29, 0x96, 0x4a, 0xf5, 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, + 0xe4, 0x5b, 0xd0, 0x6f, 0xb3, 0x0c, 0x16, 0xa9, 0x75, 0xca, + 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6, 0x6d, 0xd2, + 0x0e, 0xb1, 0xab, 0x14, 0xc8, 0x77, 0xc3, 0x7c, 0xa0, 0x1f, + 0x05, 0xba, 0x66, 0xd9, 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, + 0xf7, 0x48, 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98, + 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x09, 0xbd, 0x02, + 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7, 0x2c, 0x93, 0x4f, 0xf0, + 0xea, 0x55, 0x89, 0x36, 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, + 0x40, 0xff, 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0x0d, 0xd1, 0x6e, + 0xda, 0x65, 0xb9, 0x06, 0x1c, 0xa3, 0x7f, 0xc0, 0x4b, 0xf4, + 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51, 0x9b, 0x24, 0xf8, 0x47, + 0x5d, 0xe2, 0x3e, 0x81, 0x0a, 0xb5, 0x69, 0xd6, 0xcc, 0x73, + 0xaf, 0x10, 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x01, 0xbe, + 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f, 0x19, 0xa6, + 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x03, 0x88, 0x37, 0xeb, 0x54, + 0x4e, 0xf1, 0x2d, 0x92, 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, + 0x83, 0x3c, 0xb7, 0x08, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad, + 0x67, 0xd8, 0x04, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d, 0xf6, 0x49, + 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec, 0x58, 0xe7, 0x3b, 0x84, + 0x9e, 0x21, 0xfd, 0x42, 0xc9, 0x76, 0xaa, 0x15, 0x0f, 0xb0, + 0x6c, 0xd3, 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, + 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34, 0x9c, 0x5c, + 0x01, 0xc1, 0xbb, 0x7b, 0x26, 0xe6, 0xd2, 0x12, 0x4f, 0x8f, + 0xf5, 0x35, 0x68, 0xa8, 0x25, 0xe5, 0xb8, 0x78, 0x02, 0xc2, + 0x9f, 0x5f, 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11, + 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x03, 0xc3, 0xf7, 0x37, + 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d, 0x4a, 0x8a, 0xd7, 0x17, + 0x6d, 0xad, 0xf0, 0x30, 0x04, 0xc4, 
0x99, 0x59, 0x23, 0xe3, + 0xbe, 0x7e, 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac, + 0x98, 0x58, 0x05, 0xc5, 0xbf, 0x7f, 0x22, 0xe2, 0x6f, 0xaf, + 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15, 0x21, 0xe1, 0xbc, 0x7c, + 0x06, 0xc6, 0x9b, 0x5b, 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, + 0x49, 0x89, 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x07, 0xc7, + 0x94, 0x54, 0x09, 0xc9, 0xb3, 0x73, 0x2e, 0xee, 0xda, 0x1a, + 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0, 0x08, 0xc8, 0x95, 0x55, + 0x2f, 0xef, 0xb2, 0x72, 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, + 0xfc, 0x3c, 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0x0b, 0xcb, + 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85, 0x2d, 0xed, + 0xb0, 0x70, 0x0a, 0xca, 0x97, 0x57, 0x63, 0xa3, 0xfe, 0x3e, + 0x44, 0x84, 0xd9, 0x19, 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, + 0x64, 0xa4, 0x90, 0x50, 0x0d, 0xcd, 0xb7, 0x77, 0x2a, 0xea, + 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38, 0x0c, 0xcc, + 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76, 0xfb, 0x3b, 0x66, 0xa6, + 0xdc, 0x1c, 0x41, 0x81, 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, + 0x0f, 0xcf, 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d, + 0x29, 0xe9, 0xb4, 0x74, 0x0e, 0xce, 0x93, 0x53, 0x00, 0xc1, + 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, + 0x65, 0xa4, 0xfa, 0x3b, 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, + 0x30, 0xf1, 0xca, 0x0b, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7, + 0x05, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78, 0x43, 0x82, + 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e, 0x89, 0x48, 0x16, 0xd7, + 0xaa, 0x6b, 0x35, 0xf4, 0xcf, 0x0e, 0x50, 0x91, 0xec, 0x2d, + 0x73, 0xb2, 0x0a, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77, + 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31, 0x86, 0x47, + 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb, 0xc0, 0x01, 0x5f, 0x9e, + 0xe3, 0x22, 0x7c, 0xbd, 0x0f, 0xce, 0x90, 0x51, 0x2c, 0xed, + 0xb3, 0x72, 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34, + 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe, 0xc5, 0x04, + 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8, 0x14, 0xd5, 0x8b, 0x4a, + 0x37, 0xf6, 0xa8, 0x69, 0x52, 0x93, 0xcd, 0x0c, 
0x71, 0xb0, + 0xee, 0x2f, 0x98, 0x59, 0x07, 0xc6, 0xbb, 0x7a, 0x24, 0xe5, + 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3, 0x11, 0xd0, + 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c, 0x57, 0x96, 0xc8, 0x09, + 0x74, 0xb5, 0xeb, 0x2a, 0x9d, 0x5c, 0x02, 0xc3, 0xbe, 0x7f, + 0x21, 0xe0, 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6, + 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63, 0x58, 0x99, + 0xc7, 0x06, 0x7b, 0xba, 0xe4, 0x25, 0x92, 0x53, 0x0d, 0xcc, + 0xb1, 0x70, 0x2e, 0xef, 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, + 0x68, 0xa9, 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66, + 0x5d, 0x9c, 0xc2, 0x03, 0x7e, 0xbf, 0xe1, 0x20, 0x97, 0x56, + 0x08, 0xc9, 0xb4, 0x75, 0x2b, 0xea, 0xd1, 0x10, 0x4e, 0x8f, + 0xf2, 0x33, 0x6d, 0xac, 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, + 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a, + 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0x0a, 0xc8, 0xe2, 0x20, + 0x7b, 0xb9, 0xcd, 0x0f, 0x54, 0x96, 0x65, 0xa7, 0xfc, 0x3e, + 0x4a, 0x88, 0xd3, 0x11, 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, + 0x8d, 0x4f, 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad, + 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3, 0xca, 0x08, + 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe, 0x94, 0x56, 0x0d, 0xcf, + 0xbb, 0x79, 0x22, 0xe0, 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, + 0xc0, 0x02, 0x28, 0xea, 0xb1, 0x73, 0x07, 0xc5, 0x9e, 0x5c, + 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb, 0xf1, 0x33, + 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85, 0x13, 0xd1, 0x8a, 0x48, + 0x3c, 0xfe, 0xa5, 0x67, 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, + 0xfb, 0x39, 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd, + 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3, 0x35, 0xf7, + 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41, 0x6b, 0xa9, 0xf2, 0x30, + 0x44, 0x86, 0xdd, 0x1f, 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x01, + 0x5a, 0x98, 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x04, 0xc6, + 0x50, 0x92, 0xc9, 0x0b, 0x7f, 0xbd, 0xe6, 0x24, 0x0e, 0xcc, + 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a, 0x43, 0x81, 0xda, 0x18, + 0x6c, 0xae, 0xf5, 0x37, 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, + 
0xab, 0x69, 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b, + 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5, 0x26, 0xe4, + 0xbf, 0x7d, 0x09, 0xcb, 0x90, 0x52, 0x78, 0xba, 0xe1, 0x23, + 0x57, 0x95, 0xce, 0x0c, 0x9a, 0x58, 0x03, 0xc1, 0xb5, 0x77, + 0x2c, 0xee, 0xc4, 0x06, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0, + 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, + 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25, 0xac, 0x6f, 0x37, 0xf4, + 0x87, 0x44, 0x1c, 0xdf, 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, + 0x4a, 0x89, 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36, + 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60, 0xe9, 0x2a, + 0x72, 0xb1, 0xc2, 0x01, 0x59, 0x9a, 0xbf, 0x7c, 0x24, 0xe7, + 0x94, 0x57, 0x0f, 0xcc, 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, + 0x3a, 0xf9, 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf, + 0x26, 0xe5, 0xbd, 0x7e, 0x0d, 0xce, 0x96, 0x55, 0x70, 0xb3, + 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x03, 0xcf, 0x0c, 0x54, 0x97, + 0xe4, 0x27, 0x7f, 0xbc, 0x99, 0x5a, 0x02, 0xc1, 0xb2, 0x71, + 0x29, 0xea, 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10, + 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46, 0x09, 0xca, + 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a, 0x5f, 0x9c, 0xc4, 0x07, + 0x74, 0xb7, 0xef, 0x2c, 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, + 0x15, 0xd6, 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80, + 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f, 0x1a, 0xd9, + 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69, 0xe0, 0x23, 0x7b, 0xb8, + 0xcb, 0x08, 0x50, 0x93, 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, + 0x06, 0xc5, 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0, + 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6, 0x2f, 0xec, + 0xb4, 0x77, 0x04, 0xc7, 0x9f, 0x5c, 0x79, 0xba, 0xe2, 0x21, + 0x52, 0x91, 0xc9, 0x0a, 0xc6, 0x05, 0x5d, 0x9e, 0xed, 0x2e, + 0x76, 0xb5, 0x90, 0x53, 0x0b, 0xc8, 0xbb, 0x78, 0x20, 0xe3, + 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19, 0x3c, 0xff, + 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f, 0x00, 0xc4, 0x95, 0x51, + 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, + 0xcc, 0x08, 
0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba, + 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4, 0xa5, 0x61, + 0x30, 0xf4, 0x92, 0x56, 0x07, 0xc3, 0xcb, 0x0f, 0x5e, 0x9a, + 0xfc, 0x38, 0x69, 0xad, 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, + 0xdb, 0x1f, 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71, + 0x57, 0x93, 0xc2, 0x06, 0x60, 0xa4, 0xf5, 0x31, 0x39, 0xfd, + 0xac, 0x68, 0x0e, 0xca, 0x9b, 0x5f, 0x8b, 0x4f, 0x1e, 0xda, + 0xbc, 0x78, 0x29, 0xed, 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, + 0x47, 0x83, 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x01, 0x50, 0x94, + 0x9c, 0x58, 0x09, 0xcd, 0xab, 0x6f, 0x3e, 0xfa, 0x2e, 0xea, + 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48, 0x40, 0x84, 0xd5, 0x11, + 0x77, 0xb3, 0xe2, 0x26, 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, + 0x0c, 0xc8, 0xc0, 0x04, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6, + 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14, 0x1c, 0xd8, + 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a, 0x0b, 0xcf, 0x9e, 0x5a, + 0x3c, 0xf8, 0xa9, 0x6d, 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, + 0xc7, 0x03, 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1, + 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf, 0xf9, 0x3d, + 0x6c, 0xa8, 0xce, 0x0a, 0x5b, 0x9f, 0x97, 0x53, 0x02, 0xc6, + 0xa0, 0x64, 0x35, 0xf1, 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, + 0x87, 0x43, 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d, + 0x5c, 0x98, 0xc9, 0x0d, 0x6b, 0xaf, 0xfe, 0x3a, 0x32, 0xf6, + 0xa7, 0x63, 0x05, 0xc1, 0x90, 0x54, 0x80, 0x44, 0x15, 0xd1, + 0xb7, 0x73, 0x22, 0xe6, 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, + 0x4c, 0x88, 0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, + 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07, 0xcc, 0x09, + 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad, 0xaa, 0x6f, 0x3d, 0xf8, + 0x99, 0x5c, 0x0e, 0xcb, 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, + 0x21, 0xe4, 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82, + 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28, 0x2f, 0xea, + 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e, 0x17, 0xd2, 0x80, 0x45, + 0x24, 0xe1, 0xb3, 0x76, 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, + 0xd5, 0x10, 0xdb, 0x1e, 
0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba, + 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc, 0x92, 0x57, + 0x05, 0xc0, 0xa1, 0x64, 0x36, 0xf3, 0xf4, 0x31, 0x63, 0xa6, + 0xc7, 0x02, 0x50, 0x95, 0x5e, 0x9b, 0xc9, 0x0c, 0x6d, 0xa8, + 0xfa, 0x3f, 0x38, 0xfd, 0xaf, 0x6a, 0x0b, 0xce, 0x9c, 0x59, + 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f, 0x48, 0x8d, + 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29, 0xe2, 0x27, 0x75, 0xb0, + 0xd1, 0x14, 0x46, 0x83, 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, + 0x20, 0xe5, 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0x0f, 0xca, + 0xcd, 0x08, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac, 0x67, 0xa2, + 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x06, 0x01, 0xc4, 0x96, 0x53, + 0x32, 0xf7, 0xa5, 0x60, 0x39, 0xfc, 0xae, 0x6b, 0x0a, 0xcf, + 0x9d, 0x58, 0x5f, 0x9a, 0xc8, 0x0d, 0x6c, 0xa9, 0xfb, 0x3e, + 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x03, 0x51, 0x94, 0x93, 0x56, + 0x04, 0xc1, 0xa0, 0x65, 0x37, 0xf2, 0xbc, 0x79, 0x2b, 0xee, + 0x8f, 0x4a, 0x18, 0xdd, 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, + 0x7e, 0xbb, 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11, + 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77, 0x00, 0xc6, + 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, + 0x41, 0x87, 0xd0, 0x16, 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x05, + 0x52, 0x94, 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea, + 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d, 0x9b, 0x5d, + 0x0a, 0xcc, 0xa4, 0x62, 0x35, 0xf3, 0x19, 0xdf, 0x88, 0x4e, + 0x26, 0xe0, 0xb7, 0x71, 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, + 0xc9, 0x0f, 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf, + 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x07, 0xc1, 0x2b, 0xed, + 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43, 0x55, 0x93, 0xc4, 0x02, + 0x6a, 0xac, 0xfb, 0x3d, 0x32, 0xf4, 0xa3, 0x65, 0x0d, 0xcb, + 0x9c, 0x5a, 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24, + 0xce, 0x08, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6, 0xb0, 0x76, + 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8, 0xb3, 0x75, 0x22, 0xe4, + 0x8c, 0x4a, 0x1d, 0xdb, 0xcd, 0x0b, 0x5c, 0x9a, 0xf2, 0x34, + 0x63, 0xa5, 0x4f, 0x89, 0xde, 0x18, 
0x70, 0xb6, 0xe1, 0x27, + 0x31, 0xf7, 0xa0, 0x66, 0x0e, 0xc8, 0x9f, 0x59, 0x56, 0x90, + 0xc7, 0x01, 0x69, 0xaf, 0xf8, 0x3e, 0x28, 0xee, 0xb9, 0x7f, + 0x17, 0xd1, 0x86, 0x40, 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, + 0x04, 0xc2, 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc, + 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0x0c, 0x1a, 0xdc, + 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72, 0x98, 0x5e, 0x09, 0xcf, + 0xa7, 0x61, 0x36, 0xf0, 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, + 0x48, 0x8e, 0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9, + 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x06, 0x51, 0x97, 0x7d, 0xbb, + 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15, 0x03, 0xc5, 0x92, 0x54, + 0x3c, 0xfa, 0xad, 0x6b, 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, + 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19, + 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83, 0x9a, 0x5d, + 0x09, 0xce, 0xa1, 0x66, 0x32, 0xf5, 0xc5, 0x02, 0x56, 0x91, + 0xfe, 0x39, 0x6d, 0xaa, 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, + 0x1b, 0xdc, 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46, + 0x5f, 0x98, 0xcc, 0x0b, 0x64, 0xa3, 0xf7, 0x30, 0x97, 0x50, + 0x04, 0xc3, 0xac, 0x6b, 0x3f, 0xf8, 0xe1, 0x26, 0x72, 0xb5, + 0xda, 0x1d, 0x49, 0x8e, 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, + 0xd3, 0x14, 0x0d, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62, + 0x52, 0x95, 0xc1, 0x06, 0x69, 0xae, 0xfa, 0x3d, 0x24, 0xe3, + 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b, 0xbe, 0x79, 0x2d, 0xea, + 0x85, 0x42, 0x16, 0xd1, 0xc8, 0x0f, 0x5b, 0x9c, 0xf3, 0x34, + 0x60, 0xa7, 0x33, 0xf4, 0xa0, 0x67, 0x08, 0xcf, 0x9b, 0x5c, + 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a, 0xdf, 0x18, + 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0, 0xa9, 0x6e, 0x3a, 0xfd, + 0x92, 0x55, 0x01, 0xc6, 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0x0a, + 0x5e, 0x99, 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef, + 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75, 0x6c, 0xab, + 0xff, 0x38, 0x57, 0x90, 0xc4, 0x03, 0xa4, 0x63, 0x37, 0xf0, + 0x9f, 0x58, 0x0c, 0xcb, 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, + 0x7a, 0xbd, 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 
0xe0, 0x27, + 0x3e, 0xf9, 0xad, 0x6a, 0x05, 0xc2, 0x96, 0x51, 0x61, 0xa6, + 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0x0e, 0x17, 0xd0, 0x84, 0x43, + 0x2c, 0xeb, 0xbf, 0x78, 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, + 0x25, 0xe2, 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x07, 0x53, 0x94, + 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, + 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c, 0x1c, 0xd4, 0x91, 0x59, + 0x1b, 0xd3, 0x96, 0x5e, 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, + 0x98, 0x50, 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a, + 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74, 0x24, 0xec, + 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66, 0x2a, 0xe2, 0xa7, 0x6f, + 0x2d, 0xe5, 0xa0, 0x68, 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, + 0xfa, 0x32, 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c, + 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e, 0x62, 0xaa, + 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20, 0x48, 0x80, 0xc5, 0x0d, + 0x4f, 0x87, 0xc2, 0x0a, 0x46, 0x8e, 0xcb, 0x03, 0x41, 0x89, + 0xcc, 0x04, 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16, + 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18, 0xe0, 0x28, + 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2, 0xee, 0x26, 0x63, 0xab, + 0xe9, 0x21, 0x64, 0xac, 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, + 0x76, 0xbe, 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0, + 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a, 0xd6, 0x1e, + 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94, 0xc4, 0x0c, 0x49, 0x81, + 0xc3, 0x0b, 0x4e, 0x86, 0xca, 0x02, 0x47, 0x8f, 0xcd, 0x05, + 0x40, 0x88, 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2, + 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc, 0x8c, 0x44, + 0x01, 0xc9, 0x8b, 0x43, 0x06, 0xce, 0x82, 0x4a, 0x0f, 0xc7, + 0x85, 0x4d, 0x08, 0xc0, 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, + 0x22, 0xea, 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4, + 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6, 0xba, 0x72, + 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8, 0x00, 0xc9, 0x8f, 0x46, + 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, + 0x8a, 0x43, 0x0c, 0xc5, 0x83, 0x4a, 0x0f, 0xc6, 0x80, 0x49, + 
0x0a, 0xc3, 0x85, 0x4c, 0x09, 0xc0, 0x86, 0x4f, 0x18, 0xd1, + 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d, 0x1e, 0xd7, 0x91, 0x58, + 0x1d, 0xd4, 0x92, 0x5b, 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, + 0x98, 0x51, 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57, + 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75, 0x36, 0xff, + 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73, 0x3c, 0xf5, 0xb3, 0x7a, + 0x3f, 0xf6, 0xb0, 0x79, 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, + 0xb6, 0x7f, 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d, + 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b, 0x24, 0xed, + 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61, 0x22, 0xeb, 0xad, 0x64, + 0x21, 0xe8, 0xae, 0x67, 0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa, + 0xec, 0x25, 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23, + 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29, 0x6a, 0xa3, + 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f, 0x78, 0xb1, 0xf7, 0x3e, + 0x7b, 0xb2, 0xf4, 0x3d, 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, + 0xf2, 0x3b, 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31, + 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37, 0x50, 0x99, + 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15, 0x56, 0x9f, 0xd9, 0x10, + 0x55, 0x9c, 0xda, 0x13, 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, + 0xd0, 0x19, 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f, + 0x48, 0x81, 0xc7, 0x0e, 0x4b, 0x82, 0xc4, 0x0d, 0x4e, 0x87, + 0xc1, 0x08, 0x4d, 0x84, 0xc2, 0x0b, 0x44, 0x8d, 0xcb, 0x02, + 0x47, 0x8e, 0xc8, 0x01, 0x42, 0x8b, 0xcd, 0x04, 0x41, 0x88, + 0xce, 0x07, 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, + 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52, 0x3c, 0xf6, + 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70, 0x22, 0xe8, 0xab, 0x61, + 0x2d, 0xe7, 0xa4, 0x6e, 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, + 0xfe, 0x34, 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a, + 0x44, 0x8e, 0xcd, 0x07, 0x4b, 0x81, 0xc2, 0x08, 0x5a, 0x90, + 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16, 0xf0, 0x3a, 0x79, 0xb3, + 0xff, 0x35, 0x76, 0xbc, 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, + 0x68, 0xa2, 0xcc, 0x06, 0x45, 0x8f, 0xc3, 0x09, 0x4a, 0x80, + 0xd2, 0x18, 
0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e, 0x88, 0x42, + 0x01, 0xcb, 0x87, 0x4d, 0x0e, 0xc4, 0x96, 0x5c, 0x1f, 0xd5, + 0x99, 0x53, 0x10, 0xda, 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, + 0x32, 0xf8, 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6, + 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1, 0xe3, 0x29, + 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf, 0xc1, 0x0b, 0x48, 0x82, + 0xce, 0x04, 0x47, 0x8d, 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, + 0x59, 0x93, 0x85, 0x4f, 0x0c, 0xc6, 0x8a, 0x40, 0x03, 0xc9, + 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7, 0xb9, 0x73, + 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5, 0xa7, 0x6d, 0x2e, 0xe4, + 0xa8, 0x62, 0x21, 0xeb, 0x0d, 0xc7, 0x84, 0x4e, 0x02, 0xc8, + 0x8b, 0x41, 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f, + 0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d, 0x2f, 0xe5, + 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63, 0x75, 0xbf, 0xfc, 0x36, + 0x7a, 0xb0, 0xf3, 0x39, 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, + 0xed, 0x27, 0x49, 0x83, 0xc0, 0x0a, 0x46, 0x8c, 0xcf, 0x05, + 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b, 0x00, 0xcb, + 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, + 0x1d, 0xd6, 0x96, 0x5d, 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, + 0xac, 0x67, 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71, + 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13, 0x4e, 0x85, + 0xc5, 0x0e, 0x45, 0x8e, 0xce, 0x05, 0x74, 0xbf, 0xff, 0x34, + 0x7f, 0xb4, 0xf4, 0x3f, 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, + 0xe2, 0x29, 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb, + 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed, 0x9c, 0x57, + 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7, 0x8a, 0x41, 0x01, 0xca, + 0x81, 0x4a, 0x0a, 0xc1, 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, + 0x68, 0xa3, 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5, + 0xc4, 0x0f, 0x4f, 0x84, 0xcf, 0x04, 0x44, 0x8f, 0xd2, 0x19, + 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99, 0x7d, 0xb6, 0xf6, 0x3d, + 0x76, 0xbd, 0xfd, 0x36, 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, + 0xeb, 0x20, 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a, + 0x47, 0x8c, 0xcc, 0x07, 
0x4c, 0x87, 0xc7, 0x0c, 0x25, 0xee, + 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e, 0x33, 0xf8, 0xb8, 0x73, + 0x38, 0xf3, 0xb3, 0x78, 0x09, 0xc2, 0x82, 0x49, 0x02, 0xc9, + 0x89, 0x42, 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54, + 0xcd, 0x06, 0x46, 0x8d, 0xc6, 0x0d, 0x4d, 0x86, 0xdb, 0x10, + 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90, 0xe1, 0x2a, 0x6a, 0xa1, + 0xea, 0x21, 0x61, 0xaa, 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, + 0x77, 0xbc, 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde, + 0x83, 0x48, 0x08, 0xc3, 0x88, 0x43, 0x03, 0xc8, 0xb9, 0x72, + 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2, 0xaf, 0x64, 0x24, 0xef, + 0xa4, 0x6f, 0x2f, 0xe4, 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, + 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70, + 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x02, 0x72, 0xbe, + 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c, 0xb8, 0x74, 0x3d, 0xf1, + 0xaf, 0x63, 0x2a, 0xe6, 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, + 0x04, 0xc8, 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba, + 0xca, 0x06, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94, 0x6d, 0xa1, + 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33, 0x43, 0x8f, 0xc6, 0x0a, + 0x54, 0x98, 0xd1, 0x1d, 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, + 0xa3, 0x6f, 0x1f, 0xd3, 0x9a, 0x56, 0x08, 0xc4, 0x8d, 0x41, + 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0x0e, 0x47, 0x8b, 0xfb, 0x37, + 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5, 0x89, 0x45, 0x0c, 0xc0, + 0x9e, 0x52, 0x1b, 0xd7, 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, + 0x35, 0xf9, 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x01, 0x48, 0x84, + 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa, 0x86, 0x4a, + 0x03, 0xcf, 0x91, 0x5d, 0x14, 0xd8, 0xa8, 0x64, 0x2d, 0xe1, + 0xbf, 0x73, 0x3a, 0xf6, 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, + 0xf0, 0x3c, 0x4c, 0x80, 0xc9, 0x05, 0x5b, 0x97, 0xde, 0x12, + 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60, 0x10, 0xdc, + 0x95, 0x59, 0x07, 0xcb, 0x82, 0x4e, 0xb7, 0x7b, 0x32, 0xfe, + 0xa0, 0x6c, 0x25, 0xe9, 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, + 0x0b, 0xc7, 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5, + 0xc5, 0x09, 0x40, 0x8c, 0xd2, 0x1e, 
0x57, 0x9b, 0x0f, 0xc3, + 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51, 0x21, 0xed, 0xa4, 0x68, + 0x36, 0xfa, 0xb3, 0x7f, 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, + 0xc1, 0x0d, 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23, + 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, + 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f, 0x4c, 0x81, 0xcb, 0x06, + 0x5f, 0x92, 0xd8, 0x15, 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, + 0xfe, 0x33, 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0x0c, 0xc1, + 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7, 0xd4, 0x19, + 0x53, 0x9e, 0xc7, 0x0a, 0x40, 0x8d, 0xf2, 0x3f, 0x75, 0xb8, + 0xe1, 0x2c, 0x66, 0xab, 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, + 0xb9, 0x74, 0x0b, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52, + 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38, 0x47, 0x8a, + 0xc0, 0x0d, 0x54, 0x99, 0xd3, 0x1e, 0xb5, 0x78, 0x32, 0xff, + 0xa6, 0x6b, 0x21, 0xec, 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, + 0x07, 0xca, 0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0, + 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x01, 0x4b, 0x86, 0x5a, 0x97, + 0xdd, 0x10, 0x49, 0x84, 0xce, 0x03, 0x7c, 0xb1, 0xfb, 0x36, + 0x6f, 0xa2, 0xe8, 0x25, 0x16, 0xdb, 0x91, 0x5c, 0x05, 0xc8, + 0x82, 0x4f, 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69, + 0xc2, 0x0f, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b, 0xe4, 0x29, + 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd, 0x8e, 0x43, 0x09, 0xc4, + 0x9d, 0x50, 0x1a, 0xd7, 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, + 0x3c, 0xf1, 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e, + 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x08, 0x3b, 0xf6, + 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62, 0x1d, 0xd0, 0x9a, 0x57, + 0x0e, 0xc3, 0x89, 0x44, 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, + 0x7b, 0xb6, 0xc9, 0x04, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90, + 0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa, 0x85, 0x48, + 0x02, 0xcf, 0x96, 0x5b, 0x11, 0xdc, 0x00, 0xce, 0x81, 0x4f, + 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, + 0xa0, 0x6e, 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c, + 0x42, 0x8c, 0xc3, 0x0d, 0x5d, 0x93, 0xdc, 0x12, 
0xf8, 0x36, + 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8, 0xc6, 0x08, 0x47, 0x89, + 0xd9, 0x17, 0x58, 0x96, 0x84, 0x4a, 0x05, 0xcb, 0x9b, 0x55, + 0x1a, 0xd4, 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea, + 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd, 0xd3, 0x1d, + 0x52, 0x9c, 0xcc, 0x02, 0x4d, 0x83, 0x91, 0x5f, 0x10, 0xde, + 0x8e, 0x40, 0x0f, 0xc1, 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, + 0x31, 0xff, 0x15, 0xdb, 0x94, 0x5a, 0x0a, 0xc4, 0x8b, 0x45, + 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b, 0x69, 0xa7, + 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39, 0x57, 0x99, 0xd6, 0x18, + 0x48, 0x86, 0xc9, 0x07, 0xc7, 0x09, 0x46, 0x88, 0xd8, 0x16, + 0x59, 0x97, 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9, + 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb, 0x85, 0x4b, + 0x04, 0xca, 0x9a, 0x54, 0x1b, 0xd5, 0x3f, 0xf1, 0xbe, 0x70, + 0x20, 0xee, 0xa1, 0x6f, 0x01, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, + 0x9f, 0x51, 0x43, 0x8d, 0xc2, 0x0c, 0x5c, 0x92, 0xdd, 0x13, + 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d, 0x2a, 0xe4, + 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a, 0x14, 0xda, 0x95, 0x5b, + 0x0b, 0xc5, 0x8a, 0x44, 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, + 0xc8, 0x06, 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38, + 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x03, 0x4c, 0x82, 0xec, 0x22, + 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc, 0xae, 0x60, 0x2f, 0xe1, + 0xb1, 0x7f, 0x30, 0xfe, 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, + 0x0e, 0xc0, 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, + 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61, 0x6c, 0xa3, + 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b, 0x5a, 0x95, 0xd9, 0x16, + 0x41, 0x8e, 0xc2, 0x0d, 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0x0c, + 0x40, 0x8f, 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9, + 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3, 0x82, 0x4d, + 0x01, 0xce, 0x99, 0x56, 0x1a, 0xd5, 0xad, 0x62, 0x2e, 0xe1, + 0xb6, 0x79, 0x35, 0xfa, 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, + 0x03, 0xcc, 0xc1, 0x0e, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96, + 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0, 0x75, 0xba, + 
0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22, 0x43, 0x8c, 0xc0, 0x0f, + 0x58, 0x97, 0xdb, 0x14, 0x19, 0xd6, 0x9a, 0x55, 0x02, 0xcd, + 0x81, 0x4e, 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78, + 0x47, 0x88, 0xc4, 0x0b, 0x5c, 0x93, 0xdf, 0x10, 0x71, 0xbe, + 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26, 0x2b, 0xe4, 0xa8, 0x67, + 0x30, 0xff, 0xb3, 0x7c, 0x1d, 0xd2, 0x9e, 0x51, 0x06, 0xc9, + 0x85, 0x4a, 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x07, 0xc8, + 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe, 0xf3, 0x3c, + 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4, 0xc5, 0x0a, 0x46, 0x89, + 0xde, 0x11, 0x5d, 0x92, 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, + 0x72, 0xbd, 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x08, 0x44, 0x8b, + 0x86, 0x49, 0x05, 0xca, 0x9d, 0x52, 0x1e, 0xd1, 0xb0, 0x7f, + 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7, 0x32, 0xfd, 0xb1, 0x7e, + 0x29, 0xe6, 0xaa, 0x65, 0x04, 0xcb, 0x87, 0x48, 0x1f, 0xd0, + 0x9c, 0x53, 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x09, + 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f, 0x00, 0xd0, + 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, + 0xa9, 0x79, 0x14, 0xc4, 0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36, + 0x5b, 0x8b, 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45, + 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15, 0xd1, 0x01, + 0x6c, 0xbc, 0xb6, 0x66, 0x0b, 0xdb, 0x9e, 0x4e, 0x23, 0xf3, + 0xf9, 0x29, 0x44, 0x94, 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, + 0x8a, 0x5a, 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34, + 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa, 0xbf, 0x6f, + 0x02, 0xd2, 0xd8, 0x08, 0x65, 0xb5, 0x71, 0xa1, 0xcc, 0x1c, + 0x16, 0xc6, 0xab, 0x7b, 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, + 0xfb, 0x2b, 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5, + 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa, 0x6e, 0xbe, + 0xd3, 0x03, 0x09, 0xd9, 0xb4, 0x64, 0x7c, 0xac, 0xc1, 0x11, + 0x1b, 0xcb, 0xa6, 0x76, 0xb2, 0x62, 0x0f, 0xdf, 0xd5, 0x05, + 0x68, 0xb8, 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7, + 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39, 0x63, 0xb3, + 0xde, 0x0e, 
0x04, 0xd4, 0xb9, 0x69, 0xad, 0x7d, 0x10, 0xc0, + 0xca, 0x1a, 0x77, 0xa7, 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, + 0x38, 0xe8, 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26, + 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48, 0x8c, 0x5c, + 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86, 0xc3, 0x13, 0x7e, 0xae, + 0xa4, 0x74, 0x19, 0xc9, 0x0d, 0xdd, 0xb0, 0x60, 0x6a, 0xba, + 0xd7, 0x07, 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57, + 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99, 0xdc, 0x0c, + 0x61, 0xb1, 0xbb, 0x6b, 0x06, 0xd6, 0x12, 0xc2, 0xaf, 0x7f, + 0x75, 0xa5, 0xc8, 0x18, 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, + 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb, + 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c, 0x57, 0x86, + 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a, 0x3f, 0xee, 0x80, 0x51, + 0x5c, 0x8d, 0xe3, 0x32, 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, + 0x25, 0xf4, 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3, + 0x68, 0xb9, 0xd7, 0x06, 0x0b, 0xda, 0xb4, 0x65, 0x7e, 0xaf, + 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73, 0xb8, 0x69, 0x07, 0xd6, + 0xdb, 0x0a, 0x64, 0xb5, 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, + 0x33, 0xe2, 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24, + 0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c, 0x87, 0x56, + 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a, 0xd0, 0x01, 0x6f, 0xbe, + 0xb3, 0x62, 0x0c, 0xdd, 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, + 0xca, 0x1b, 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1, + 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37, 0x6d, 0xbc, + 0xd2, 0x03, 0x0e, 0xdf, 0xb1, 0x60, 0xab, 0x7a, 0x14, 0xc5, + 0xc8, 0x19, 0x77, 0xa6, 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, + 0x1f, 0xce, 0x05, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x08, + 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f, 0x94, 0x45, + 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99, 0x82, 0x53, 0x3d, 0xec, + 0xe1, 0x30, 0x5e, 0x8f, 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, + 0x98, 0x49, 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e, + 0xd5, 0x04, 0x6a, 0xbb, 0xb6, 0x67, 0x09, 0xd8, 0xbd, 0x6c, + 0x02, 0xd3, 0xde, 0x0f, 
0x61, 0xb0, 0x7b, 0xaa, 0xc4, 0x15, + 0x18, 0xc9, 0xa7, 0x76, 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, + 0xf0, 0x21, 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7, + 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, + 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda, 0xa1, 0x73, 0x18, 0xca, + 0xce, 0x1c, 0x77, 0xa5, 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, + 0xa9, 0x7b, 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b, + 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85, 0xfe, 0x2c, + 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa, 0x20, 0xf2, 0x99, 0x4b, + 0x4f, 0x9d, 0xf6, 0x24, 0xbe, 0x6c, 0x07, 0xd5, 0xd1, 0x03, + 0x68, 0xba, 0x60, 0xb2, 0xd9, 0x0b, 0x0f, 0xdd, 0xb6, 0x64, + 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b, 0xc1, 0x13, + 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5, 0xe1, 0x33, 0x58, 0x8a, + 0x8e, 0x5c, 0x37, 0xe5, 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, + 0xe9, 0x3b, 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44, + 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a, 0x61, 0xb3, + 0xd8, 0x0a, 0x0e, 0xdc, 0xb7, 0x65, 0xbf, 0x6d, 0x06, 0xd4, + 0xd0, 0x02, 0x69, 0xbb, 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, + 0x16, 0xc4, 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a, + 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a, 0xe0, 0x32, + 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4, 0x9f, 0x4d, 0x26, 0xf4, + 0xf0, 0x22, 0x49, 0x9b, 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, + 0x97, 0x45, 0xdf, 0x0d, 0x66, 0xb4, 0xb0, 0x62, 0x09, 0xdb, + 0x01, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x05, 0x7e, 0xac, + 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a, 0xa0, 0x72, 0x19, 0xcb, + 0xcf, 0x1d, 0x76, 0xa4, 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, + 0x56, 0x84, 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a, + 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25, 0xff, 0x2d, + 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb, 0x00, 0xd3, 0xbb, 0x68, + 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, + 0x06, 0xd5, 0xb1, 0x62, 0x0a, 0xd9, 0xda, 0x09, 0x61, 0xb2, + 0x67, 0xb4, 0xdc, 0x0f, 0x0c, 0xdf, 0xb7, 0x64, 0x7f, 0xac, + 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c, 
0xa9, 0x7a, 0x12, 0xc1, + 0xc2, 0x11, 0x79, 0xaa, 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, + 0x1e, 0xcd, 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b, + 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd, 0x28, 0xfb, + 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b, 0x4f, 0x9c, 0xf4, 0x27, + 0x24, 0xf7, 0x9f, 0x4c, 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, + 0x49, 0x9a, 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82, + 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54, 0x30, 0xe3, + 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33, 0xe6, 0x35, 0x5d, 0x8e, + 0x8d, 0x5e, 0x36, 0xe5, 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, + 0x31, 0xe2, 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34, + 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53, 0x86, 0x55, + 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85, 0x9e, 0x4d, 0x25, 0xf6, + 0xf5, 0x26, 0x4e, 0x9d, 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, + 0x98, 0x4b, 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c, + 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa, 0x1f, 0xcc, + 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c, 0xc9, 0x1a, 0x72, 0xa1, + 0xa2, 0x71, 0x19, 0xca, 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, + 0x7e, 0xad, 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b, + 0x60, 0xb3, 0xdb, 0x08, 0x0b, 0xd8, 0xb0, 0x63, 0xb6, 0x65, + 0x0d, 0xde, 0xdd, 0x0e, 0x66, 0xb5, 0xd1, 0x02, 0x6a, 0xb9, + 0xba, 0x69, 0x01, 0xd2, 0x07, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, + 0xd7, 0x04, 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, + 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8, 0xc1, 0x15, + 0x74, 0xa0, 0xb6, 0x62, 0x03, 0xd7, 0x2f, 0xfb, 0x9a, 0x4e, + 0x58, 0x8c, 0xed, 0x39, 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, + 0x5d, 0x89, 0x71, 0xa5, 0xc4, 0x10, 0x06, 0xd2, 0xb3, 0x67, + 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48, 0xb0, 0x64, + 0x05, 0xd1, 0xc7, 0x13, 0x72, 0xa6, 0x23, 0xf7, 0x96, 0x42, + 0x54, 0x80, 0xe1, 0x35, 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, + 0x0f, 0xdb, 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4, + 0x0c, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a, 0xbc, 0x68, + 0x09, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa, 0x52, 0x86, 
0xe7, 0x33, + 0x25, 0xf1, 0x90, 0x44, 0x7d, 0xa9, 0xc8, 0x1c, 0x0a, 0xde, + 0xbf, 0x6b, 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85, + 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50, 0xa8, 0x7c, + 0x1d, 0xc9, 0xdf, 0x0b, 0x6a, 0xbe, 0x87, 0x53, 0x32, 0xe6, + 0xf0, 0x24, 0x45, 0x91, 0x69, 0xbd, 0xdc, 0x08, 0x1e, 0xca, + 0xab, 0x7f, 0xd9, 0x0d, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf, + 0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21, 0x18, 0xcc, + 0xad, 0x79, 0x6f, 0xbb, 0xda, 0x0e, 0xf6, 0x22, 0x43, 0x97, + 0x81, 0x55, 0x34, 0xe0, 0x65, 0xb1, 0xd0, 0x04, 0x12, 0xc6, + 0xa7, 0x73, 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d, + 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x07, 0x66, 0xb2, 0x4a, 0x9e, + 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c, 0xfa, 0x2e, 0x4f, 0x9b, + 0x8d, 0x59, 0x38, 0xec, 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, + 0xd6, 0x02, 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d, + 0xd5, 0x01, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3, 0x00, 0xd5, + 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, + 0x95, 0x40, 0x22, 0xf7, 0xd1, 0x04, 0x66, 0xb3, 0xa2, 0x77, + 0x15, 0xc0, 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26, + 0xbf, 0x6a, 0x08, 0xdd, 0xcc, 0x19, 0x7b, 0xae, 0x59, 0x8c, + 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48, 0x6e, 0xbb, 0xd9, 0x0c, + 0x1d, 0xc8, 0xaa, 0x7f, 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, + 0x4c, 0x99, 0x63, 0xb6, 0xd4, 0x01, 0x10, 0xc5, 0xa7, 0x72, + 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94, 0xb2, 0x67, + 0x05, 0xd0, 0xc1, 0x14, 0x76, 0xa3, 0x54, 0x81, 0xe3, 0x36, + 0x27, 0xf2, 0x90, 0x45, 0xdc, 0x09, 0x6b, 0xbe, 0xaf, 0x7a, + 0x18, 0xcd, 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b, + 0x0d, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c, 0xeb, 0x3e, + 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa, 0xc6, 0x13, 0x71, 0xa4, + 0xb5, 0x60, 0x02, 0xd7, 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, + 0xe4, 0x31, 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x06, + 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0, 0x79, 0xac, + 0xce, 0x1b, 0x0a, 0xdf, 0xbd, 0x68, 0x9f, 0x4a, 0x28, 0xfd, + 
0xec, 0x39, 0x5b, 0x8e, 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0x0e, + 0x6c, 0xb9, 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f, + 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x03, 0x61, 0xb4, 0x43, 0x96, + 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52, 0x74, 0xa1, 0xc3, 0x16, + 0x07, 0xd2, 0xb0, 0x65, 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, + 0x56, 0x83, 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0x0b, + 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed, 0xcb, 0x1e, + 0x7c, 0xa9, 0xb8, 0x6d, 0x0f, 0xda, 0x2d, 0xf8, 0x9a, 0x4f, + 0x5e, 0x8b, 0xe9, 0x3c, 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, + 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6, + 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9, 0x1f, 0xc9, + 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x07, 0xdf, 0x09, 0x6e, 0xb8, + 0xa0, 0x76, 0x11, 0xc7, 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, + 0xef, 0x39, 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26, + 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0x0e, 0xd8, 0xa3, 0x75, + 0x12, 0xc4, 0xdc, 0x0a, 0x6d, 0xbb, 0x5d, 0x8b, 0xec, 0x3a, + 0x22, 0xf4, 0x93, 0x45, 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, + 0x8c, 0x5a, 0xbc, 0x6a, 0x0d, 0xdb, 0xc3, 0x15, 0x72, 0xa4, + 0x7c, 0xaa, 0xcd, 0x1b, 0x03, 0xd5, 0xb2, 0x64, 0x82, 0x54, + 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a, 0x9d, 0x4b, 0x2c, 0xfa, + 0xe2, 0x34, 0x53, 0x85, 0x63, 0xb5, 0xd2, 0x04, 0x1c, 0xca, + 0xad, 0x7b, 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43, + 0xa5, 0x73, 0x14, 0xc2, 0xda, 0x0c, 0x6b, 0xbd, 0xba, 0x6c, + 0x0b, 0xdd, 0xc5, 0x13, 0x74, 0xa2, 0x44, 0x92, 0xf5, 0x23, + 0x3b, 0xed, 0x8a, 0x5c, 0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, + 0x4a, 0x9c, 0x7a, 0xac, 0xcb, 0x1d, 0x05, 0xd3, 0xb4, 0x62, + 0x65, 0xb3, 0xd4, 0x02, 0x1a, 0xcc, 0xab, 0x7d, 0x9b, 0x4d, + 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83, 0xf8, 0x2e, 0x49, 0x9f, + 0x87, 0x51, 0x36, 0xe0, 0x06, 0xd0, 0xb7, 0x61, 0x79, 0xaf, + 0xc8, 0x1e, 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x01, + 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff, 0x27, 0xf1, + 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f, 0xd9, 0x0f, 0x68, 0xbe, + 0xa6, 0x70, 
0x17, 0xc1, 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, + 0x08, 0xde, 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20, + 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, + 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9, 0xf1, 0x26, 0x42, 0x95, + 0x8a, 0x5d, 0x39, 0xee, 0x07, 0xd0, 0xb4, 0x63, 0x7c, 0xab, + 0xcf, 0x18, 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0, + 0x09, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16, 0x0e, 0xd9, + 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11, 0xf8, 0x2f, 0x4b, 0x9c, + 0x83, 0x54, 0x30, 0xe7, 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, + 0x2b, 0xfc, 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0x0a, + 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0x0d, 0xe4, 0x33, + 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb, 0x1c, 0xcb, 0xaf, 0x78, + 0x67, 0xb0, 0xd4, 0x03, 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, + 0x22, 0xf5, 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2, + 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x04, 0xdb, 0x0c, + 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4, 0x2d, 0xfa, 0x9e, 0x49, + 0x56, 0x81, 0xe5, 0x32, 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, + 0xe2, 0x35, 0xdc, 0x0b, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3, + 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b, 0xd2, 0x05, + 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd, 0xd5, 0x02, 0x66, 0xb1, + 0xae, 0x79, 0x1d, 0xca, 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, + 0xeb, 0x3c, 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27, + 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x06, 0xd1, 0xc9, 0x1e, + 0x7a, 0xad, 0xb2, 0x65, 0x01, 0xd6, 0x3f, 0xe8, 0x8c, 0x5b, + 0x44, 0x93, 0xf7, 0x20, 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, + 0x0f, 0xd8, 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e, + 0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29, 0xc0, 0x17, + 0x73, 0xa4, 0xbb, 0x6c, 0x08, 0xdf, 0x00, 0xd8, 0xad, 0x75, + 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, + 0x64, 0xbc, 0x01, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33, + 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd, 0x02, 0xda, + 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30, 0x8c, 0x54, 0x21, 0xf9, + 0xcb, 0x13, 0x66, 0xbe, 
0x03, 0xdb, 0xae, 0x76, 0x44, 0x9c, + 0xe9, 0x31, 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf, + 0x04, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36, 0x8a, 0x52, + 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8, 0x05, 0xdd, 0xa8, 0x70, + 0x42, 0x9a, 0xef, 0x37, 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, + 0x61, 0xb9, 0x06, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34, + 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba, 0x07, 0xdf, + 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35, 0x89, 0x51, 0x24, 0xfc, + 0xce, 0x16, 0x63, 0xbb, 0x08, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, + 0xe2, 0x3a, 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4, + 0x09, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b, 0x87, 0x5f, + 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5, 0x0a, 0xd2, 0xa7, 0x7f, + 0x4d, 0x95, 0xe0, 0x38, 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, + 0x6e, 0xb6, 0x0b, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39, + 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7, 0x0c, 0xd4, + 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e, 0x82, 0x5a, 0x2f, 0xf7, + 0xc5, 0x1d, 0x68, 0xb0, 0x0d, 0xd5, 0xa0, 0x78, 0x4a, 0x92, + 0xe7, 0x3f, 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1, + 0x0e, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c, 0x80, 0x58, + 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2, 0x0f, 0xd7, 0xa2, 0x7a, + 0x48, 0x90, 0xe5, 0x3d, 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, + 0x6b, 0xb3, 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, + 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3, 0x11, 0xc8, + 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24, 0x97, 0x4e, 0x38, 0xe1, + 0xd4, 0x0d, 0x7b, 0xa2, 0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8, + 0xce, 0x17, 0xa4, 0x7d, 0x0b, 0xd2, 0xe7, 0x3e, 0x48, 0x91, + 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x06, 0xb5, 0x6c, + 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80, 0x44, 0x9d, 0xeb, 0x32, + 0x07, 0xde, 0xa8, 0x71, 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, + 0x2e, 0xf7, 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60, + 0xd3, 0x0a, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6, 0x66, 0xbf, + 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53, 0xe0, 0x39, 0x4f, 0x96, + 0xa3, 0x7a, 0x0c, 0xd5, 0x77, 0xae, 
0xd8, 0x01, 0x34, 0xed, + 0x9b, 0x42, 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4, + 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd, 0x0e, 0xd7, + 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b, 0x99, 0x40, 0x36, 0xef, + 0xda, 0x03, 0x75, 0xac, 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, + 0xf3, 0x2a, 0xaa, 0x73, 0x05, 0xdc, 0xe9, 0x30, 0x46, 0x9f, + 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19, 0xbb, 0x62, + 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e, 0x3d, 0xe4, 0x92, 0x4b, + 0x7e, 0xa7, 0xd1, 0x08, 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, + 0x20, 0xf9, 0x4a, 0x93, 0xe5, 0x3c, 0x09, 0xd0, 0xa6, 0x7f, + 0xdd, 0x04, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8, 0x5b, 0x82, + 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e, 0xee, 0x37, 0x41, 0x98, + 0xad, 0x74, 0x02, 0xdb, 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, + 0x84, 0x5d, 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca, + 0x79, 0xa0, 0xd6, 0x0f, 0x3a, 0xe3, 0x95, 0x4c, 0x00, 0xda, + 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, + 0xd1, 0x0b, 0x78, 0xa2, 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, + 0xc7, 0x1d, 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83, + 0x42, 0x98, 0xeb, 0x31, 0x0d, 0xd7, 0xa4, 0x7e, 0xdc, 0x06, + 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0, 0x63, 0xb9, 0xca, 0x10, + 0x2c, 0xf6, 0x85, 0x5f, 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, + 0x1b, 0xc1, 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8, + 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26, 0xa5, 0x7f, + 0x0c, 0xd6, 0xea, 0x30, 0x43, 0x99, 0x3b, 0xe1, 0x92, 0x48, + 0x74, 0xae, 0xdd, 0x07, 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, + 0x20, 0xfa, 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64, + 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x01, 0xdb, 0x79, 0xa3, + 0xd0, 0x0a, 0x36, 0xec, 0x9f, 0x45, 0x15, 0xcf, 0xbc, 0x66, + 0x5a, 0x80, 0xf3, 0x29, 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, + 0x6d, 0xb7, 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x08, + 0xaa, 0x70, 0x03, 0xd9, 0xe5, 0x3f, 0x4c, 0x96, 0x57, 0x8d, + 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b, 0xc9, 0x13, 0x60, 0xba, + 0x86, 0x5c, 0x2f, 0xf5, 0x76, 0xac, 0xdf, 0x05, 
0x39, 0xe3, + 0x90, 0x4a, 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0x0e, 0xd4, + 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x04, 0x77, 0xad, 0x0f, 0xd5, + 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33, 0xb0, 0x6a, 0x19, 0xc3, + 0xff, 0x25, 0x56, 0x8c, 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, + 0xc8, 0x12, 0xd3, 0x09, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef, + 0x4d, 0x97, 0xe4, 0x3e, 0x02, 0xd8, 0xab, 0x71, 0xf2, 0x28, + 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce, 0x6c, 0xb6, 0xc5, 0x1f, + 0x23, 0xf9, 0x8a, 0x50, 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, + 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad, + 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0x0a, 0xa7, 0x7c, + 0x0c, 0xd7, 0xec, 0x37, 0x47, 0x9c, 0x62, 0xb9, 0xc9, 0x12, + 0x29, 0xf2, 0x82, 0x59, 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, + 0x14, 0xcf, 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68, + 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe, 0xc4, 0x1f, + 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff, 0x52, 0x89, 0xf9, 0x22, + 0x19, 0xc2, 0xb2, 0x69, 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, + 0x15, 0xce, 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58, + 0xa6, 0x7d, 0x0d, 0xd6, 0xed, 0x36, 0x46, 0x9d, 0x30, 0xeb, + 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0x0b, 0x97, 0x4c, 0x3c, 0xe7, + 0xdc, 0x07, 0x77, 0xac, 0x01, 0xda, 0xaa, 0x71, 0x4a, 0x91, + 0xe1, 0x3a, 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x05, 0x75, 0xae, + 0x03, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38, 0xa4, 0x7f, + 0x0f, 0xd4, 0xef, 0x34, 0x44, 0x9f, 0x32, 0xe9, 0x99, 0x42, + 0x79, 0xa2, 0xd2, 0x09, 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, + 0x17, 0xcc, 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a, + 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd, 0x50, 0x8b, + 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b, 0x51, 0x8a, 0xfa, 0x21, + 0x1a, 0xc1, 0xb1, 0x6a, 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, + 0x27, 0xfc, 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b, + 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd, 0x33, 0xe8, + 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x08, 0xa5, 0x7e, 0x0e, 0xd5, + 0xee, 0x35, 0x45, 0x9e, 0x02, 0xd9, 0xa9, 0x72, 0x49, 0x92, + 
0xe2, 0x39, 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x04, 0x74, 0xaf, + 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, + 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80, 0x41, 0x9d, 0xe4, 0x38, + 0x16, 0xca, 0xb3, 0x6f, 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, + 0x1d, 0xc1, 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x09, 0x70, 0xac, + 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x02, 0xc3, 0x1f, + 0x66, 0xba, 0x94, 0x48, 0x31, 0xed, 0x6d, 0xb1, 0xc8, 0x14, + 0x3a, 0xe6, 0x9f, 0x43, 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, + 0xeb, 0x37, 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99, + 0x58, 0x84, 0xfd, 0x21, 0x0f, 0xd3, 0xaa, 0x76, 0xf6, 0x2a, + 0x53, 0x8f, 0xa1, 0x7d, 0x04, 0xd8, 0x9b, 0x47, 0x3e, 0xe2, + 0xcc, 0x10, 0x69, 0xb5, 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, + 0xc7, 0x1b, 0xda, 0x06, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4, + 0x74, 0xa8, 0xd1, 0x0d, 0x23, 0xff, 0x86, 0x5a, 0x32, 0xee, + 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c, 0x9c, 0x40, 0x39, 0xe5, + 0xcb, 0x17, 0x6e, 0xb2, 0x73, 0xaf, 0xd6, 0x0a, 0x24, 0xf8, + 0x81, 0x5d, 0xdd, 0x01, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3, + 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e, 0x1e, 0xc2, + 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30, 0xf1, 0x2d, 0x54, 0x88, + 0xa6, 0x7a, 0x03, 0xdf, 0x5f, 0x83, 0xfa, 0x26, 0x08, 0xd4, + 0xad, 0x71, 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x05, + 0x85, 0x59, 0x20, 0xfc, 0xd2, 0x0e, 0x77, 0xab, 0x6a, 0xb6, + 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44, 0xc4, 0x18, 0x61, 0xbd, + 0x93, 0x4f, 0x36, 0xea, 0xa9, 0x75, 0x0c, 0xd0, 0xfe, 0x22, + 0x5b, 0x87, 0x07, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29, + 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6, 0x46, 0x9a, + 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68, 0x00, 0xdd, 0xa7, 0x7a, + 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, + 0x52, 0x8f, 0x51, 0x8c, 0xf6, 0x2b, 0x02, 0xdf, 0xa5, 0x78, + 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x03, 0xde, 0xa2, 0x7f, + 0x05, 0xd8, 0xf1, 0x2c, 0x56, 0x8b, 0x04, 0xd9, 0xa3, 0x7e, + 0x57, 0x8a, 0xf0, 0x2d, 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, + 0x07, 0xda, 
0x55, 0x88, 0xf2, 0x2f, 0x06, 0xdb, 0xa1, 0x7c, + 0x59, 0x84, 0xfe, 0x23, 0x0a, 0xd7, 0xad, 0x70, 0xff, 0x22, + 0x58, 0x85, 0xac, 0x71, 0x0b, 0xd6, 0x08, 0xd5, 0xaf, 0x72, + 0x5b, 0x86, 0xfc, 0x21, 0xae, 0x73, 0x09, 0xd4, 0xfd, 0x20, + 0x5a, 0x87, 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0x0f, 0xd2, + 0x5d, 0x80, 0xfa, 0x27, 0x0e, 0xd3, 0xa9, 0x74, 0xaa, 0x77, + 0x0d, 0xd0, 0xf9, 0x24, 0x5e, 0x83, 0x0c, 0xd1, 0xab, 0x76, + 0x5f, 0x82, 0xf8, 0x25, 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, + 0x46, 0x9b, 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d, + 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca, 0x45, 0x98, + 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c, 0x10, 0xcd, 0xb7, 0x6a, + 0x43, 0x9e, 0xe4, 0x39, 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, + 0x42, 0x9f, 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68, + 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce, 0xeb, 0x36, + 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2, 0x4d, 0x90, 0xea, 0x37, + 0x1e, 0xc3, 0xb9, 0x64, 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, + 0x4e, 0x93, 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35, + 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60, 0xef, 0x32, + 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6, 0x18, 0xc5, 0xbf, 0x62, + 0x4b, 0x96, 0xec, 0x31, 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, + 0x4a, 0x97, 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, + 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e, 0x61, 0xbf, + 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41, 0xdf, 0x01, 0x7e, 0xa0, + 0x80, 0x5e, 0x21, 0xff, 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, + 0x3c, 0xe2, 0x7c, 0xa2, 0xdd, 0x03, 0x23, 0xfd, 0x82, 0x5c, + 0xa3, 0x7d, 0x02, 0xdc, 0xfc, 0x22, 0x5d, 0x83, 0x1d, 0xc3, + 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d, 0x99, 0x47, 0x38, 0xe6, + 0xc6, 0x18, 0x67, 0xb9, 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, + 0xd9, 0x07, 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x06, 0xd8, + 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66, 0x5b, 0x85, + 0xfa, 0x24, 0x04, 0xda, 0xa5, 0x7b, 0xe5, 0x3b, 0x44, 0x9a, + 0xba, 0x64, 0x1b, 0xc5, 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, + 0xc4, 0x1a, 0x84, 0x5a, 
0x25, 0xfb, 0xdb, 0x05, 0x7a, 0xa4, + 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0x0f, 0x91, 0x4f, + 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1, 0x4e, 0x90, 0xef, 0x31, + 0x11, 0xcf, 0xb0, 0x6e, 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, + 0x0e, 0xd0, 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd, + 0x53, 0x8d, 0xf2, 0x2c, 0x0c, 0xd2, 0xad, 0x73, 0x8c, 0x52, + 0x2d, 0xf3, 0xd3, 0x0d, 0x72, 0xac, 0x32, 0xec, 0x93, 0x4d, + 0x6d, 0xb3, 0xcc, 0x12, 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, + 0x48, 0x96, 0x08, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28, + 0xd7, 0x09, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7, 0x69, 0xb7, + 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49, 0x74, 0xaa, 0xd5, 0x0b, + 0x2b, 0xf5, 0x8a, 0x54, 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, + 0x34, 0xea, 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35, + 0xab, 0x75, 0x0a, 0xd4, 0xf4, 0x2a, 0x55, 0x8b, 0x00, 0xdf, + 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, + 0xed, 0x32, 0x4e, 0x91, 0x71, 0xae, 0xd2, 0x0d, 0x2a, 0xf5, + 0x89, 0x56, 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0, + 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5, 0x54, 0x8b, + 0xf7, 0x28, 0x0f, 0xd0, 0xac, 0x73, 0x93, 0x4c, 0x30, 0xef, + 0xc8, 0x17, 0x6b, 0xb4, 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, + 0xdd, 0x02, 0xd9, 0x06, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe, + 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48, 0xa8, 0x77, + 0x0b, 0xd4, 0xf3, 0x2c, 0x50, 0x8f, 0x1e, 0xc1, 0xbd, 0x62, + 0x45, 0x9a, 0xe6, 0x39, 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, + 0xc3, 0x1c, 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x09, 0x75, 0xaa, + 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d, 0xfc, 0x23, + 0x5f, 0x80, 0xa7, 0x78, 0x04, 0xdb, 0xaf, 0x70, 0x0c, 0xd3, + 0xf4, 0x2b, 0x57, 0x88, 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, + 0xe1, 0x3e, 0xde, 0x01, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9, + 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f, 0x4d, 0x92, + 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a, 0xfb, 0x24, 0x58, 0x87, + 0xa0, 0x7f, 0x03, 0xdc, 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, + 0xc4, 0x1b, 0x8a, 0x55, 0x29, 0xf6, 
0xd1, 0x0e, 0x72, 0xad, + 0x76, 0xa9, 0xd5, 0x0a, 0x2d, 0xf2, 0x8e, 0x51, 0xc0, 0x1f, + 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7, 0x07, 0xd8, 0xa4, 0x7b, + 0x5c, 0x83, 0xff, 0x20, 0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35, + 0x49, 0x96, 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3, + 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x05, 0xe5, 0x3a, + 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2, 0x53, 0x8c, 0xf0, 0x2f, + 0x08, 0xd7, 0xab, 0x74, 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, + 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9, + 0xa6, 0x46, 0x7b, 0x9b, 0x01, 0xe1, 0xdc, 0x3c, 0xf5, 0x15, + 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f, 0x51, 0xb1, 0x8c, 0x6c, + 0xf6, 0x16, 0x2b, 0xcb, 0x02, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, + 0x78, 0x98, 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d, + 0xa4, 0x44, 0x79, 0x99, 0x03, 0xe3, 0xde, 0x3e, 0xa2, 0x42, + 0x7f, 0x9f, 0x05, 0xe5, 0xd8, 0x38, 0xf1, 0x11, 0x2c, 0xcc, + 0x56, 0xb6, 0x8b, 0x6b, 0x04, 0xe4, 0xd9, 0x39, 0xa3, 0x43, + 0x7e, 0x9e, 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd, + 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69, 0xa0, 0x40, + 0x7d, 0x9d, 0x07, 0xe7, 0xda, 0x3a, 0x55, 0xb5, 0x88, 0x68, + 0xf2, 0x12, 0x2f, 0xcf, 0x06, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, + 0x7c, 0x9c, 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3, + 0x0a, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90, 0xff, 0x1f, + 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65, 0xac, 0x4c, 0x71, 0x91, + 0x0b, 0xeb, 0xd6, 0x36, 0x08, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, + 0x72, 0x92, 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1, + 0xae, 0x4e, 0x73, 0x93, 0x09, 0xe9, 0xd4, 0x34, 0xfd, 0x1d, + 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67, 0xfb, 0x1b, 0x26, 0xc6, + 0x5c, 0xbc, 0x81, 0x61, 0xa8, 0x48, 0x75, 0x95, 0x0f, 0xef, + 0xd2, 0x32, 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7, + 0x0e, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94, 0xaa, 0x4a, + 0x77, 0x97, 0x0d, 0xed, 0xd0, 0x30, 0xf9, 0x19, 0x24, 0xc4, + 0x5e, 0xbe, 0x83, 0x63, 0x0c, 0xec, 0xd1, 0x31, 0xab, 0x4b, + 0x76, 0x96, 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 
0x25, 0xc5, + 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, + 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6, 0xb6, 0x57, 0x69, 0x88, + 0x15, 0xf4, 0xca, 0x2b, 0xed, 0x0c, 0x32, 0xd3, 0x4e, 0xaf, + 0x91, 0x70, 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0x0d, 0xec, + 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7, 0xc7, 0x26, + 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a, 0x9c, 0x7d, 0x43, 0xa2, + 0x3f, 0xde, 0xe0, 0x01, 0xe2, 0x03, 0x3d, 0xdc, 0x41, 0xa0, + 0x9e, 0x7f, 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24, + 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9, 0x0f, 0xee, + 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92, 0x93, 0x72, 0x4c, 0xad, + 0x30, 0xd1, 0xef, 0x0e, 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, + 0xb4, 0x55, 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8, + 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x02, 0xe3, 0xd9, 0x38, + 0x06, 0xe7, 0x7a, 0x9b, 0xa5, 0x44, 0x82, 0x63, 0x5d, 0xbc, + 0x21, 0xc0, 0xfe, 0x1f, 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, + 0x13, 0xf2, 0x34, 0xd5, 0xeb, 0x0a, 0x97, 0x76, 0x48, 0xa9, + 0xa8, 0x49, 0x77, 0x96, 0x0b, 0xea, 0xd4, 0x35, 0xf3, 0x12, + 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e, 0x1e, 0xff, 0xc1, 0x20, + 0xbd, 0x5c, 0x62, 0x83, 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x07, + 0x39, 0xd8, 0x3b, 0xda, 0xe4, 0x05, 0x98, 0x79, 0x47, 0xa6, + 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd, 0x8d, 0x6c, + 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10, 0xd6, 0x37, 0x09, 0xe8, + 0x75, 0x94, 0xaa, 0x4b, 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x08, + 0x36, 0xd7, 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c, + 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61, 0xa7, 0x46, + 0x78, 0x99, 0x04, 0xe5, 0xdb, 0x3a, 0x00, 0xe2, 0xd9, 0x3b, + 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, + 0x35, 0xd7, 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12, + 0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51, 0x11, 0xf3, + 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85, 0x52, 0xb0, 0x8b, 0x69, + 0xfd, 0x1f, 0x24, 0xc6, 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, + 0xe1, 0x03, 0xd4, 0x36, 0x0d, 0xef, 0x7b, 0x99, 0xa2, 0x40, + 
0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6, 0x61, 0x83, + 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5, 0xa4, 0x46, 0x7d, 0x9f, + 0x0b, 0xe9, 0xd2, 0x30, 0xe7, 0x05, 0x3e, 0xdc, 0x48, 0xaa, + 0x91, 0x73, 0x33, 0xd1, 0xea, 0x08, 0x9c, 0x7e, 0x45, 0xa7, + 0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x06, 0xe4, 0xb5, 0x57, + 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21, 0xf6, 0x14, 0x2f, 0xcd, + 0x59, 0xbb, 0x80, 0x62, 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x09, + 0x32, 0xd0, 0x07, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93, + 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56, 0x81, 0x63, + 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15, 0x55, 0xb7, 0x8c, 0x6e, + 0xfa, 0x18, 0x23, 0xc1, 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, + 0x60, 0x82, 0xd3, 0x31, 0x0a, 0xe8, 0x7c, 0x9e, 0xa5, 0x47, + 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x04, 0x66, 0x84, + 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2, 0x25, 0xc7, 0xfc, 0x1e, + 0x8a, 0x68, 0x53, 0xb1, 0xe0, 0x02, 0x39, 0xdb, 0x4f, 0xad, + 0x96, 0x74, 0xa3, 0x41, 0x7a, 0x98, 0x0c, 0xee, 0xd5, 0x37, + 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x01, 0xe3, 0x34, 0xd6, + 0xed, 0x0f, 0x9b, 0x79, 0x42, 0xa0, 0xf1, 0x13, 0x28, 0xca, + 0x5e, 0xbc, 0x87, 0x65, 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, + 0xc4, 0x26, 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, + 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8, 0x96, 0x75, + 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x05, 0xdd, 0x3e, 0x06, 0xe5, + 0x76, 0x95, 0xad, 0x4e, 0x31, 0xd2, 0xea, 0x09, 0x9a, 0x79, + 0x41, 0xa2, 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0x0a, 0xe9, + 0xa7, 0x44, 0x7c, 0x9f, 0x0c, 0xef, 0xd7, 0x34, 0xec, 0x0f, + 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f, 0x62, 0x81, 0xb9, 0x5a, + 0xc9, 0x2a, 0x12, 0xf1, 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, + 0x59, 0xba, 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67, + 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c, 0x53, 0xb0, + 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0, 0x18, 0xfb, 0xc3, 0x20, + 0xb3, 0x50, 0x68, 0x8b, 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, + 0xb5, 0x56, 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d, + 0xc4, 0x27, 
0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57, 0x8f, 0x6c, + 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c, 0x52, 0xb1, 0x89, 0x6a, + 0xf9, 0x1a, 0x22, 0xc1, 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, + 0x69, 0x8a, 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66, + 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d, 0x63, 0x80, + 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0, 0x28, 0xcb, 0xf3, 0x10, + 0x83, 0x60, 0x58, 0xbb, 0xa6, 0x45, 0x7d, 0x9e, 0x0d, 0xee, + 0xd6, 0x35, 0xed, 0x0e, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e, + 0x30, 0xd3, 0xeb, 0x08, 0x9b, 0x78, 0x40, 0xa3, 0x7b, 0x98, + 0xa0, 0x43, 0xd0, 0x33, 0x0b, 0xe8, 0x97, 0x74, 0x4c, 0xaf, + 0x3c, 0xdf, 0xe7, 0x04, 0xdc, 0x3f, 0x07, 0xe4, 0x77, 0x94, + 0xac, 0x4f, 0x01, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92, + 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x02, 0x3a, 0xd9, 0x00, 0xe4, + 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, + 0xc4, 0x20, 0x11, 0xf5, 0xe6, 0x02, 0x33, 0xd7, 0x51, 0xb5, + 0x84, 0x60, 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13, + 0xd1, 0x35, 0x04, 0xe0, 0x66, 0x82, 0xb3, 0x57, 0xa2, 0x46, + 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24, 0x37, 0xd3, 0xe2, 0x06, + 0x80, 0x64, 0x55, 0xb1, 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, + 0x26, 0xc2, 0xbf, 0x5b, 0x6a, 0x8e, 0x08, 0xec, 0xdd, 0x39, + 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a, 0x59, 0xbd, + 0x8c, 0x68, 0xee, 0x0a, 0x3b, 0xdf, 0x2a, 0xce, 0xff, 0x1b, + 0x9d, 0x79, 0x48, 0xac, 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, + 0x0c, 0xe8, 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b, + 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0x0e, 0xfb, 0x1f, + 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d, 0x63, 0x87, 0xb6, 0x52, + 0xd4, 0x30, 0x01, 0xe5, 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, + 0x72, 0x96, 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x03, + 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70, 0xb2, 0x56, + 0x67, 0x83, 0x05, 0xe1, 0xd0, 0x34, 0xc1, 0x25, 0x14, 0xf0, + 0x76, 0x92, 0xa3, 0x47, 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x07, + 0x36, 0xd2, 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1, + 0xdc, 0x38, 0x09, 0xed, 
0x6b, 0x8f, 0xbe, 0x5a, 0xaf, 0x4b, + 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29, 0x3a, 0xde, 0xef, 0x0b, + 0x8d, 0x69, 0x58, 0xbc, 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, + 0x2b, 0xcf, 0x0d, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b, + 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8, 0xeb, 0x0f, + 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d, 0x98, 0x7c, 0x4d, 0xa9, + 0x2f, 0xcb, 0xfa, 0x1e, 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, + 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa, + 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77, 0x8d, 0x68, + 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0x0c, 0xf1, 0x14, 0x26, 0xc3, + 0x42, 0xa7, 0x95, 0x70, 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, + 0xee, 0x0b, 0x07, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86, + 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd, 0xff, 0x1a, + 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e, 0x84, 0x61, 0x53, 0xb6, + 0x37, 0xd2, 0xe0, 0x05, 0x09, 0xec, 0xde, 0x3b, 0xba, 0x5f, + 0x6d, 0x88, 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3, + 0x0e, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f, 0x75, 0x90, + 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4, 0xf8, 0x1d, 0x2f, 0xca, + 0x4b, 0xae, 0x9c, 0x79, 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, + 0xe7, 0x02, 0xe3, 0x06, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62, + 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19, 0x15, 0xf0, + 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94, 0x6e, 0x8b, 0xb9, 0x5c, + 0xdd, 0x38, 0x0a, 0xef, 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, + 0x76, 0x93, 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0x0d, 0xe8, + 0xe4, 0x01, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65, 0x9f, 0x7a, + 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e, 0x1c, 0xf9, 0xcb, 0x2e, + 0xaf, 0x4a, 0x78, 0x9d, 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, + 0x03, 0xe6, 0xea, 0x0f, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b, + 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10, 0xed, 0x08, + 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c, 0x96, 0x73, 0x41, 0xa4, + 0x25, 0xc0, 0xf2, 0x17, 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, + 0x7f, 0x9a, 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x04, 0xe1, + 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 
0x6e, 0x88, 0x63, 0x85, + 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb, 0xc6, 0x20, 0x17, 0xf1, + 0x79, 0x9f, 0xa8, 0x4e, 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, + 0xcb, 0x2d, 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19, + 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a, 0x57, 0xb1, + 0x86, 0x60, 0xe8, 0x0e, 0x39, 0xdf, 0x34, 0xd2, 0xe5, 0x03, + 0x8b, 0x6d, 0x5a, 0xbc, 0x3f, 0xd9, 0xee, 0x08, 0x80, 0x66, + 0x51, 0xb7, 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x05, 0x32, 0xd4, + 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71, 0x9a, 0x7c, + 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12, 0xae, 0x48, 0x7f, 0x99, + 0x11, 0xf7, 0xc0, 0x26, 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, + 0xa3, 0x45, 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x06, 0xe0, + 0x0b, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83, 0x7e, 0x98, + 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6, 0x1d, 0xfb, 0xcc, 0x2a, + 0xa2, 0x44, 0x73, 0x95, 0xb8, 0x5e, 0x69, 0x8f, 0x07, 0xe1, + 0xd6, 0x30, 0xdb, 0x3d, 0x0a, 0xec, 0x64, 0x82, 0xb5, 0x53, + 0xef, 0x09, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67, 0x8c, 0x6a, + 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x04, 0x29, 0xcf, 0xf8, 0x1e, + 0x96, 0x70, 0x47, 0xa1, 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, + 0x24, 0xc2, 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9, + 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa, 0x87, 0x61, + 0x56, 0xb0, 0x38, 0xde, 0xe9, 0x0f, 0xe4, 0x02, 0x35, 0xd3, + 0x5b, 0xbd, 0x8a, 0x6c, 0xd0, 0x36, 0x01, 0xe7, 0x6f, 0x89, + 0xbe, 0x58, 0xb3, 0x55, 0x62, 0x84, 0x0c, 0xea, 0xdd, 0x3b, + 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e, 0x75, 0x93, + 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd, 0x00, 0xe7, 0xd3, 0x34, + 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, + 0x03, 0xe4, 0xd6, 0x31, 0x05, 0xe2, 0x6d, 0x8a, 0xbe, 0x59, + 0xbd, 0x5a, 0x6e, 0x89, 0x06, 0xe1, 0xd5, 0x32, 0xb1, 0x56, + 0x62, 0x85, 0x0a, 0xed, 0xd9, 0x3e, 0xda, 0x3d, 0x09, 0xee, + 0x61, 0x86, 0xb2, 0x55, 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, + 0x0f, 0xe8, 0x0c, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83, + 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0, 
0x14, 0xf3, + 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b, 0xa9, 0x4e, 0x7a, 0x9d, + 0x12, 0xf5, 0xc1, 0x26, 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, + 0xaa, 0x4d, 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41, + 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a, 0x18, 0xff, + 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97, 0x73, 0x94, 0xa0, 0x47, + 0xc8, 0x2f, 0x1b, 0xfc, 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, + 0x96, 0x71, 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a, + 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7, 0x43, 0xa4, + 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc, 0x4f, 0xa8, 0x9c, 0x7b, + 0xf4, 0x13, 0x27, 0xc0, 0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78, + 0x4c, 0xab, 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16, + 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d, 0x81, 0x66, + 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0x0e, 0xea, 0x0d, 0x39, 0xde, + 0x51, 0xb6, 0x82, 0x65, 0x57, 0xb0, 0x84, 0x63, 0xec, 0x0b, + 0x3f, 0xd8, 0x3c, 0xdb, 0xef, 0x08, 0x87, 0x60, 0x54, 0xb3, + 0x30, 0xd7, 0xe3, 0x04, 0x8b, 0x6c, 0x58, 0xbf, 0x5b, 0xbc, + 0x88, 0x6f, 0xe0, 0x07, 0x33, 0xd4, 0xe6, 0x01, 0x35, 0xd2, + 0x5d, 0xba, 0x8e, 0x69, 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, + 0xe5, 0x02, 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, + 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1, 0x26, 0xce, + 0xeb, 0x03, 0xa1, 0x49, 0x6c, 0x84, 0x35, 0xdd, 0xf8, 0x10, + 0xb2, 0x5a, 0x7f, 0x97, 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, + 0x06, 0xee, 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd, + 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x05, 0x20, 0xc8, 0x79, 0x91, + 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb, 0x98, 0x70, 0x55, 0xbd, + 0x1f, 0xf7, 0xd2, 0x3a, 0x8b, 0x63, 0x46, 0xae, 0x0c, 0xe4, + 0xc1, 0x29, 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c, + 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0x0f, 0xd4, 0x3c, + 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76, 0xc7, 0x2f, 0x0a, 0xe2, + 0x40, 0xa8, 0x8d, 0x65, 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, + 0xb8, 0x50, 0xe1, 0x09, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43, + 0x2d, 0xc5, 0xe0, 0x08, 0xaa, 0x42, 0x67, 0x8f, 0x3e, 0xd6, + 
0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c, 0x0b, 0xe3, 0xc6, 0x2e, + 0x8c, 0x64, 0x41, 0xa9, 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, + 0x52, 0xba, 0x61, 0x89, 0xac, 0x44, 0xe6, 0x0e, 0x2b, 0xc3, + 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0, 0x47, 0xaf, + 0x8a, 0x62, 0xc0, 0x28, 0x0d, 0xe5, 0x54, 0xbc, 0x99, 0x71, + 0xd3, 0x3b, 0x1e, 0xf6, 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, + 0xff, 0x17, 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x04, + 0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31, 0x80, 0x68, + 0x4d, 0xa5, 0x07, 0xef, 0xca, 0x22, 0xf9, 0x11, 0x34, 0xdc, + 0x7e, 0x96, 0xb3, 0x5b, 0xea, 0x02, 0x27, 0xcf, 0x6d, 0x85, + 0xa0, 0x48, 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d, + 0xcc, 0x24, 0x01, 0xe9, 0x4b, 0xa3, 0x86, 0x6e, 0x00, 0xe9, + 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, + 0x98, 0x71, 0x57, 0xbe, 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, + 0x7a, 0x93, 0x2d, 0xc4, 0xe2, 0x0b, 0xae, 0x47, 0x61, 0x88, + 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x06, 0x20, 0xc9, 0x77, 0x9e, + 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2, 0x5a, 0xb3, 0x95, 0x7c, + 0xd9, 0x30, 0x16, 0xff, 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, + 0x0d, 0xe4, 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d, + 0xc3, 0x2a, 0x0c, 0xe5, 0x40, 0xa9, 0x8f, 0x66, 0xee, 0x07, + 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b, 0xf5, 0x1c, 0x3a, 0xd3, + 0x76, 0x9f, 0xb9, 0x50, 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, + 0xf8, 0x11, 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0x0a, + 0x82, 0x6b, 0x4d, 0xa4, 0x01, 0xe8, 0xce, 0x27, 0x99, 0x70, + 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c, 0xad, 0x44, 0x62, 0x8b, + 0x2e, 0xc7, 0xe1, 0x08, 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, + 0xfa, 0x13, 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e, + 0x80, 0x69, 0x4f, 0xa6, 0x03, 0xea, 0xcc, 0x25, 0xc1, 0x28, + 0x0e, 0xe7, 0x42, 0xab, 0x8d, 0x64, 0xda, 0x33, 0x15, 0xfc, + 0x59, 0xb0, 0x96, 0x7f, 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, + 0xbb, 0x52, 0xec, 0x05, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49, + 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0, 0x6e, 0x87, + 0xa1, 0x48, 
0xed, 0x04, 0x22, 0xcb, 0x43, 0xaa, 0x8c, 0x65, + 0xc0, 0x29, 0x0f, 0xe6, 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, + 0x14, 0xfd, 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc, + 0x02, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7, 0x2f, 0xc6, + 0xe0, 0x09, 0xac, 0x45, 0x63, 0x8a, 0x34, 0xdd, 0xfb, 0x12, + 0xb7, 0x5e, 0x78, 0x91, 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, + 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf, + 0x06, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa, 0x05, 0xef, + 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9, 0x0c, 0xe6, 0xc5, 0x2f, + 0x83, 0x69, 0x4a, 0xa0, 0x0f, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, + 0x49, 0xa3, 0x0a, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6, + 0x09, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5, 0x18, 0xf2, + 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4, 0x1b, 0xf1, 0xd2, 0x38, + 0x94, 0x7e, 0x5d, 0xb7, 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, + 0x58, 0xb2, 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1, + 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8, 0x17, 0xfd, + 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb, 0x12, 0xf8, 0xdb, 0x31, + 0x9d, 0x77, 0x54, 0xbe, 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, + 0x57, 0xbd, 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c, + 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f, 0x36, 0xdc, + 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a, 0x35, 0xdf, 0xfc, 0x16, + 0xba, 0x50, 0x73, 0x99, 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, + 0x7a, 0x90, 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93, + 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96, 0x39, 0xd3, + 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95, 0x28, 0xc2, 0xe1, 0x0b, + 0xa7, 0x4d, 0x6e, 0x84, 0x2b, 0xc1, 0xe2, 0x08, 0xa4, 0x4e, + 0x6d, 0x87, 0x2e, 0xc4, 0xe7, 0x0d, 0xa1, 0x4b, 0x68, 0x82, + 0x2d, 0xc7, 0xe4, 0x0e, 0xa2, 0x48, 0x6b, 0x81, 0x24, 0xce, + 0xed, 0x07, 0xab, 0x41, 0x62, 0x88, 0x27, 0xcd, 0xee, 0x04, + 0xa8, 0x42, 0x61, 0x8b, 0x22, 0xc8, 0xeb, 0x01, 0xad, 0x47, + 0x64, 0x8e, 0x21, 0xcb, 0xe8, 0x02, 0xae, 0x44, 0x67, 0x8d, + 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, + 0xc0, 0x2b, 0x80, 0x6b, 
0x4b, 0xa0, 0x16, 0xfd, 0xdd, 0x36, + 0x9d, 0x76, 0x56, 0xbd, 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, + 0x5d, 0xb6, 0x2c, 0xc7, 0xe7, 0x0c, 0xa7, 0x4c, 0x6c, 0x87, + 0x27, 0xcc, 0xec, 0x07, 0xac, 0x47, 0x67, 0x8c, 0x3a, 0xd1, + 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91, 0x31, 0xda, 0xfa, 0x11, + 0xba, 0x51, 0x71, 0x9a, 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, + 0x18, 0xf3, 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8, + 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0x0e, 0xe5, 0x45, 0xae, + 0x8e, 0x65, 0xce, 0x25, 0x05, 0xee, 0x74, 0x9f, 0xbf, 0x54, + 0xff, 0x14, 0x34, 0xdf, 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, + 0x3f, 0xd4, 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x02, 0x22, 0xc9, + 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x09, 0x29, 0xc2, 0xb0, 0x5b, + 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b, 0xbb, 0x50, 0x70, 0x9b, + 0x30, 0xdb, 0xfb, 0x10, 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, + 0xe6, 0x0d, 0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x06, + 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37, 0x97, 0x7c, + 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c, 0x8a, 0x61, 0x41, 0xaa, + 0x01, 0xea, 0xca, 0x21, 0x81, 0x6a, 0x4a, 0xa1, 0x0a, 0xe1, + 0xc1, 0x2a, 0xe8, 0x03, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43, + 0xe3, 0x08, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48, 0xfe, 0x15, + 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55, 0xf5, 0x1e, 0x3e, 0xd5, + 0x7e, 0x95, 0xb5, 0x5e, 0xc4, 0x2f, 0x0f, 0xe4, 0x4f, 0xa4, + 0x84, 0x6f, 0xcf, 0x24, 0x04, 0xef, 0x44, 0xaf, 0x8f, 0x64, + 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79, 0xd9, 0x32, + 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72, 0x00, 0xec, 0xc5, 0x29, + 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, + 0x61, 0x8d, 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8, + 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x07, 0xeb, 0xcc, 0x20, + 0x09, 0xe5, 0x5b, 0xb7, 0x9e, 0x72, 0xff, 0x13, 0x3a, 0xd6, + 0x68, 0x84, 0xad, 0x41, 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, + 0xf8, 0x14, 0x99, 0x75, 0x5c, 0xb0, 0x0e, 0xe2, 0xcb, 0x27, + 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b, 0xb6, 0x5a, + 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x08, 
0xe3, 0x0f, 0x26, 0xca, + 0x74, 0x98, 0xb1, 0x5d, 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, + 0x82, 0x6e, 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7, + 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x01, 0x28, 0xc4, 0x2f, 0xc3, + 0xea, 0x06, 0xb8, 0x54, 0x7d, 0x91, 0x1c, 0xf0, 0xd9, 0x35, + 0x8b, 0x67, 0x4e, 0xa2, 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, + 0x45, 0xa9, 0x24, 0xc8, 0xe1, 0x0d, 0xb3, 0x5f, 0x76, 0x9a, + 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0x0a, 0x23, 0xcf, 0x42, 0xae, + 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc, 0xdb, 0x37, 0x1e, 0xf2, + 0x4c, 0xa0, 0x89, 0x65, 0xe8, 0x04, 0x2d, 0xc1, 0x7f, 0x93, + 0xba, 0x56, 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x03, + 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30, 0x92, 0x7e, + 0x57, 0xbb, 0x05, 0xe9, 0xc0, 0x2c, 0xa1, 0x4d, 0x64, 0x88, + 0x36, 0xda, 0xf3, 0x1f, 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, + 0xa6, 0x4a, 0xc7, 0x2b, 0x02, 0xee, 0x50, 0xbc, 0x95, 0x79, + 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0x0c, 0xe0, 0x6d, 0x81, + 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3, 0x38, 0xd4, 0xfd, 0x11, + 0xaf, 0x43, 0x6a, 0x86, 0x0b, 0xe7, 0xce, 0x22, 0x9c, 0x70, + 0x59, 0xb5, 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, + 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82, 0x76, 0x9b, + 0xb1, 0x5c, 0xe5, 0x08, 0x22, 0xcf, 0x4d, 0xa0, 0x8a, 0x67, + 0xde, 0x33, 0x19, 0xf4, 0xec, 0x01, 0x2b, 0xc6, 0x7f, 0x92, + 0xb8, 0x55, 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e, + 0x9a, 0x77, 0x5d, 0xb0, 0x09, 0xe4, 0xce, 0x23, 0xa1, 0x4c, + 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18, 0xc5, 0x28, 0x02, 0xef, + 0x56, 0xbb, 0x91, 0x7c, 0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80, + 0xaa, 0x47, 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0x0a, + 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31, 0x29, 0xc4, + 0xee, 0x03, 0xba, 0x57, 0x7d, 0x90, 0x12, 0xff, 0xd5, 0x38, + 0x81, 0x6c, 0x46, 0xab, 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, + 0x0b, 0xe6, 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd, + 0x97, 0x7a, 0x50, 0xbd, 0x04, 0xe9, 0xc3, 0x2e, 0xac, 0x41, + 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15, 0xe1, 0x0c, 
0x26, 0xcb, + 0x72, 0x9f, 0xb5, 0x58, 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, + 0x8e, 0x63, 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x05, 0x2f, 0xc2, + 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9, 0x0d, 0xe0, + 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4, 0x36, 0xdb, 0xf1, 0x1c, + 0xa5, 0x48, 0x62, 0x8f, 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, + 0x06, 0xeb, 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0, + 0x24, 0xc9, 0xe3, 0x0e, 0xb7, 0x5a, 0x70, 0x9d, 0x1f, 0xf2, + 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6, 0xbe, 0x53, 0x79, 0x94, + 0x2d, 0xc0, 0xea, 0x07, 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, + 0xd1, 0x3c, 0xc8, 0x25, 0x0f, 0xe2, 0x5b, 0xb6, 0x9c, 0x71, + 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a, 0x00, 0xee, + 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, + 0xbc, 0x52, 0x7d, 0x93, 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, + 0x18, 0xf6, 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5, + 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c, 0xaf, 0x41, + 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f, 0xca, 0x24, 0x0b, 0xe5, + 0x55, 0xbb, 0x94, 0x7a, 0xe9, 0x07, 0x28, 0xc6, 0x76, 0x98, + 0xb7, 0x59, 0x05, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5, + 0x26, 0xc8, 0xe7, 0x09, 0xb9, 0x57, 0x78, 0x96, 0x43, 0xad, + 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3, 0x60, 0x8e, 0xa1, 0x4f, + 0xff, 0x11, 0x3e, 0xd0, 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, + 0xd7, 0x39, 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a, + 0xcf, 0x21, 0x0e, 0xe0, 0x50, 0xbe, 0x91, 0x7f, 0xec, 0x02, + 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c, 0x0a, 0xe4, 0xcb, 0x25, + 0x95, 0x7b, 0x54, 0xba, 0x29, 0xc7, 0xe8, 0x06, 0xb6, 0x58, + 0x77, 0x99, 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc, + 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf, 0x86, 0x68, + 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36, 0xa5, 0x4b, 0x64, 0x8a, + 0x3a, 0xd4, 0xfb, 0x15, 0xc0, 0x2e, 0x01, 0xef, 0x5f, 0xb1, + 0x9e, 0x70, 0xe3, 0x0d, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53, + 0x0f, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf, 0x2c, 0xc2, + 0xed, 0x03, 0xb3, 0x5d, 0x72, 0x9c, 0x49, 0xa7, 0x88, 0x66, + 
0xd6, 0x38, 0x17, 0xf9, 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, + 0x34, 0xda, 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33, + 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10, 0xc5, 0x2b, + 0x04, 0xea, 0x5a, 0xb4, 0x9b, 0x75, 0xe6, 0x08, 0x27, 0xc9, + 0x79, 0x97, 0xb8, 0x56, 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, + 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c, + 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0x0e, 0xe1, 0x7d, 0x92, + 0xbe, 0x51, 0xe6, 0x09, 0x25, 0xca, 0xac, 0x43, 0x6f, 0x80, + 0x37, 0xd8, 0xf4, 0x1b, 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, + 0xdf, 0x30, 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d, + 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66, 0x45, 0xaa, + 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2, 0x6e, 0x81, 0xad, 0x42, + 0xf5, 0x1a, 0x36, 0xd9, 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, + 0x4b, 0xa4, 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f, + 0xe9, 0x06, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e, 0xc2, 0x2d, + 0x01, 0xee, 0x59, 0xb6, 0x9a, 0x75, 0xbf, 0x50, 0x7c, 0x93, + 0x24, 0xcb, 0xe7, 0x08, 0x94, 0x7b, 0x57, 0xb8, 0x0f, 0xe0, + 0xcc, 0x23, 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d, + 0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16, 0xdc, 0x33, + 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b, 0xf7, 0x18, 0x34, 0xdb, + 0x6c, 0x83, 0xaf, 0x40, 0x26, 0xc9, 0xe5, 0x0a, 0xbd, 0x52, + 0x7e, 0x91, 0x0d, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba, + 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x04, 0x28, 0xc7, 0x5b, 0xb4, + 0x98, 0x77, 0xc0, 0x2f, 0x03, 0xec, 0xcf, 0x20, 0x0c, 0xe3, + 0x54, 0xbb, 0x97, 0x78, 0xe4, 0x0b, 0x27, 0xc8, 0x7f, 0x90, + 0xbc, 0x53, 0x99, 0x76, 0x5a, 0xb5, 0x02, 0xed, 0xc1, 0x2e, + 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x05, 0x63, 0x8c, + 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4, 0x48, 0xa7, 0x8b, 0x64, + 0xd3, 0x3c, 0x10, 0xff, 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, + 0x6d, 0x82, 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9, + 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, + 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39, 0xbb, 0x4b, 0x46, 0xb6, + 0x5c, 0xac, 
0xa1, 0x51, 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, + 0x72, 0x82, 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81, + 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52, 0xd0, 0x20, + 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a, 0x03, 0xf3, 0xfe, 0x0e, + 0xe4, 0x14, 0x19, 0xe9, 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, + 0xcc, 0x3c, 0x05, 0xf5, 0xf8, 0x08, 0xe2, 0x12, 0x1f, 0xef, + 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87, 0xbe, 0x4e, + 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54, 0xbd, 0x4d, 0x40, 0xb0, + 0x5a, 0xaa, 0xa7, 0x57, 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, + 0x74, 0x84, 0x06, 0xf6, 0xfb, 0x0b, 0xe1, 0x11, 0x1c, 0xec, + 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f, 0xb1, 0x41, + 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b, 0x62, 0x92, 0x9f, 0x6f, + 0x85, 0x75, 0x78, 0x88, 0x0a, 0xfa, 0xf7, 0x07, 0xed, 0x1d, + 0x10, 0xe0, 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33, + 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30, 0x09, 0xf9, + 0xf4, 0x04, 0xee, 0x1e, 0x13, 0xe3, 0x61, 0x91, 0x9c, 0x6c, + 0x86, 0x76, 0x7b, 0x8b, 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, + 0xa8, 0x58, 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d, + 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e, 0xdc, 0x2c, + 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36, 0x0f, 0xff, 0xf2, 0x02, + 0xe8, 0x18, 0x15, 0xe5, 0x0c, 0xfc, 0xf1, 0x01, 0xeb, 0x1b, + 0x16, 0xe6, 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35, + 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d, 0x64, 0x94, + 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e, 0x00, 0xf1, 0xff, 0x0e, + 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, + 0xc7, 0x36, 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46, + 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d, 0x4b, 0xba, + 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6, 0x90, 0x61, 0x6f, 0x9e, + 0x73, 0x82, 0x8c, 0x7d, 0xe0, 0x11, 0x1f, 0xee, 0x03, 0xf2, + 0xfc, 0x0d, 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6, + 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b, 0x4d, 0xbc, + 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0, 0x3d, 0xcc, 0xc2, 0x33, + 0xde, 0x2f, 0x21, 0xd0, 
0xe6, 0x17, 0x19, 0xe8, 0x05, 0xf4, + 0xfa, 0x0b, 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30, + 0x06, 0xf7, 0xf9, 0x08, 0xe5, 0x14, 0x1a, 0xeb, 0x76, 0x87, + 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b, 0xad, 0x5c, 0x52, 0xa3, + 0x4e, 0xbf, 0xb1, 0x40, 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, + 0x2d, 0xdc, 0xea, 0x1b, 0x15, 0xe4, 0x09, 0xf8, 0xf6, 0x07, + 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77, 0x41, 0xb0, + 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac, 0x7a, 0x8b, 0x85, 0x74, + 0x99, 0x68, 0x66, 0x97, 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, + 0xbd, 0x4c, 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c, + 0x0a, 0xfb, 0xf5, 0x04, 0xe9, 0x18, 0x16, 0xe7, 0xa7, 0x56, + 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a, 0x7c, 0x8d, 0x83, 0x72, + 0x9f, 0x6e, 0x60, 0x91, 0x0c, 0xfd, 0xf3, 0x02, 0xef, 0x1e, + 0x10, 0xe1, 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a, + 0xec, 0x1d, 0x13, 0xe2, 0x0f, 0xfe, 0xf0, 0x01, 0x37, 0xc6, + 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda, 0x47, 0xb6, 0xb8, 0x49, + 0xa4, 0x55, 0x5b, 0xaa, 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, + 0x80, 0x71, 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, + 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27, 0x9b, 0x69, + 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f, 0x58, 0xaa, 0xa1, 0x53, + 0xb7, 0x45, 0x4e, 0xbc, 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, + 0x3d, 0xcf, 0xe8, 0x1a, 0x11, 0xe3, 0x07, 0xf5, 0xfe, 0x0c, + 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54, 0x73, 0x81, + 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97, 0x56, 0xa4, 0xaf, 0x5d, + 0xb9, 0x4b, 0x40, 0xb2, 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, + 0x83, 0x71, 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29, + 0x0e, 0xfc, 0xf7, 0x05, 0xe1, 0x13, 0x18, 0xea, 0x7d, 0x8f, + 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99, 0xbe, 0x4c, 0x47, 0xb5, + 0x51, 0xa3, 0xa8, 0x5a, 0xe6, 0x14, 0x1f, 0xed, 0x09, 0xfb, + 0xf0, 0x02, 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1, + 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48, 0x6f, 0x9d, + 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b, 0x37, 0xc5, 0xce, 0x3c, + 0xd8, 0x2a, 0x21, 0xd3, 0xf4, 0x06, 
0x0d, 0xff, 0x1b, 0xe9, + 0xe2, 0x10, 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63, + 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0, 0x1c, 0xee, + 0xe5, 0x17, 0xf3, 0x01, 0x0a, 0xf8, 0xdf, 0x2d, 0x26, 0xd4, + 0x30, 0xc2, 0xc9, 0x3b, 0xfa, 0x08, 0x03, 0xf1, 0x15, 0xe7, + 0xec, 0x1e, 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd, + 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85, 0xa2, 0x50, + 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46, 0xd1, 0x23, 0x28, 0xda, + 0x3e, 0xcc, 0xc7, 0x35, 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0x0f, + 0x04, 0xf6, 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae, + 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d, 0x00, 0xf3, + 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, + 0x20, 0xd3, 0xdb, 0x28, 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, + 0x9b, 0x68, 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3, + 0x0b, 0xf8, 0xf0, 0x03, 0xe0, 0x13, 0x1b, 0xe8, 0xc0, 0x33, + 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23, 0x80, 0x73, 0x7b, 0x88, + 0x6b, 0x98, 0x90, 0x63, 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, + 0x5b, 0xa8, 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0x0e, 0x06, 0xf5, + 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e, 0x9d, 0x6e, + 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e, 0x56, 0xa5, 0xad, 0x5e, + 0xbd, 0x4e, 0x46, 0xb5, 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x05, + 0x0d, 0xfe, 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35, + 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75, 0x5d, 0xae, + 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe, 0x2c, 0xdf, 0xd7, 0x24, + 0xc7, 0x34, 0x3c, 0xcf, 0xe7, 0x14, 0x1c, 0xef, 0x0c, 0xff, + 0xf7, 0x04, 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44, + 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f, 0x27, 0xd4, + 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4, 0xec, 0x1f, 0x17, 0xe4, + 0x07, 0xf4, 0xfc, 0x0f, 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, + 0xbc, 0x4f, 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84, + 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9, 0xf1, 0x02, + 0x0a, 0xf9, 0x1a, 0xe9, 0xe1, 0x12, 0xb1, 0x42, 0x4a, 0xb9, + 0x5a, 0xa9, 0xa1, 0x52, 0x7a, 0x89, 0x81, 0x72, 
0x91, 0x62, + 0x6a, 0x99, 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2, + 0xfa, 0x09, 0x01, 0xf2, 0x11, 0xe2, 0xea, 0x19, 0xba, 0x49, + 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59, 0x71, 0x82, 0x8a, 0x79, + 0x9a, 0x69, 0x61, 0x92, 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, + 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05, + 0xfb, 0x0f, 0x0e, 0xfa, 0x0c, 0xf8, 0xf9, 0x0d, 0x08, 0xfc, + 0xfd, 0x09, 0xff, 0x0b, 0x0a, 0xfe, 0xeb, 0x1f, 0x1e, 0xea, + 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, + 0x1a, 0xee, 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, + 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, 0xcb, 0x3f, + 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, + 0xcf, 0x3b, 0x3a, 0xce, 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, + 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35, + 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, + 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, 0xdb, 0x2f, 0x2e, 0xda, + 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, + 0x2a, 0xde, 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, + 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, 0x70, 0x84, + 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, + 0x74, 0x80, 0x81, 0x75, 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, + 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65, + 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, + 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, 0x40, 0xb4, 0xb5, 0x41, + 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, + 0xb1, 0x45, 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, + 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, 0xab, 0x5f, + 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, + 0xaf, 0x5b, 0x5a, 0xae, 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, + 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55, + 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, + 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a, 0xeb, 0x1e, 0x1c, 0xe9, + 0x18, 0xed, 0xef, 0x1a, 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, + 
0x14, 0xe1, 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a, + 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1, 0x20, 0xd5, + 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1, 0xdb, 0x2e, 0x2c, 0xd9, + 0x28, 0xdd, 0xdf, 0x2a, 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, + 0x8f, 0x7a, 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81, + 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91, 0x9b, 0x6e, + 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a, 0x40, 0xb5, 0xb7, 0x42, + 0xb3, 0x46, 0x44, 0xb1, 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, + 0xbf, 0x4a, 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a, + 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1, 0x0b, 0xfe, + 0xfc, 0x09, 0xf8, 0x0d, 0x0f, 0xfa, 0xf0, 0x05, 0x07, 0xf2, + 0x03, 0xf6, 0xf4, 0x01, 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, + 0xe4, 0x11, 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea, + 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31, 0x3b, 0xce, + 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca, 0x2b, 0xde, 0xdc, 0x29, + 0xd8, 0x2d, 0x2f, 0xda, 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, + 0xd4, 0x21, 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71, + 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a, 0x6b, 0x9e, + 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a, 0x90, 0x65, 0x67, 0x92, + 0x63, 0x96, 0x94, 0x61, 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, + 0x4f, 0xba, 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41, + 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51, 0x5b, 0xae, + 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa, 0x00, 0xf6, 0xf1, 0x07, + 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, + 0xed, 0x1b, 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23, + 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0, 0xab, 0x5d, + 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53, 0x48, 0xbe, 0xb9, 0x4f, + 0xb7, 0x41, 0x46, 0xb0, 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, + 0x7e, 0x88, 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b, + 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3, 0xa8, 0x5e, + 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50, 0x90, 0x66, 0x61, 0x97, + 0x6f, 0x99, 0x9e, 0x68, 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, + 0x7d, 0x8b, 
0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18, + 0x03, 0xf5, 0xf2, 0x04, 0xfc, 0x0a, 0x0d, 0xfb, 0x3b, 0xcd, + 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3, 0xd8, 0x2e, 0x29, 0xdf, + 0x27, 0xd1, 0xd6, 0x20, 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, + 0x98, 0x6e, 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d, + 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5, 0xae, 0x58, + 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56, 0x3d, 0xcb, 0xcc, 0x3a, + 0xc2, 0x34, 0x33, 0xc5, 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, + 0xd0, 0x26, 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e, + 0x05, 0xf3, 0xf4, 0x02, 0xfa, 0x0c, 0x0b, 0xfd, 0xdd, 0x2b, + 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25, 0x3e, 0xc8, 0xcf, 0x39, + 0xc1, 0x37, 0x30, 0xc6, 0x06, 0xf0, 0xf7, 0x01, 0xf9, 0x0f, + 0x08, 0xfe, 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d, + 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e, 0x95, 0x63, + 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d, 0xad, 0x5b, 0x5c, 0xaa, + 0x52, 0xa4, 0xa3, 0x55, 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, + 0x40, 0xb6, 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, + 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14, 0xcb, 0x3c, + 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34, 0x20, 0xd7, 0xd3, 0x24, + 0xdb, 0x2c, 0x28, 0xdf, 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, + 0x83, 0x74, 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f, + 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf, 0xab, 0x5c, + 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54, 0x0b, 0xfc, 0xf8, 0x0f, + 0xf0, 0x07, 0x03, 0xf4, 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, + 0xe8, 0x1f, 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f, + 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4, 0x80, 0x77, + 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f, 0x6b, 0x9c, 0x98, 0x6f, + 0x90, 0x67, 0x63, 0x94, 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, + 0x43, 0xb4, 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f, + 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9, 0xfd, 0x0a, + 0x0e, 0xf9, 0x06, 0xf1, 0xf5, 0x02, 0xdd, 0x2a, 0x2e, 0xd9, + 0x26, 0xd1, 0xd5, 0x22, 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, + 0x3e, 0xc9, 0x9d, 0x6a, 
0x6e, 0x99, 0x66, 0x91, 0x95, 0x62, + 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89, 0x56, 0xa1, + 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9, 0xbd, 0x4a, 0x4e, 0xb9, + 0x46, 0xb1, 0xb5, 0x42, 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, + 0x15, 0xe2, 0xf6, 0x01, 0x05, 0xf2, 0x0d, 0xfa, 0xfe, 0x09, + 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29, 0x3d, 0xca, + 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2, 0x96, 0x61, 0x65, 0x92, + 0x6d, 0x9a, 0x9e, 0x69, 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, + 0x75, 0x82, 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2, + 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49, 0x00, 0xf8, + 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, + 0x54, 0xac, 0xb9, 0x41, 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x04, + 0x11, 0xe9, 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a, + 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4, 0xe5, 0x1d, + 0x08, 0xf0, 0x22, 0xda, 0xcf, 0x37, 0x4d, 0xb5, 0xa0, 0x58, + 0x8a, 0x72, 0x67, 0x9f, 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, + 0xf4, 0x0c, 0xec, 0x14, 0x01, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e, + 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad, 0xd7, 0x2f, + 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x05, 0x44, 0xbc, 0xa9, 0x51, + 0x83, 0x7b, 0x6e, 0x96, 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, + 0xb0, 0x48, 0x09, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb, + 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73, 0x32, 0xca, + 0xdf, 0x27, 0xf5, 0x0d, 0x18, 0xe0, 0xc5, 0x3d, 0x28, 0xd0, + 0x02, 0xfa, 0xef, 0x17, 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, + 0x7c, 0x84, 0xfe, 0x06, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c, + 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf, 0xb3, 0x4b, + 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61, 0x20, 0xd8, 0xcd, 0x35, + 0xe7, 0x1f, 0x0a, 0xf2, 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, + 0xa2, 0x5a, 0x1b, 0xe3, 0xf6, 0x0e, 0xdc, 0x24, 0x31, 0xc9, + 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x03, 0xfb, 0xba, 0x42, + 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68, 0x12, 0xea, 0xff, 0x07, + 0xd5, 0x2d, 0x38, 0xc0, 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, + 0xab, 0x53, 0x5f, 0xa7, 0xb2, 0x4a, 
0x98, 0x60, 0x75, 0x8d, + 0xcc, 0x34, 0x21, 0xd9, 0x0b, 0xf3, 0xe6, 0x1e, 0x64, 0x9c, + 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6, 0xf7, 0x0f, 0x1a, 0xe2, + 0x30, 0xc8, 0xdd, 0x25, 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, + 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e, + 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x07, 0xfe, 0xb0, 0x49, + 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65, 0x56, 0xaf, 0xb9, 0x40, + 0x95, 0x6c, 0x7a, 0x83, 0xcd, 0x34, 0x22, 0xdb, 0x0e, 0xf7, + 0xe1, 0x18, 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8, + 0xe6, 0x1f, 0x09, 0xf0, 0x25, 0xdc, 0xca, 0x33, 0xac, 0x55, + 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79, 0x37, 0xce, 0xd8, 0x21, + 0xf4, 0x0d, 0x1b, 0xe2, 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, + 0xab, 0x52, 0x1c, 0xe5, 0xf3, 0x0a, 0xdf, 0x26, 0x30, 0xc9, + 0xfa, 0x03, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f, 0x61, 0x98, + 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4, 0xd1, 0x28, 0x3e, 0xc7, + 0x12, 0xeb, 0xfd, 0x04, 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, + 0x66, 0x9f, 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90, + 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0x0b, 0x6e, 0x97, + 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb, 0xf5, 0x0c, 0x1a, 0xe3, + 0x36, 0xcf, 0xd9, 0x20, 0x13, 0xea, 0xfc, 0x05, 0xd0, 0x29, + 0x3f, 0xc6, 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d, + 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x02, 0x14, 0xed, 0xa3, 0x5a, + 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76, 0xe9, 0x10, 0x06, 0xff, + 0x2a, 0xd3, 0xc5, 0x3c, 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, + 0x5e, 0xa7, 0xc2, 0x3b, 0x2d, 0xd4, 0x01, 0xf8, 0xee, 0x17, + 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c, 0xbf, 0x46, + 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a, 0x24, 0xdd, 0xcb, 0x32, + 0xe7, 0x1e, 0x08, 0xf1, 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, + 0xb8, 0x41, 0x0f, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda, + 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, + 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f, 0x1b, 0xe1, 0xf2, 0x08, + 0xd4, 0x2e, 0x3d, 0xc7, 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, + 0xbe, 0x44, 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x03, 
0x10, 0xea, + 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69, 0x2d, 0xd7, + 0xc4, 0x3e, 0xe2, 0x18, 0x0b, 0xf1, 0xae, 0x54, 0x47, 0xbd, + 0x61, 0x9b, 0x88, 0x72, 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, + 0x4a, 0xb0, 0xef, 0x15, 0x06, 0xfc, 0x20, 0xda, 0xc9, 0x33, + 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab, 0xf4, 0x0e, + 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28, 0x5a, 0xa0, 0xb3, 0x49, + 0x95, 0x6f, 0x7c, 0x86, 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, + 0xff, 0x05, 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d, + 0xc2, 0x38, 0x2b, 0xd1, 0x0d, 0xf7, 0xe4, 0x1e, 0xd8, 0x22, + 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x04, 0x5b, 0xa1, 0xb2, 0x48, + 0x94, 0x6e, 0x7d, 0x87, 0xc3, 0x39, 0x2a, 0xd0, 0x0c, 0xf6, + 0xe5, 0x1f, 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c, + 0xee, 0x14, 0x07, 0xfd, 0x21, 0xdb, 0xc8, 0x32, 0x6d, 0x97, + 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1, 0xf5, 0x0f, 0x1c, 0xe6, + 0x3a, 0xc0, 0xd3, 0x29, 0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, + 0x50, 0xaa, 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68, + 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x02, 0x11, 0xeb, 0xaf, 0x55, + 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73, 0x2c, 0xd6, 0xc5, 0x3f, + 0xe3, 0x19, 0x0a, 0xf0, 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, + 0xa4, 0x5e, 0x01, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd, + 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45, 0x1a, 0xe0, + 0xf3, 0x09, 0xd5, 0x2f, 0x3c, 0xc6, 0x00, 0xfb, 0xeb, 0x10, + 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, + 0xab, 0x50, 0x0b, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0, + 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b, 0x16, 0xed, + 0xfd, 0x06, 0xdd, 0x26, 0x36, 0xcd, 0x9d, 0x66, 0x76, 0x8d, + 0x56, 0xad, 0xbd, 0x46, 0x1d, 0xe6, 0xf6, 0x0d, 0xd6, 0x2d, + 0x3d, 0xc6, 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d, + 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0x0c, 0xf7, 0xa7, 0x5c, + 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c, 0x27, 0xdc, 0xcc, 0x37, + 0xec, 0x17, 0x07, 0xfc, 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, + 0x8c, 0x77, 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0x0a, 0x1a, 0xe1, + 
0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a, 0x31, 0xca, + 0xda, 0x21, 0xfa, 0x01, 0x11, 0xea, 0xba, 0x41, 0x51, 0xaa, + 0x71, 0x8a, 0x9a, 0x61, 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, + 0x78, 0x83, 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x08, + 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88, 0xd8, 0x23, + 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x03, 0x4e, 0xb5, 0xa5, 0x5e, + 0x85, 0x7e, 0x6e, 0x95, 0xc5, 0x3e, 0x2e, 0xd5, 0x0e, 0xf5, + 0xe5, 0x1e, 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e, + 0xce, 0x35, 0x25, 0xde, 0x05, 0xfe, 0xee, 0x15, 0x74, 0x8f, + 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf, 0xff, 0x04, 0x14, 0xef, + 0x34, 0xcf, 0xdf, 0x24, 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, + 0x5f, 0xa4, 0xf4, 0x0f, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f, + 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9, 0xe9, 0x12, + 0x02, 0xf9, 0x22, 0xd9, 0xc9, 0x32, 0x69, 0x92, 0x82, 0x79, + 0xa2, 0x59, 0x49, 0xb2, 0xe2, 0x19, 0x09, 0xf2, 0x29, 0xd2, + 0xc2, 0x39, 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, + 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d, 0x7b, 0x87, + 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5, 0xc8, 0x34, 0x2d, 0xd1, + 0x1f, 0xe3, 0xfa, 0x06, 0xf6, 0x0a, 0x13, 0xef, 0x21, 0xdd, + 0xc4, 0x38, 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b, + 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43, 0x3e, 0xc2, + 0xdb, 0x27, 0xe9, 0x15, 0x0c, 0xf0, 0xf1, 0x0d, 0x14, 0xe8, + 0x26, 0xda, 0xc3, 0x3f, 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, + 0x70, 0x8c, 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44, + 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0x0b, 0xf7, 0x07, 0xfb, + 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9, 0xb4, 0x48, 0x51, 0xad, + 0x63, 0x9f, 0x86, 0x7a, 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, + 0x4e, 0xb2, 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x01, + 0xff, 0x03, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31, 0x4c, 0xb0, + 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82, 0x84, 0x78, 0x61, 0x9d, + 0x53, 0xaf, 0xb6, 0x4a, 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, + 0x05, 0xf9, 0x09, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7, + 0xba, 0x46, 
0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74, 0x72, 0x8e, + 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc, 0xc1, 0x3d, 0x24, 0xd8, + 0x16, 0xea, 0xf3, 0x0f, 0x0e, 0xf2, 0xeb, 0x17, 0xd9, 0x25, + 0x3c, 0xc0, 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73, + 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb, 0xc6, 0x3a, + 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x08, 0xf8, 0x04, 0x1d, 0xe1, + 0x2f, 0xd3, 0xca, 0x36, 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, + 0x79, 0x85, 0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d, + 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x02, 0xfe, 0x00, 0xfd, + 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, + 0x68, 0x95, 0x8f, 0x72, 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, + 0x5f, 0xa2, 0xd0, 0x2d, 0x37, 0xca, 0x03, 0xfe, 0xe4, 0x19, + 0xd6, 0x2b, 0x31, 0xcc, 0x05, 0xf8, 0xe2, 0x1f, 0x6d, 0x90, + 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4, 0xbd, 0x40, 0x5a, 0xa7, + 0x6e, 0x93, 0x89, 0x74, 0x06, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, + 0x32, 0xcf, 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78, + 0x0a, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3, 0xda, 0x27, + 0x3d, 0xc0, 0x09, 0xf4, 0xee, 0x13, 0x61, 0x9c, 0x86, 0x7b, + 0xb2, 0x4f, 0x55, 0xa8, 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, + 0x53, 0xae, 0xdc, 0x21, 0x3b, 0xc6, 0x0f, 0xf2, 0xe8, 0x15, + 0x0c, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5, 0xb7, 0x4a, + 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e, 0x7f, 0x82, 0x98, 0x65, + 0xac, 0x51, 0x4b, 0xb6, 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, + 0xf0, 0x0d, 0x14, 0xe9, 0xf3, 0x0e, 0xc7, 0x3a, 0x20, 0xdd, + 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66, 0xa9, 0x54, + 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60, 0x12, 0xef, 0xf5, 0x08, + 0xc1, 0x3c, 0x26, 0xdb, 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, + 0xf6, 0x0b, 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0, + 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x07, 0x75, 0x88, + 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc, 0xa5, 0x58, 0x42, 0xbf, + 0x76, 0x8b, 0x91, 0x6c, 0x1e, 0xe3, 0xf9, 0x04, 0xcd, 0x30, + 0x2a, 0xd7, 0x18, 0xe5, 0xff, 0x02, 0xcb, 0x36, 0x2c, 0xd1, + 0xa3, 0x5e, 0x44, 0xb9, 
0x70, 0x8d, 0x97, 0x6a, 0x73, 0x8e, + 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba, 0xc8, 0x35, 0x2f, 0xd2, + 0x1b, 0xe6, 0xfc, 0x01, 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, + 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63, + 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b, 0xf8, 0x06, + 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38, 0xb6, 0x48, 0x57, 0xa9, + 0x69, 0x97, 0x88, 0x76, 0x15, 0xeb, 0xf4, 0x0a, 0xca, 0x34, + 0x2b, 0xd5, 0xed, 0x13, 0x0c, 0xf2, 0x32, 0xcc, 0xd3, 0x2d, + 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e, 0x71, 0x8f, + 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1, 0xd2, 0x2c, 0x33, 0xcd, + 0x0d, 0xf3, 0xec, 0x12, 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0x0b, + 0x14, 0xea, 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49, + 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x07, 0x64, 0x9a, + 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4, 0x9c, 0x62, 0x7d, 0x83, + 0x43, 0xbd, 0xa2, 0x5c, 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, + 0x01, 0xff, 0xe2, 0x1c, 0x03, 0xfd, 0x3d, 0xc3, 0xdc, 0x22, + 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81, 0xb9, 0x47, + 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79, 0x1a, 0xe4, 0xfb, 0x05, + 0xc5, 0x3b, 0x24, 0xda, 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, + 0x6a, 0x94, 0xf7, 0x09, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37, + 0x0f, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf, 0xac, 0x52, + 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c, 0x93, 0x6d, 0x72, 0x8c, + 0x4c, 0xb2, 0xad, 0x53, 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, + 0x0e, 0xf0, 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x08, + 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab, 0x25, 0xdb, + 0xc4, 0x3a, 0xfa, 0x04, 0x1b, 0xe5, 0x86, 0x78, 0x67, 0x99, + 0x59, 0xa7, 0xb8, 0x46, 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, + 0x40, 0xbe, 0xdd, 0x23, 0x3c, 0xc2, 0x02, 0xfc, 0xe3, 0x1d, + 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, + 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c, 0x4b, 0xb4, 0xa8, 0x57, + 0x90, 0x6f, 0x73, 0x8c, 0xe0, 0x1f, 0x03, 0xfc, 0x3b, 0xc4, + 0xd8, 0x27, 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51, + 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 
0x05, 0xfa, 0xdd, 0x22, + 0x3e, 0xc1, 0x06, 0xf9, 0xe5, 0x1a, 0x76, 0x89, 0x95, 0x6a, + 0xad, 0x52, 0x4e, 0xb1, 0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15, + 0x09, 0xf6, 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d, + 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd, 0xd1, 0x2e, + 0x32, 0xcd, 0x0a, 0xf5, 0xe9, 0x16, 0xa7, 0x58, 0x44, 0xbb, + 0x7c, 0x83, 0x9f, 0x60, 0x0c, 0xf3, 0xef, 0x10, 0xd7, 0x28, + 0x34, 0xcb, 0xec, 0x13, 0x0f, 0xf0, 0x37, 0xc8, 0xd4, 0x2b, + 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80, 0x62, 0x9d, + 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5, 0xc9, 0x36, 0x2a, 0xd5, + 0x12, 0xed, 0xf1, 0x0e, 0x29, 0xd6, 0xca, 0x35, 0xf2, 0x0d, + 0x11, 0xee, 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45, + 0xf4, 0x0b, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33, 0x5f, 0xa0, + 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98, 0xbf, 0x40, 0x5c, 0xa3, + 0x64, 0x9b, 0x87, 0x78, 0x14, 0xeb, 0xf7, 0x08, 0xcf, 0x30, + 0x2c, 0xd3, 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94, + 0xf8, 0x07, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f, 0x18, 0xe7, + 0xfb, 0x04, 0xc3, 0x3c, 0x20, 0xdf, 0xb3, 0x4c, 0x50, 0xaf, + 0x68, 0x97, 0x8b, 0x74, 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, + 0xfd, 0x02, 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9, + 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49, 0x25, 0xda, + 0xc6, 0x39, 0xfe, 0x01, 0x1d, 0xe2 +}; + +unsigned char gf_inv_table_base[] = { + 0x00, 0x01, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, + 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, + 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, + 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, + 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, + 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, + 0xab, 0x0c, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, + 0xa6, 0x04, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, + 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, + 0xcb, 0x63, 0x97, 0x0e, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, + 0xb9, 0xc4, 0x17, 0x4d, 0x52, 
0x8d, 0xef, 0xb3, 0x20, 0xec, + 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79, + 0xdb, 0x77, 0x06, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, + 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, + 0x53, 0x69, 0x02, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, + 0x0f, 0x5c, 0x0b, 0xdc, 0xbd, 0x94, 0xac, 0x09, 0xc7, 0xa2, + 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x05, 0xce, 0x3b, + 0x0d, 0x3c, 0x9c, 0x08, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, + 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x07, 0x7b, 0x95, 0x9a, + 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, + 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, + 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, + 0x99, 0x0a, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, + 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, + 0xe3, 0xe7, 0xb5, 0xea, 0x03, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, + 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd +}; +#endif // GF_LARGE_TABLES + +#endif //_EC_BASE_H_ diff --git a/src/spdk/isa-l/erasure_code/ec_base_aliases.c b/src/spdk/isa-l/erasure_code/ec_base_aliases.c new file mode 100644 index 000000000..d046ff61a --- /dev/null +++ b/src/spdk/isa-l/erasure_code/ec_base_aliases.c @@ -0,0 +1,61 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "erasure_code.h" + +void gf_vect_dot_prod(int len, int vlen, unsigned char *v, + unsigned char **src, unsigned char *dest) +{ + gf_vect_dot_prod_base(len, vlen, v, src, dest); +} + +void gf_vect_mad(int len, int vec, int vec_i, + unsigned char *v, unsigned char *src, unsigned char *dest) +{ + gf_vect_mad_base(len, vec, vec_i, v, src, dest); + +} + +void ec_encode_data(int len, int srcs, int dests, unsigned char *v, + unsigned char **src, unsigned char **dest) +{ + ec_encode_data_base(len, srcs, dests, v, src, dest); +} + +void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, + unsigned char *data, unsigned char **dest) +{ + ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest); +} + +int gf_vect_mul(int len, unsigned char *a, void *src, void *dest) +{ + gf_vect_mul_base(len, a, (unsigned char *)src, (unsigned char *)dest); + return 0; +} diff --git 
a/src/spdk/isa-l/erasure_code/ec_highlevel_func.c b/src/spdk/isa-l/erasure_code/ec_highlevel_func.c new file mode 100644 index 000000000..c57d460a6 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/ec_highlevel_func.c @@ -0,0 +1,336 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include +#include "erasure_code.h" + +#if __x86_64__ || __i386__ || _M_X64 || _M_IX86 +void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ + + if (len < 16) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 4) { + gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding); + g_tbls += 4 * k * 32; + coding += 4; + rows -= 4; + } + switch (rows) { + case 3: + gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 4) { + gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding); + g_tbls += 4 * k * 32; + coding += 4; + rows -= 4; + } + switch (rows) { + case 3: + gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ + + if (len < 32) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 4) { + gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding); + g_tbls += 4 * k * 32; + coding += 4; + rows -= 4; + } + switch (rows) { + case 3: + gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding); + 
break; + case 0: + break; + } + +} + +#ifdef HAVE_AS_KNOWS_AVX512 + +extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data, + unsigned char *dest); +extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char *dest); +extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); +extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); +extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); + +void ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding) +{ + + if (len < 64) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 4) { + gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding); + g_tbls += 4 * k * 32; + coding += 4; + rows -= 4; + } + switch (rows) { + case 3: + gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding); + break; + case 0: + break; + } +} + +void ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 64) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + + while (rows >= 4) { + 
gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + g_tbls += 4 * k * 32; + coding += 4; + rows -= 4; + } + switch (rows) { + case 3: + gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } +} + +#endif // HAVE_AS_KNOWS_AVX512 + +#if __WORDSIZE == 64 || _WIN64 || __x86_64__ + +void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + + while (rows > 6) { + gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 6: + gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 5: + gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 3: + gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + while (rows > 6) { + gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 6: + gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 5: + gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 
3: + gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 32) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + while (rows > 6) { + gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 6: + gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 5: + gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 3: + gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__ +#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86 + +struct slver { + unsigned short snum; + unsigned char ver; + unsigned char core; +}; + +// Version info +struct slver ec_init_tables_slver_00010068; +struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 }; + +struct slver ec_encode_data_sse_slver_00020069; +struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 }; diff --git a/src/spdk/isa-l/erasure_code/ec_multibinary.asm b/src/spdk/isa-l/erasure_code/ec_multibinary.asm new file mode 100644 index 000000000..a07f45d6f --- /dev/null +++ b/src/spdk/isa-l/erasure_code/ec_multibinary.asm @@ -0,0 +1,95 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Multibinary (runtime-dispatched) entry points for the erasure-code
; routines: ec_encode_data, gf_vect_dot_prod, gf_vect_mul,
; ec_encode_data_update and gf_vect_mad.
;
; mbin_interface, mbin_dispatch_init2/5/6 and slversion are macros that are
; not defined in this file; they presumably come from the two includes
; below (TODO confirm against multibinary.asm / reg_sizes.asm).

%include "reg_sizes.asm"
%include "multibinary.asm"

; 32-bit ELF builds declare none of the 64-bit-only variants listed in the
; %else branch below.
%ifidn __OUTPUT_FORMAT__, elf32
 [bits 32]
%else
 default rel
 [bits 64]

 extern ec_encode_data_update_sse
 extern ec_encode_data_update_avx
 extern ec_encode_data_update_avx2
; AVX-512 variants are declared only when the build defines
; HAVE_AS_KNOWS_AVX512.
%ifdef HAVE_AS_KNOWS_AVX512
 extern ec_encode_data_avx512
 extern gf_vect_dot_prod_avx512
 extern ec_encode_data_update_avx512
 extern gf_vect_mad_avx512
%endif
 extern gf_vect_mul_sse
 extern gf_vect_mul_avx

 extern gf_vect_mad_sse
 extern gf_vect_mad_avx
 extern gf_vect_mad_avx2
%endif

; Baseline implementations, declared for every output format.
extern gf_vect_mul_base
extern ec_encode_data_base
extern ec_encode_data_update_base
extern gf_vect_dot_prod_base
extern gf_vect_mad_base

; SSE/AVX/AVX2 variants that are declared for every output format
; (they are the only accelerated variants referenced in the elf32 branch).
extern gf_vect_dot_prod_sse
extern gf_vect_dot_prod_avx
extern gf_vect_dot_prod_avx2
extern ec_encode_data_sse
extern ec_encode_data_avx
extern ec_encode_data_avx2

; One dispatched interface per public function.
mbin_interface ec_encode_data
mbin_interface gf_vect_dot_prod
mbin_interface gf_vect_mul
mbin_interface ec_encode_data_update
mbin_interface gf_vect_mad

%ifidn __OUTPUT_FORMAT__, elf32
 ; elf32: only ec_encode_data and gf_vect_dot_prod list accelerated
 ; candidates; the other three list the base implementation alone.
 mbin_dispatch_init5 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2
 mbin_dispatch_init5 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2
 mbin_dispatch_init2 gf_vect_mul, gf_vect_mul_base
 mbin_dispatch_init2 ec_encode_data_update, ec_encode_data_update_base
 mbin_dispatch_init2 gf_vect_mad, gf_vect_mad_base
%else

 ; NOTE(review): gf_vect_mul passes gf_vect_mul_avx twice; no
 ; gf_vect_mul_avx2 symbol is declared in this file, so the repeat looks
 ; deliberate -- confirm against the macro's argument order.
 mbin_dispatch_init5 gf_vect_mul, gf_vect_mul_base, gf_vect_mul_sse, gf_vect_mul_avx, gf_vect_mul_avx
 ; NOTE(review): the *_avx512 symbols below are referenced outside the
 ; HAVE_AS_KNOWS_AVX512 guard used for their extern declarations;
 ; presumably mbin_dispatch_init6 handles that case itself -- verify.
 mbin_dispatch_init6 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2, ec_encode_data_avx512
 mbin_dispatch_init6 ec_encode_data_update, ec_encode_data_update_base, ec_encode_data_update_sse, ec_encode_data_update_avx, ec_encode_data_update_avx2, ec_encode_data_update_avx512
 mbin_dispatch_init6 gf_vect_mad, gf_vect_mad_base, gf_vect_mad_sse, gf_vect_mad_avx, gf_vect_mad_avx2, gf_vect_mad_avx512
 mbin_dispatch_init6 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2, gf_vect_dot_prod_avx512
%endif

;;; func core, ver, snum
slversion ec_encode_data, 00, 06, 0133
slversion gf_vect_mul, 00, 05, 0134
slversion ec_encode_data_update, 00, 05, 0212
slversion gf_vect_dot_prod, 00, 05, 0138
slversion gf_vect_mad, 00, 04, 0213
diff --git a/src/spdk/isa-l/erasure_code/erasure_code_base_perf.c b/src/spdk/isa-l/erasure_code/erasure_code_base_perf.c new file mode 100644 index 000000000..9587788d8 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/erasure_code_base_perf.c @@ -0,0 +1,176 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_SOURCES 32 +# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) +# define TEST_TYPE_STR "_warm" +#else +# ifndef TEST_CUSTOM +// Uncached test. Pull from large mem base. +# define TEST_SOURCES 32 +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define BAD_MATRIX -1 + +typedef unsigned char u8; + +void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs) +{ + ec_init_tables(k, m - k, &a[k * k], g_tbls); + ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]); +} + +int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err, + u8 * src_err_list, int nerrs, u8 ** temp_buffs) +{ + int i, j, r; + u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX]; + u8 *recov[TEST_SOURCES]; + + // Construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + recov[i] = buffs[r]; + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix(b, d, k) < 0) + 
return BAD_MATRIX; + + for (i = 0; i < nerrs; i++) + for (j = 0; j < k; j++) + c[k * i + j] = d[k * src_err_list[i] + j]; + + // Recover data + ec_init_tables(k, nerrs, c, g_tbls); + ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int i, j, m, k, nerrs, check; + void *buf; + u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + u8 a[MMAX * KMAX]; + u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; + u8 src_err_list[TEST_SOURCES]; + struct perf start; + + // Pick test parameters + m = 14; + k = 10; + nerrs = 4; + const u8 err_list[] = { 2, 4, 5, 7 }; + + printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); + + if (m > MMAX || k > KMAX || nerrs > (m - k)) { + printf(" Input test parameter error\n"); + return -1; + } + + memcpy(src_err_list, err_list, nerrs); + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0; i < nerrs; i++) + src_in_err[src_err_list[i]] = 1; + + // Allocate the arrays + for (i = 0; i < m; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < (m - k); i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + temp_buffs[i] = buf; + } + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN(m); j++) + buffs[i][j] = rand(); + + gf_gen_rs_matrix(a, m, k); + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs)); + printf("erasure_code_base_encode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); + + // Start decode test + BENCHMARK(&start, BENCHMARK_TIME, check = + ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, + temp_buffs)); + + if (check == BAD_MATRIX) { + printf("BAD MATRIX\n"); + return check; + } + + for (i = 0; i < nerrs; i++) { + if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], 
TEST_LEN(m))) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + return -1; + } + } + + printf("erasure_code_base_decode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/erasure_code_base_test.c b/src/spdk/isa-l/erasure_code/erasure_code_base_test.c new file mode 100644 index 000000000..81e1b5778 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/erasure_code_base_test.c @@ -0,0 +1,764 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) + +#ifndef TEST_SOURCES +# define TEST_SOURCES 127 +#endif +#ifndef RANDOMS +# define RANDOMS 50 +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define EFENCE_TEST_MIN_SIZE 16 + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +#ifndef TEST_SEED +#define TEST_SEED 11 +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +// Generate Random errors +static void gen_err_list(unsigned char 
*src_err_list, + unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m) +{ + int i, err; + int nerrs = 0, nsrcerrs = 0; + + for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) { + src_err_list[nerrs++] = i; + if (i < k) { + nsrcerrs++; + } + } + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= m) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + if (err < k) + nsrcerrs = 1; + } + *pnerrs = nerrs; + *pnsrcerrs = nsrcerrs; + return; +} + +#define NO_INVERT_MATRIX -2 +// Generate decode matrix from encode matrix +static int gf_gen_decode_matrix(unsigned char *encode_matrix, + unsigned char *decode_matrix, + unsigned char *invert_matrix, + unsigned int *decode_index, + unsigned char *src_err_list, + unsigned char *src_in_err, + int nerrs, int nsrcerrs, int k, int m) +{ + int i, j, p; + int r; + unsigned char *backup, *b, s; + int incr = 0; + + b = malloc(MMAX * KMAX); + backup = malloc(MMAX * KMAX); + + if (b == NULL || backup == NULL) { + printf("Test failure! 
Error with malloc\n"); + free(b); + free(backup); + return -1; + } + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_index[i] = r; + } + incr = 0; + while (gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == (m - k)) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + incr++; + memcpy(b, backup, MMAX * KMAX); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (src_err_list[i] == (decode_index[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_index[k - 1] + incr >= m) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + decode_index[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; + } + } + /* src_err_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * src_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + free(b); + free(backup); + return 0; +} + +int main(int argc, char *argv[]) +{ + int re = 0; + int i, j, p, rtest, m, k; + int nerrs, nsrcerrs; + void *buf; + unsigned int decode_index[MMAX]; + unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; + unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; + unsigned char *recov[TEST_SOURCES]; + + int rows, align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 
*temp_ubuffs[TEST_SOURCES]; + + printf("erasure_code_base_test: %dx%d ", TEST_SOURCES, TEST_LEN); + srand(TEST_SEED); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buffs[i] = buf; + } + + // Test erasure code by encode and recovery + + encode_matrix = malloc(MMAX * KMAX); + decode_matrix = malloc(MMAX * KMAX); + invert_matrix = malloc(MMAX * KMAX); + g_tbls = malloc(KMAX * TEST_SOURCES * 32); + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || g_tbls == NULL) { + printf("Test failure! Error with malloc\n"); + return -1; + } + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Generate encode matrix encode_matrix + // The matrix generated by gf_gen_rs_matrix + // is not always invertable. 
+ gf_gen_rs_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix 
generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Do more random tests + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % 
KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + 
printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + putchar('.'); + } + + // Run tests at end of buffer for Electric Fence + k = 16; + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + if (k > KMAX) + return -1; + + for (rows = 1; rows <= 16; rows++) { + m = k + rows; + if (m > MMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < m; i++) { // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(size, k, m - k, g_tbls, efence_buffs, + &efence_buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = efence_buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]], + size)) { + printf("Efence: Fail error recovery (%d, %d, %d)\n", m, + k, nerrs); + + printf("size = %d\n", size); + 
+ printf("Test erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], align); + printf("orig :"); + dump(efence_buffs[src_err_list[i]], align); + return -1; + } + } + } + + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; + + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < m; i++) { + memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + } + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = ubuffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(ubuffs, m, 25); + printf("orig :"); + dump(ubuffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_ubuffs[k + i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < 
m; i++) { + + offset = ubuffs[i] - buffs[i]; + + if (memcmp(buffs[i], temp_buffs[0], offset)) { + printf("Fail rand ualign encode pad start\n"); + return -1; + } + if (memcmp + (buffs[i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign encode pad end\n"); + return -1; + } + } + + for (i = 0; i < nerrs; i++) { + + offset = temp_ubuffs[k + i] - temp_buffs[k + i]; + if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) { + printf("Fail rand ualign decode pad start\n"); + return -1; + } + if (memcmp + (temp_buffs[k + i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign decode pad end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test size alignment + + align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16; + + for (size = TEST_LEN; size > 0; size -= align) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(size, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + } + + printf("done EC tests: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/erasure_code_perf.c 
b/src/spdk/isa-l/erasure_code/erasure_code_perf.c new file mode 100644 index 000000000..c4cad880f --- /dev/null +++ b/src/spdk/isa-l/erasure_code/erasure_code_perf.c @@ -0,0 +1,176 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_SOURCES 32 +# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) +# define TEST_TYPE_STR "_warm" +#else +# ifndef TEST_CUSTOM +// Uncached test. Pull from large mem base. +# define TEST_SOURCES 32 +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define BAD_MATRIX -1 + +typedef unsigned char u8; + +void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs) +{ + ec_init_tables(k, m - k, &a[k * k], g_tbls); + ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]); +} + +int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err, + u8 * src_err_list, int nerrs, u8 ** temp_buffs) +{ + int i, j, r; + u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX]; + u8 *recov[TEST_SOURCES]; + + // Construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + recov[i] = buffs[r]; + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix(b, d, k) < 0) + return BAD_MATRIX; + + for (i = 0; i < nerrs; i++) + for (j = 0; j < k; j++) + c[k * i + j] = d[k * src_err_list[i] + j]; + + // Recover data + ec_init_tables(k, nerrs, c, g_tbls); + ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int i, j, m, k, nerrs, check; + void *buf; + u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + u8 a[MMAX * KMAX]; + u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; + u8 src_err_list[TEST_SOURCES]; + struct perf 
start; + + // Pick test parameters + m = 14; + k = 10; + nerrs = 4; + const u8 err_list[] = { 2, 4, 5, 7 }; + + printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); + + if (m > MMAX || k > KMAX || nerrs > (m - k)) { + printf(" Input test parameter error\n"); + return -1; + } + + memcpy(src_err_list, err_list, nerrs); + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0; i < nerrs; i++) + src_in_err[src_err_list[i]] = 1; + + // Allocate the arrays + for (i = 0; i < m; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < (m - k); i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + temp_buffs[i] = buf; + } + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN(m); j++) + buffs[i][j] = rand(); + + gf_gen_rs_matrix(a, m, k); + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs)); + printf("erasure_code_encode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); + + // Start decode test + BENCHMARK(&start, BENCHMARK_TIME, check = + ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, + temp_buffs)); + + if (check == BAD_MATRIX) { + printf("BAD MATRIX\n"); + return check; + } + + for (i = 0; i < nerrs; i++) { + if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + return -1; + } + } + + printf("erasure_code_decode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/erasure_code_test.c b/src/spdk/isa-l/erasure_code/erasure_code_test.c new file mode 100644 index 000000000..a1736afd5 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/erasure_code_test.c @@ -0,0 +1,764 @@ 
+/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) + +#ifndef TEST_SOURCES +# define TEST_SOURCES 127 +#endif +#ifndef RANDOMS +# define RANDOMS 200 +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define EFENCE_TEST_MIN_SIZE 16 +#define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100 + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +#ifndef TEST_SEED +#define TEST_SEED 11 +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +// Generate Random errors +static void gen_err_list(unsigned char *src_err_list, + unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m) +{ + int i, err; + int nerrs = 0, nsrcerrs = 0; + + for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) { + src_err_list[nerrs++] = i; + if (i < k) { + nsrcerrs++; + } + } + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= m) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 
1; + if (err < k) + nsrcerrs = 1; + } + *pnerrs = nerrs; + *pnsrcerrs = nsrcerrs; + return; +} + +#define NO_INVERT_MATRIX -2 +// Generate decode matrix from encode matrix +static int gf_gen_decode_matrix(unsigned char *encode_matrix, + unsigned char *decode_matrix, + unsigned char *invert_matrix, + unsigned int *decode_index, + unsigned char *src_err_list, + unsigned char *src_in_err, + int nerrs, int nsrcerrs, int k, int m) +{ + int i, j, p; + int r; + unsigned char *backup, *b, s; + int incr = 0; + + b = malloc(MMAX * KMAX); + backup = malloc(MMAX * KMAX); + + if (b == NULL || backup == NULL) { + printf("Test failure! Error with malloc\n"); + free(b); + free(backup); + return -1; + } + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_index[i] = r; + } + incr = 0; + while (gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == (m - k)) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + incr++; + memcpy(b, backup, MMAX * KMAX); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (src_err_list[i] == (decode_index[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_index[k - 1] + incr >= m) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + decode_index[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; + } + } + /* src_err_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * 
src_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + free(b); + free(backup); + return 0; +} + +int main(int argc, char *argv[]) +{ + int re = 0; + int i, j, p, rtest, m, k; + int nerrs, nsrcerrs; + void *buf; + unsigned int decode_index[MMAX]; + unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; + unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; + unsigned char *recov[TEST_SOURCES]; + + int rows, align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *temp_ubuffs[TEST_SOURCES]; + + printf("erasure_code_test: %dx%d ", TEST_SOURCES, TEST_LEN); + srand(TEST_SEED); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buffs[i] = buf; + } + + // Test erasure code by encode and recovery + + encode_matrix = malloc(MMAX * KMAX); + decode_matrix = malloc(MMAX * KMAX); + invert_matrix = malloc(MMAX * KMAX); + g_tbls = malloc(KMAX * TEST_SOURCES * 32); + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || g_tbls == NULL) { + printf("Test failure! Error with malloc\n"); + return -1; + } + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Generate encode matrix encode_matrix + // The matrix generated by gf_gen_rs_matrix + // is not always invertable. 
+ gf_gen_rs_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated 
by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Do more random tests + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + 
+ if (m > MMAX || k > KMAX) + continue; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + 
dump(temp_buffs[k + i], 25); + return -1; + } + } + putchar('.'); + } + + // Run tests at end of buffer for Electric Fence + k = 16; + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + if (k > KMAX) + return -1; + + for (rows = 1; rows <= 16; rows++) { + m = k + rows; + if (m > MMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (size = EFENCE_TEST_MIN_SIZE; size <= EFENCE_TEST_MAX_SIZE; size += align) { + for (i = 0; i < m; i++) { // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = efence_buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]], + size)) { + printf("Efence: Fail error recovery (%d, %d, %d)\n", m, + k, nerrs); + + printf("size = %d\n", size); + + printf("Test erase list = "); + for (j 
= 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], align); + printf("orig :"); + dump(efence_buffs[src_err_list[i]], align); + return -1; + } + } + } + + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; + + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < m; i++) { + memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + } + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = ubuffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(ubuffs, m, 25); + printf("orig :"); + dump(ubuffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_ubuffs[k + i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < m; i++) { 
+ + offset = ubuffs[i] - buffs[i]; + + if (memcmp(buffs[i], temp_buffs[0], offset)) { + printf("Fail rand ualign encode pad start\n"); + return -1; + } + if (memcmp + (buffs[i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign encode pad end\n"); + return -1; + } + } + + for (i = 0; i < nerrs; i++) { + + offset = temp_ubuffs[k + i] - temp_buffs[k + i]; + if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) { + printf("Fail rand ualign decode pad start\n"); + return -1; + } + if (memcmp + (temp_buffs[k + i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign decode pad end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test size alignment + + align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16; + + for (size = TEST_LEN; size > 0; size -= align) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(size, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + } + + printf("done EC tests: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/erasure_code_update_perf.c 
b/src/spdk/isa-l/erasure_code/erasure_code_update_perf.c new file mode 100644 index 000000000..909e89414 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/erasure_code_update_perf.c @@ -0,0 +1,281 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "types.h" +#include "test.h" + +//By default, test multibinary version +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST ec_encode_data_update +# define REF_FUNCTION ec_encode_data +#endif + +//By default, test EC(8+4) +#if (!defined(VECT)) +# define VECT 4 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_SOURCES 32 +# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) +# define TEST_TYPE_STR "_warm" +#else +# ifndef TEST_CUSTOM +// Uncached test. Pull from large mem base. +# define TEST_SOURCES 32 +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void encode_update_test_ref(int m, int k, u8 * g_tbls, u8 ** buffs, u8 * a) +{ + ec_init_tables(k, m - k, &a[k * k], g_tbls); + REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]); +} + +void encode_update_test(int m, int k, u8 * g_tbls, u8 ** perf_update_buffs, u8 * a) +{ + int i; + + // Make parity vects + ec_init_tables(k, m - k, &a[k * k], g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, + perf_update_buffs[i], &perf_update_buffs[k]); + } +} + +int decode_test(int m, int k, u8 ** update_buffs, u8 ** recov, u8 * a, u8 * src_in_err, + u8 * src_err_list, int nerrs, u8 * g_tbls, u8 ** perf_update_buffs) +{ + int i, j, r; + u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX]; + 
// Construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + recov[i] = update_buffs[r]; + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix(b, d, k) < 0) { + printf("BAD MATRIX\n"); + return -1; + } + + for (i = 0; i < nerrs; i++) + for (j = 0; j < k; j++) + c[k * i + j] = d[k * src_err_list[i] + j]; + + // Recover data + ec_init_tables(k, nerrs, c, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i], + perf_update_buffs); + } + return 0; +} + +int main(int argc, char *argv[]) +{ + int i, j, check, m, k, nerrs; + void *buf; + u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + u8 *update_buffs[TEST_SOURCES]; + u8 *perf_update_buffs[TEST_SOURCES]; + u8 a[MMAX * KMAX]; + u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; + u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; + struct perf start; + + // Pick test parameters + k = 10; + m = k + VECT; + nerrs = VECT; + const u8 err_list[] = { 0, 2, 4, 5, 7, 8 }; + + printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); + + if (m > MMAX || k > KMAX || nerrs > (m - k)) { + printf(" Input test parameter error\n"); + return -1; + } + + memcpy(src_err_list, err_list, nerrs); + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0; i < nerrs; i++) + src_in_err[src_err_list[i]] = 1; + + // Allocate the arrays + for (i = 0; i < m; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < (m - k); i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + temp_buffs[i] = buf; + memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail"); + return -1; + } + 
update_buffs[i] = buf; + memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail"); + return -1; + } + perf_update_buffs[i] = buf; + memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN(m); j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + + gf_gen_rs_matrix(a, m, k); + + encode_update_test_ref(m, k, g_tbls, buffs, a); + encode_update_test(m, k, g_tbls, update_buffs, a); + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + +#ifdef DO_REF_PERF + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a)); + printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); +#endif + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, + encode_update_test(m, k, g_tbls, perf_update_buffs, a)); + printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, + // Make parity vects + ec_init_tables(k, m - k, &a[k * k], g_tbls); + FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0], + &perf_update_buffs[k])); + printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1)); + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, + // Make parity vects + FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0], + &perf_update_buffs[k])); + 
printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1)); + + for (i = k; i < m; i++) { + memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + + decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, + nerrs, g_tbls, temp_buffs); + BENCHMARK(&start, BENCHMARK_TIME, check = + decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, + nerrs, g_tbls, perf_update_buffs)); + if (check) { + printf("BAD_MATRIX\n"); + return -1; + } + + for (i = 0; i < nerrs; i++) { + if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) { + printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs); + return -1; + } + } + + printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/erasure_code_update_test.c b/src/spdk/isa-l/erasure_code/erasure_code_update_test.c new file mode 100644 index 000000000..f30a6a29b --- /dev/null +++ b/src/spdk/isa-l/erasure_code/erasure_code_update_test.c @@ -0,0 +1,959 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef ALIGN_SIZE +# define ALIGN_SIZE 16 +#endif + +//By default, test multibinary version +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST ec_encode_data_update +# define REF_FUNCTION ec_encode_data +#endif + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) + +#ifndef TEST_SOURCES +# define TEST_SOURCES 127 +#endif +#ifndef RANDOMS +# define RANDOMS 200 +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define EFENCE_TEST_MAX_SIZE 0x100 + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B ALIGN_SIZE +# define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only +#endif + +#ifndef TEST_SEED +#define TEST_SEED 11 
+#endif + +#define str(s) #s +#define xstr(s) str(s) + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +// Generate Random errors +static void gen_err_list(unsigned char *src_err_list, + unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m) +{ + int i, err; + int nerrs = 0, nsrcerrs = 0; + + for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) { + src_err_list[nerrs++] = i; + if (i < k) { + nsrcerrs++; + } + } + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= m) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + if (err < k) + nsrcerrs = 1; + } + *pnerrs = nerrs; + *pnsrcerrs = nsrcerrs; + return; +} + +#define NO_INVERT_MATRIX -2 +// Generate decode matrix from encode matrix +static int gf_gen_decode_matrix(unsigned char *encode_matrix, + unsigned char *decode_matrix, + unsigned char *invert_matrix, + unsigned int *decode_index, + unsigned char *src_err_list, + unsigned char *src_in_err, + int nerrs, int nsrcerrs, int k, int m) +{ + int i, j, p; + int r; + unsigned char *backup, *b, s; + int incr = 0; + + b = malloc(MMAX * KMAX); + backup = malloc(MMAX * KMAX); + + if (b == NULL || backup == NULL) { + printf("Test failure! 
Error with malloc\n"); + free(b); + free(backup); + return -1; + } + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_index[i] = r; + } + incr = 0; + while (gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == (m - k)) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + incr++; + memcpy(b, backup, MMAX * KMAX); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (src_err_list[i] == (decode_index[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_index[k - 1] + incr >= m) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + decode_index[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; + } + } + /* src_err_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * src_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + free(b); + free(backup); + return 0; +} + +int main(int argc, char *argv[]) +{ + int re = 0; + int i, j, p, rtest, m, k; + int nerrs, nsrcerrs; + void *buf; + unsigned int decode_index[MMAX]; + unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + unsigned char *update_buffs[TEST_SOURCES]; + unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; + unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; + unsigned char *recov[TEST_SOURCES]; + + int rows, align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned char 
*efence_update_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *update_ubuffs[TEST_SOURCES]; + u8 *temp_ubuffs[TEST_SOURCES]; + + printf("test " xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + srand(TEST_SEED); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buffs[i] = buf; + memset(temp_buffs[i], 0, TEST_LEN); // initialize the destination buffer to be zero for update function + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + update_buffs[i] = buf; + memset(update_buffs[i], 0, TEST_LEN); // initialize the destination buffer to be zero for update function + } + // Test erasure code by encode and recovery + + encode_matrix = malloc(MMAX * KMAX); + decode_matrix = malloc(MMAX * KMAX); + invert_matrix = malloc(MMAX * KMAX); + g_tbls = malloc(KMAX * TEST_SOURCES * 32); + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || g_tbls == NULL) { + printf("Test failure! Error with malloc\n"); + return -1; + } + // Pick a first test + m = 14; + k = 10; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // Generate encode matrix encode_matrix + // The matrix generated by gf_gen_rs_matrix + // is not always invertable. 
+ gf_gen_rs_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + REF_FUNCTION(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, 
m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + return -1; + } + } + putchar('.'); + + // Pick a first test + m = 7; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Zero the destination buffer for update function + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, TEST_LEN); + memset(update_buffs[i], 0, TEST_LEN); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + for (i 
= 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, nerrs, i, g_tbls, recov[i], &temp_buffs[k]); + } + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + return -1; + } + } + putchar('.'); + + // Do more random tests + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + // Zero the destination buffer for update function + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, TEST_LEN); + memset(update_buffs[i], 0, TEST_LEN); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, nerrs, i, g_tbls, recov[i], + &temp_buffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], + TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv 
b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(update_buffs, m, 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + putchar('.'); + } + + // Run tests at end of buffer for Electric Fence + k = 16; + align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE; + if (k > KMAX) + return -1; + + for (rows = 1; rows <= 16; rows++) { + m = k + rows; + if (m > MMAX) + return -1; + + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, TEST_LEN); + memset(update_buffs[i], 0, TEST_LEN); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + for (size = 0; size <= EFENCE_TEST_MAX_SIZE; size += align) { + for (i = 0; i < m; i++) { // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + efence_update_buffs[i] = update_buffs[i] + TEST_LEN - size; + } + // Zero the destination buffer for update function + for (i = k; i < m; i++) { + memset(efence_buffs[i], 0, size); + memset(efence_update_buffs[i], 0, size); + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, + efence_update_buffs[i], + &efence_update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != + memcmp(efence_update_buffs[k + i], efence_buffs[k + i], + size)) { + printf("\nefence_update_buffs%d :", i); + dump(efence_update_buffs[k + i], 25); + printf("efence_buffs%d :", i); + dump(efence_buffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = efence_update_buffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i], + &temp_buffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], + efence_update_buffs[src_err_list[i]], size)) { + printf("Efence: Fail error recovery (%d, %d, %d)\n", m, + k, nerrs); + + printf("size = %d\n", size); + + printf("Test erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" 
%d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], align); + printf("orig :"); + dump(efence_update_buffs[src_err_list[i]], align); + return -1; + } + } + } + putchar('.'); + + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; + + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < m; i++) { + memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(update_buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + update_ubuffs[i] = + update_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + } + + // Zero the destination buffer for update function + for (i = k; i < m; i++) { + memset(ubuffs[i], 0, size); + memset(update_ubuffs[i], 0, size); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < size; j++) { + ubuffs[i][j] = rand(); + update_ubuffs[i][j] = ubuffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, update_ubuffs[i], + &update_ubuffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_ubuffs[k + i], ubuffs[k + i], size)) { + printf("\nupdate_ubuffs%d :", i); + dump(update_ubuffs[k + i], 25); + printf("ubuffs%d :", i); + dump(ubuffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_ubuffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < m; i++) { + memset(temp_ubuffs[i], 0, size); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i], + &temp_ubuffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_ubuffs[k + i], update_ubuffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + 
dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(update_ubuffs, m, 25); + printf("orig :"); + dump(update_ubuffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_ubuffs[k + i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < m; i++) { + + offset = update_ubuffs[i] - update_buffs[i]; + + if (memcmp(update_buffs[i], temp_buffs[0], offset)) { + printf("Fail rand ualign encode pad start\n"); + return -1; + } + if (memcmp + (update_buffs[i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign encode pad end\n"); + return -1; + } + } + + for (i = 0; i < nerrs; i++) { + + offset = temp_ubuffs[k + i] - temp_buffs[k + i]; + if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) { + printf("Fail rand ualign decode pad start\n"); + return -1; + } + if (memcmp + (temp_buffs[k + i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign decode pad end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test size alignment + + align = (LEN_ALIGN_CHK_B != 0) ? 13 : ALIGN_SIZE; + + for (size = TEST_LEN; size >= 0; size -= align) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + // Zero the destination buffer for update function + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, size); + memset(update_buffs[i], 0, size); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < size; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(size, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], size)) { + printf("\nupdate_buffs%d (size=%d) :", i, size); + dump(update_buffs[k + i], 25); + printf("buffs%d (size=%d) :", i, size); + dump(buffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i], + &temp_buffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + 
printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(update_buffs, m, 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + putchar('.'); + } + + printf("done EC tests: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/gen_rs_matrix_limits.c b/src/spdk/isa-l/erasure_code/gen_rs_matrix_limits.c new file mode 100644 index 000000000..85061484b --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gen_rs_matrix_limits.c @@ -0,0 +1,115 @@ +#include +#include +#include +#include "erasure_code.h" + +#define MAX_CHECK 63 /* Size is limited by using uint64_t to represent subsets */ +#define M_MAX 0x20 +#define K_MAX 0x10 +#define ROWS M_MAX +#define COLS K_MAX + +static inline int min(int a, int b) +{ + if (a <= b) + return a; + else + return b; +} + +void gen_sub_matrix(unsigned char *out_matrix, int dim, unsigned char *in_matrix, int rows, + int cols, uint64_t row_indicator, uint64_t col_indicator) +{ + int i, j, r, s; + + for (i = 0, r = 0; i < rows; i++) { + if (!(row_indicator & ((uint64_t) 1 << i))) + continue; + + for (j = 0, s = 0; j < cols; j++) { + if (!(col_indicator & ((uint64_t) 1 << j))) + continue; + out_matrix[dim * r + s] = in_matrix[cols * i + j]; + s++; + } + r++; + } +} + +/* Gosper's Hack */ +uint64_t next_subset(uint64_t * subset, uint64_t element_count, uint64_t subsize) +{ + uint64_t tmp1 = *subset & -*subset; + uint64_t tmp2 = *subset + tmp1; + *subset = (((*subset ^ tmp2) >> 2) / tmp1) | tmp2; + if (*subset & (((uint64_t) 1 << element_count))) { + /* Overflow on last subset */ + *subset = ((uint64_t) 1 << subsize) - 1; + return 1; + } + + return 0; +} + +int are_submatrices_singular(unsigned char *vmatrix, int rows, int cols) +{ + unsigned char matrix[COLS * COLS]; + unsigned char 
invert_matrix[COLS * COLS]; + uint64_t row_indicator, col_indicator, subset_init, subsize; + + /* Check all square subsize x subsize submatrices of the rows x cols + * vmatrix for singularity*/ + for (subsize = 1; subsize <= min(rows, cols); subsize++) { + subset_init = (1 << subsize) - 1; + col_indicator = subset_init; + do { + row_indicator = subset_init; + do { + gen_sub_matrix(matrix, subsize, vmatrix, rows, + cols, row_indicator, col_indicator); + if (gf_invert_matrix(matrix, invert_matrix, subsize)) + return 1; + + } while (next_subset(&row_indicator, rows, subsize) == 0); + } while (next_subset(&col_indicator, cols, subsize) == 0); + } + + return 0; +} + +int main(int argc, char **argv) +{ + unsigned char vmatrix[(ROWS + COLS) * COLS]; + int rows, cols; + + if (K_MAX > MAX_CHECK) { + printf("K_MAX too large for this test\n"); + return 0; + } + if (M_MAX > MAX_CHECK) { + printf("M_MAX too large for this test\n"); + return 0; + } + if (M_MAX < K_MAX) { + printf("M_MAX must be smaller than K_MAX"); + return 0; + } + + printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX); + printf("gen_rs_matrix creates erasure codes for:\n"); + + for (cols = 1; cols <= K_MAX; cols++) { + for (rows = 1; rows <= M_MAX - cols; rows++) { + gf_gen_rs_matrix(vmatrix, rows + cols, cols); + + /* Verify the Vandermonde portion of vmatrix contains no + * singular submatrix */ + if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols)) + break; + + } + printf(" k = %2d, m <= %2d \n", cols, rows + cols - 1); + + } + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm new file mode 100644 index 000000000..6b68d93f5 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm @@ -0,0 +1,337 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Nibble-table dot product of vec source buffers into two destination buffers.
+;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
+;;; Works 16 bytes per pass; returns 0 on success, 1 when len < 16.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r9
+ %define tmp4 r12	; must be saved and restored
+ %define return rax
+ %macro SLDR 2	; no-op on 64-bit targets: nothing lives in stack slots
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12	; must be saved, loaded and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13	; must be saved and restored
+ %define tmp4 r14	; must be saved and restored
+ %define return rax
+ %macro SLDR 2	; no-op on 64-bit targets: nothing lives in stack slots
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 3*16 + 3*8	; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack stack_size
+	save_xmm128 xmm6, 0*16
+	save_xmm128 xmm7, 1*16
+	save_xmm128 xmm8, 2*16
+	save_reg r12, 3*16 + 0*8
+	save_reg r13, 3*16 + 1*8
+	save_reg r14, 3*16 + 2*8
+	end_prolog
+	mov arg4, arg(4)	; fifth argument is read from the stack
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa xmm6, [rsp + 0*16]
+	vmovdqa xmm7, [rsp + 1*16]
+	vmovdqa xmm8, [rsp + 2*16]
+	mov r12, [rsp + 3*16 + 0*8]
+	mov r13, [rsp + 3*16 + 1*8]
+	mov r14, [rsp + 3*16 + 2*8]
+	add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x:
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans	;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp4 trans2
+ %define tmp4_m var(0)
+ %define return eax
+ %macro SLDR 2	;; stack load/restore
+	mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+	push ebp
+	mov ebp, esp
+	sub esp, PS*1	;1 local variable
+	push esi
+	push edi
+	push ebx
+	mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop ebx
+	pop edi
+	pop esi
+	add esp, PS*1	;1 local variable
+	pop ebp
+ %endmacro
+
+%endif	; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define dest2 tmp4
+%define pos return
+
+ %ifidn PS,4	;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store (aligned load/store when NO_NT_LDST is defined)
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8	; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%ifidn PS,8	;64-bit code
+ %define xmask0f xmm8
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xmm5
+ %define xgft2_hi xmm4
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%else	;32-bit code
+ %define xmask0f xmm4
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%endif
+
+align 16
+global gf_2vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
+
+func(gf_2vect_dot_prod_avx)
+	FUNC_SAVE
+	SLDR len, len_m
+	sub len, 16
+	SSTR len_m, len
+	jl .return_fail	;len < 16: fail before touching any buffer
+	xor pos, pos
+	vmovdqa xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+	sal vec, LOG_PS	;vec *= PS. Make vec_i count by PS
+	SLDR dest1, dest1_m
+	mov dest2, [dest1+PS]
+	SSTR dest2_m, dest2
+	mov dest1, [dest1]
+	SSTR dest1_m, dest1
+
+.loop16:
+	vpxor xp1, xp1
+	vpxor xp2, xp2
+	mov tmp, mul_array	;tmp walks the first table set, 32 bytes per source
+	xor vec_i, vec_i
+
+.next_vect:
+	SLDR src, src_m
+	mov ptr, [src+vec_i]
+
+	vmovdqu xgft1_lo, [tmp]	;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+	vmovdqu xgft1_hi, [tmp+16]	; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %ifidn PS,8	; 64-bit code
+	vmovdqu xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16]	; " Bx{00}, Bx{10}, ..., Bx{f0}
+	add tmp, 32
+	add vec_i, PS
+ %endif
+	XLDR x0, [ptr+pos]	;Get next source vector
+
+	vpand xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4	;Shift to put high nibble into bits 3-0
+	vpand x0, x0, xmask0f	;Mask high src nibble in bits 3-0
+
+	vpshufb xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor xgft1_hi, xgft1_lo	;GF add high and low partials
+	vpxor xp1, xgft1_hi	;xp1 += partial
+
+ %ifidn PS,4	; 32-bit code
+	vmovdqu xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16]	; " Bx{00}, Bx{10}, ..., Bx{f0}
+	add tmp, 32
+	add vec_i, PS
+ %endif
+	vpshufb xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor xgft2_hi, xgft2_lo	;GF add high and low partials
+	vpxor xp2, xgft2_hi	;xp2 += partial
+
+	cmp vec_i, vec
+	jl .next_vect	;more source buffers to fold in
+
+	SLDR dest1, dest1_m
+	SLDR dest2, dest2_m
+	XSTR [dest1+pos], xp1
+	XSTR [dest2+pos], xp2
+
+	SLDR len, len_m
+	add pos, 16	;Loop on 16 bytes at a time
+	cmp pos, len
+	jle .loop16
+
+	lea tmp, [len + 16]	;tmp = original len (len was reduced by 16 on entry)
+	cmp pos, tmp
+	je .return_pass	;pos reached the original len: no tail left
+
+	;; Tail len
+	mov pos, len	;Overlapped offset length-16
+	jmp .loop16	;Do one more overlap pass
+
+.return_pass:
+	mov return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_2vect_dot_prod_avx, 02, 05, 0191
diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..db37b0e2e
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm
@@ -0,0 +1,356 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Nibble-table dot product of vec source buffers into two destination buffers.
+;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
+;;; Works 32 bytes per pass; returns 0 on success, 1 when len < 32.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r9
+ %define tmp4 r12	; must be saved and restored
+ %define return rax
+ %macro SLDR 2	; no-op on 64-bit targets: nothing lives in stack slots
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12	; must be saved, loaded and restored
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r13	; must be saved and restored
+ %define tmp4 r14	; must be saved and restored
+ %define return rax
+ %macro SLDR 2	; no-op on 64-bit targets: nothing lives in stack slots
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 3*16 + 3*8	; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack stack_size
+	vmovdqa [rsp + 0*16], xmm6
+	vmovdqa [rsp + 1*16], xmm7
+	vmovdqa [rsp + 2*16], xmm8
+	save_reg r12, 3*16 + 0*8
+	save_reg r13, 3*16 + 1*8
+	save_reg r14, 3*16 + 2*8
+	end_prolog
+	mov arg4, arg(4)	; fifth argument is read from the stack
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa xmm6, [rsp + 0*16]
+	vmovdqa xmm7, [rsp + 1*16]
+	vmovdqa xmm8, [rsp + 2*16]
+	mov r12, [rsp + 3*16 + 0*8]
+	mov r13, [rsp + 3*16 + 1*8]
+	mov r14, [rsp + 3*16 + 2*8]
+	add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x:
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans	;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp.w edx
+ %define tmp.b dl
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp4 trans2
+ %define tmp4_m var(0)
+ %define return eax
+ %macro SLDR 2	;stack load/restore
+	mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+	push ebp
+	mov ebp, esp
+	sub esp, PS*1	;1 local variable
+	push esi
+	push edi
+	push ebx
+	mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop ebx
+	pop edi
+	pop esi
+	add esp, PS*1	;1 local variable
+	pop ebp
+ %endmacro
+
+%endif	; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define dest2 tmp4
+%define pos return
+
+%ifidn PS,4	;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp4_m
+%endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+
+;;; Use Non-temporal load/store (aligned load/store when NO_NT_LDST is defined)
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8	;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%ifidn PS,8	;64-bit code
+ %define xmask0f ymm8
+ %define xmask0fx xmm8
+ %define xgft1_lo ymm7
+ %define xgft1_hi ymm6
+ %define xgft2_lo ymm5
+ %define xgft2_hi ymm4
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+%else	;32-bit code
+ %define xmask0f ymm7
+ %define xmask0fx xmm7
+ %define xgft1_lo ymm5
+ %define xgft1_hi ymm4
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+
+ %define x0 ymm0
+ %define xtmpa ymm1
+ %define xp1 ymm2
+ %define xp2 ymm3
+
+%endif
+
+align 16
+global gf_2vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
+
+func(gf_2vect_dot_prod_avx2)
+	FUNC_SAVE
+	SLDR len, len_m
+	sub len, 32
+	SSTR len_m, len
+	jl .return_fail	;len < 32: fail before touching any buffer
+	xor pos, pos
+	mov tmp.b, 0x0f
+	vpinsrb xmask0fx, xmask0fx, tmp.w, 0
+	vpbroadcastb xmask0f, xmask0fx	;Construct mask 0x0f0f0f...
+
+	sal vec, LOG_PS	;vec *= PS. Make vec_i count by PS
+	SLDR dest1, dest1_m
+	mov dest2, [dest1+PS]
+	SSTR dest2_m, dest2
+	mov dest1, [dest1]
+	SSTR dest1_m, dest1
+
+.loop32:
+	vpxor xp1, xp1
+	vpxor xp2, xp2
+	mov tmp, mul_array	;tmp walks the first table set, 32 bytes per source
+	xor vec_i, vec_i
+
+.next_vect:
+	SLDR src, src_m
+	mov ptr, [src+vec_i]
+
+	vmovdqu xgft1_lo, [tmp]	;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+	; " Ax{00}, Ax{10}, ..., Ax{f0}
+	vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11	; swapped to hi | hi
+	vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00	; swapped to lo | lo
+ %ifidn PS,8	; 64-bit code
+	vmovdqu xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	; " Bx{00}, Bx{10}, ..., Bx{f0}
+	vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11	; swapped to hi | hi
+	vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00	; swapped to lo | lo
+
+	XLDR x0, [ptr+pos]	;Get next source vector
+	add tmp, 32
+	add vec_i, PS
+ %else
+	XLDR x0, [ptr+pos]	;Get next source vector
+ %endif
+
+	vpand xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4	;Shift to put high nibble into bits 3-0
+	vpand x0, x0, xmask0f	;Mask high src nibble in bits 3-0
+
+	vpshufb xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor xgft1_hi, xgft1_lo	;GF add high and low partials
+	vpxor xp1, xgft1_hi	;xp1 += partial
+
+ %ifidn PS,4	; 32-bit code
+	vmovdqu xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	; " Bx{00}, Bx{10}, ..., Bx{f0}
+	vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11	; swapped to hi | hi
+	vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00	; swapped to lo | lo
+	add tmp, 32
+	add vec_i, PS
+ %endif
+	vpshufb xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor xgft2_hi, xgft2_lo	;GF add high and low partials
+	vpxor xp2, xgft2_hi	;xp2 += partial
+
+	cmp vec_i, vec
+	jl .next_vect	;more source buffers to fold in
+
+	SLDR dest1, dest1_m
+	SLDR dest2, dest2_m
+	XSTR [dest1+pos], xp1
+	XSTR [dest2+pos], xp2
+
+	SLDR len, len_m
+	add pos, 32	;Loop on 32 bytes at a time
+	cmp pos, len
+	jle .loop32
+
+	lea tmp, [len + 32]	;tmp = original len (len was reduced by 32 on entry)
+	cmp pos, tmp
+	je .return_pass	;pos reached the original len: no tail left
+
+	;; Tail len
+	mov pos, len	;Overlapped offset length-32
+	jmp .loop32	;Do one more overlap pass
+
+.return_pass:
+	mov return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_2vect_dot_prod_avx2, 04, 05, 0196
diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..470051d69
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm
@@ -0,0 +1,245 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Nibble-table dot product of vec source buffers into two destination buffers.
+;;; gf_2vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests);
+;;; Works 64 bytes per pass; returns 0 on success, 1 when len < 64.
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r12	; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12	; must be saved, loaded and restored
+ %define arg5 r15	; must be saved and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13	; must be saved and restored
+ %define return rax
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 9*16 + 5*8	; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack stack_size
+	vmovdqa [rsp + 0*16], xmm6
+	vmovdqa [rsp + 1*16], xmm7
+	vmovdqa [rsp + 2*16], xmm8
+	vmovdqa [rsp + 3*16], xmm9
+	vmovdqa [rsp + 4*16], xmm10
+	vmovdqa [rsp + 5*16], xmm11
+	vmovdqa [rsp + 6*16], xmm12
+	vmovdqa [rsp + 7*16], xmm13
+	vmovdqa [rsp + 8*16], xmm14
+	save_reg r12, 9*16 + 0*8
+	save_reg r13, 9*16 + 1*8
+	save_reg r14, 9*16 + 2*8
+	save_reg r15, 9*16 + 3*8
+	end_prolog
+	mov arg4, arg(4)	; fifth argument is read from the stack
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa xmm6, [rsp + 0*16]
+	vmovdqa xmm7, [rsp + 1*16]
+	vmovdqa xmm8, [rsp + 2*16]
+	vmovdqa xmm9, [rsp + 3*16]
+	vmovdqa xmm10, [rsp + 4*16]
+	vmovdqa xmm11, [rsp + 5*16]
+	vmovdqa xmm12, [rsp + 6*16]
+	vmovdqa xmm13, [rsp + 7*16]
+	vmovdqa xmm14, [rsp + 8*16]
+	mov r12, [rsp + 9*16 + 0*8]
+	mov r13, [rsp + 9*16 + 1*8]
+	mov r14, [rsp + 9*16 + 2*8]
+	mov r15, [rsp + 9*16 + 3*8]
+	add rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+%define ptr arg5
+%define vec_i tmp2
+%define dest2 tmp3
+%define pos return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use Non-temporal load/store (aligned load/store when NO_NT_LDST is defined)
+ %ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+ %else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+ %endif
+%endif
+
+%define xmask0f zmm8
+%define xgft1_lo zmm7
+%define xgft1_loy ymm7
+%define xgft1_hi zmm6
+%define xgft2_lo zmm5
+%define xgft2_loy ymm5
+%define xgft2_hi zmm4
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xp1 zmm2
+%define xp2 zmm3
+
+default rel
+[bits 64]
+
+section .text
+
+align 16
+global gf_2vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
+func(gf_2vect_dot_prod_avx512)
+	FUNC_SAVE
+	sub len, 64
+	jl .return_fail	;len < 64: fail before touching any buffer
+
+	xor pos, pos
+	mov tmp, 0x0f
+	vpbroadcastb xmask0f, tmp	;Construct mask 0x0f0f0f...
+	sal vec, LOG_PS	;vec *= PS. Make vec_i count by PS
+	mov dest2, [dest1+PS]
+	mov dest1, [dest1]
+
+.loop64:
+	vpxorq xp1, xp1, xp1
+	vpxorq xp2, xp2, xp2
+	mov tmp, mul_array	;tmp walks the first table set, 32 bytes per source
+	xor vec_i, vec_i
+
+.next_vect:
+	mov ptr, [src+vec_i]
+	XLDR x0, [ptr+pos]	;Get next source vector
+	add vec_i, PS
+
+	vpandq xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw x0, x0, 4	;Shift to put high nibble into bits 3-0
+	vpandq x0, x0, xmask0f	;Mask high src nibble in bits 3-0
+
+	vmovdqu8 xgft1_loy, [tmp]	;Load array Ax{00}..{0f}, Ax{00}..{f0}
+	vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)]	;Load array Bx{00}..{0f}, Bx{00}..{f0}
+	add tmp, 32
+
+	vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55	;replicate 128-bit lane 1 (high-nibble table)
+	vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00	;replicate 128-bit lane 0 (low-nibble table)
+	vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+	vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+
+	vpshufb xgft1_hi, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb xgft1_lo, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq xgft1_hi, xgft1_hi, xgft1_lo	;GF add high and low partials
+	vpxorq xp1, xp1, xgft1_hi	;xp1 += partial
+
+	vpshufb xgft2_hi, xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb xgft2_lo, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq xgft2_hi, xgft2_hi, xgft2_lo	;GF add high and low partials
+	vpxorq xp2, xp2, xgft2_hi	;xp2 += partial
+
+	cmp vec_i, vec
+	jl .next_vect	;more source buffers to fold in
+
+	XSTR [dest1+pos], xp1
+	XSTR [dest2+pos], xp2
+
+	add pos, 64	;Loop on 64 bytes at a time
+	cmp pos, len
+	jle .loop64
+
+	lea tmp, [len + 64]	;tmp = original len (len was reduced by 64 on entry)
+	cmp pos, tmp
+	je .return_pass	;pos reached the original len: no tail left
+
+	;; Tail len
+	mov pos, len	;Overlapped offset length-64
+	jmp .loop64	;Do one more overlap pass
+
+.return_pass:
+	mov return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_2vect_dot_prod_avx512
+no_gf_2vect_dot_prod_avx512:
+%endif
+%endif	; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm
b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm
new file mode 100644
index 000000000..05a0c28a4
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm
@@ -0,0 +1,339 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Nibble-table dot product of vec source buffers into two destination buffers.
+;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
+;;; Works 16 bytes per pass; returns 0 on success, 1 when len < 16.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r9
+ %define tmp4 r12	; must be saved and restored
+ %define return rax
+ %macro SLDR 2	; no-op on 64-bit targets: nothing lives in stack slots
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push r12
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12	; must be saved, loaded and restored
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r13	; must be saved and restored
+ %define tmp4 r14	; must be saved and restored
+ %define return rax
+ %macro SLDR 2	; no-op on 64-bit targets: nothing lives in stack slots
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size 3*16 + 3*8	; must be an odd multiple of 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack stack_size
+	save_xmm128 xmm6, 0*16
+	save_xmm128 xmm7, 1*16
+	save_xmm128 xmm8, 2*16
+	save_reg r12, 3*16 + 0*8
+	save_reg r13, 3*16 + 1*8
+	save_reg r14, 3*16 + 2*8
+	end_prolog
+	mov arg4, arg(4)	; fifth argument is read from the stack
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	movdqa xmm6, [rsp + 0*16]
+	movdqa xmm7, [rsp + 1*16]
+	movdqa xmm8, [rsp + 2*16]
+	mov r12, [rsp + 3*16 + 0*8]
+	mov r13, [rsp + 3*16 + 1*8]
+	mov r14, [rsp + 3*16 + 2*8]
+	add rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; var0
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x:
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans ecx
+ %define trans2 esi
+ %define arg0 trans	;trans and trans2 are for the variables in stack
+ %define arg0_m arg(0)
+ %define arg1 ebx
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 trans
+ %define arg3_m arg(3)
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 trans2
+ %define tmp4 trans2
+ %define tmp4_m var(0)
+ %define return eax
+ %macro SLDR 2	;; stack load/restore
+	mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+	push ebp
+	mov ebp, esp
+	sub esp, PS*1	;1 local variable
+	push esi
+	push edi
+	push ebx
+	mov arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop ebx
+	pop edi
+	pop esi
+	add esp, PS*1	;1 local variable
+	pop ebp
+ %endmacro
+
+%endif	; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest1 arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define dest2 tmp4
+%define pos return
+
+ %ifidn PS,4	;32-bit code
+ %define len_m arg0_m
+ %define src_m arg3_m
+ %define dest1_m arg4_m
+ %define dest2_m tmp4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store (aligned load/store when NO_NT_LDST is defined)
+ %ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+ %else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+ %endif
+%endif
+
+%ifidn PS,8	;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%ifidn PS,8	;64-bit code
+ %define xmask0f xmm8
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xmm5
+ %define xgft2_hi xmm4
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%else	;32-bit code
+ %define xmask0f xmm4
+ %define xgft1_lo xmm7
+ %define xgft1_hi xmm6
+ %define xgft2_lo xgft1_lo
+ %define xgft2_hi xgft1_hi
+
+ %define x0 xmm0
+ %define xtmpa xmm1
+ %define xp1 xmm2
+ %define xp2 xmm3
+%endif
+
+align 16
+global gf_2vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
+
+func(gf_2vect_dot_prod_sse)
+	FUNC_SAVE
+	SLDR len, len_m
+	sub len, 16
+	SSTR len_m, len
+	jl .return_fail	;len < 16: fail before touching any buffer
+	xor pos, pos
+	movdqa xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+	sal vec, LOG_PS	;vec *= PS. Make vec_i count by PS
+	SLDR dest1, dest1_m
+	mov dest2, [dest1+PS]
+	SSTR dest2_m, dest2
+	mov dest1, [dest1]
+	SSTR dest1_m, dest1
+
+.loop16:
+	pxor xp1, xp1
+	pxor xp2, xp2
+	mov tmp, mul_array	;tmp walks the first table set, 32 bytes per source
+	xor vec_i, vec_i
+
+.next_vect:
+	SLDR src, src_m
+	mov ptr, [src+vec_i]
+
+	movdqu xgft1_lo, [tmp]	;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+	movdqu xgft1_hi, [tmp+16]	; " Ax{00}, Ax{10}, ..., Ax{f0}
+ %ifidn PS,8	;64-bit code
+	movdqu xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	movdqu xgft2_hi, [tmp+vec*(32/PS)+16]	; " Bx{00}, Bx{10}, ..., Bx{f0}
+	add tmp, 32
+	add vec_i, PS
+ %endif
+	XLDR x0, [ptr+pos]	;Get next source vector
+
+	movdqa xtmpa, x0	;Keep unshifted copy of src
+	psraw x0, 4	;Shift to put high nibble into bits 3-0
+	pand x0, xmask0f	;Mask high src nibble in bits 3-0
+	pand xtmpa, xmask0f	;Mask low src nibble in bits 3-0
+
+	pshufb xgft1_hi, x0	;Lookup mul table of high nibble
+	pshufb xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	pxor xgft1_hi, xgft1_lo	;GF add high and low partials
+	pxor xp1, xgft1_hi	;xp1 += partial
+
+ %ifidn PS,4	;32-bit code
+	movdqu xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	movdqu xgft2_hi, [tmp+vec*(32/PS)+16]	; " Bx{00}, Bx{10}, ..., Bx{f0}
+
+	add tmp, 32
+	add vec_i, PS
+ %endif
+	pshufb xgft2_hi, x0	;Lookup mul table of high nibble
+	pshufb xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	pxor xgft2_hi, xgft2_lo	;GF add high and low partials
+	pxor xp2, xgft2_hi	;xp2 += partial
+
+	cmp vec_i, vec
+	jl .next_vect	;more source buffers to fold in
+
+	SLDR dest1, dest1_m
+	SLDR dest2, dest2_m
+	XSTR [dest1+pos], xp1
+	XSTR [dest2+pos], xp2
+
+	SLDR len, len_m
+	add pos, 16	;Loop on 16 bytes at a time
+	cmp pos, len
+	jle .loop16
+
+	lea tmp, [len + 16]	;tmp = original len (len was reduced by 16 on entry)
+	cmp pos, tmp
+	je .return_pass	;pos reached the original len: no tail left
+
+	;; Tail len
+	mov pos, len	;Overlapped offset length-16
+	jmp .loop16	;Do one more overlap pass
+
+.return_pass:
+	mov return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_2vect_dot_prod_sse, 00, 04, 0062
diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c
new file mode 100644
index 000000000..f4fd9d0ef
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c
@@ -0,0 +1,480 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse +#endif +#ifndef TEST_MIN_SIZE +# define TEST_MIN_SIZE 16 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) +#define TEST_MEM TEST_SIZE +#define TEST_LOOPS 10000 +#define TEST_TYPE_STR "" + +#ifndef TEST_SOURCES +# define TEST_SOURCES 16 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +typedef unsigned char u8; + +extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char 
*s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int i, j, rtest, srcs; + void *buf; + u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32]; + u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2]; + u8 *buffs[TEST_SOURCES]; + + int align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *udest_ptrs[2]; + + printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref2 = buf; + + dest_ptrs[0] = dest1; + dest_ptrs[1] = dest2; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest1, 0, TEST_LEN); + memset(dest2, 0, TEST_LEN); + memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 
!= memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + 
g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 
+ offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx.asm b/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx.asm new file mode 100644 index 000000000..fcf3a7545 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx.asm @@ -0,0 +1,236 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*9 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu 
+%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm14 +%define xgft1_lo xmm13 +%define xgft1_hi xmm12 +%define xgft2_lo xmm11 +%define xgft2_hi xmm10 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xd1 xmm6 +%define xd2 xmm7 +%define xtmpd1 xmm8 +%define xtmpd2 xmm9 + + +align 16 +global gf_2vect_mad_avx:ISAL_SYM_TYPE_FUNCTION + +func(gf_2vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... 
, Bx{f0} + + mov dest2, [dest1+PS] + mov dest1, [dest1] + + XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest + XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + vmovdqa xd1, xtmpd1 ;Restore xd1 + vmovdqa xd2, xtmpd2 ;Restore xd2 + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_2vect_mad_avx, 02, 01, 0204 diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx2.asm b/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx2.asm new file mode 100644 index 000000000..0e77ebef4 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx2.asm @@ -0,0 +1,247 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All 
rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*9 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return 
+%define pos.w return.w + +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else + +;;; Use Non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm14 +%define xmask0fx xmm14 +%define xgft1_lo ymm13 +%define xgft1_hi ymm12 +%define xgft2_lo ymm11 +%define xgft2_hi ymm10 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmph1 ymm2 +%define xtmpl1 ymm3 +%define xtmph2 ymm4 +%define xtmpl2 ymm5 +%define xd1 ymm6 +%define xd2 ymm7 +%define xtmpd1 ymm8 +%define xtmpd2 ymm9 + +align 16 +global gf_2vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION + +func(gf_2vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + mov dest2, [dest1+PS] ; dest2 is held in tmp2 + mov dest1, [dest1] + + XLDR xtmpd1, [dest1+len] ;backup the last 32 bytes in dest + XLDR xtmpd2, [dest2+len] ;backup the last 32 bytes in dest + +.loop32: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector +.loop32_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high
nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + vmovdqa xd1, xtmpd1 ;Restore xd1 + vmovdqa xd2, xtmpd2 ;Restore xd2 + jmp .loop32_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_2vect_mad_avx2, 04, 01, 0205 diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx512.asm b/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx512.asm new file mode 100644 index 000000000..6d972bba5 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_2vect_mad_avx512.asm @@ -0,0 +1,230 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define stack_size 16*9 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size + %endmacro +%endif + + +%define PS 8 +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + 
%define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmph1 zmm2 +%define xtmpl1 zmm3 +%define xtmph2 zmm4 +%define xtmpl2 zmm5 +%define xd1 zmm6 +%define xd2 zmm7 +%define xtmpd1 zmm8 +%define xtmpd2 zmm9 +%define xgft1_hi zmm10 +%define xgft1_lo zmm11 +%define xgft1_loy ymm11 +%define xgft2_hi zmm12 +%define xgft2_lo zmm13 +%define xgft2_loy ymm13 +%define xmask0f zmm14 + +align 16 +global gf_2vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_2vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + mov dest2, [dest1+PS] ; reuse mul_array + mov dest1, [dest1] + mov tmp, -1 + kmovq k1, tmp + +.loop64: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR x0, [src+pos] ;Get next source vector + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxorq xd1, xd1, xtmph1 ;xd1 += partial + + vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph2, 
xtmph2, xtmpl2 ;GF add high and low partials + vpxorq xd2, xd2, xtmph2 ;xd2 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, (1 << 63) + lea tmp, [len + 64 - 1] + and tmp, 63 + sarx pos, pos, tmp + kmovq k1, pos + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_2vect_mad_avx512 +no_gf_2vect_mad_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/spdk/isa-l/erasure_code/gf_2vect_mad_sse.asm b/src/spdk/isa-l/erasure_code/gf_2vect_mad_sse.asm new file mode 100644 index 000000000..7ee1b249a --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_2vect_mad_sse.asm @@ -0,0 +1,239 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*9 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + 
mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm14 +%define xgft1_lo xmm13 +%define xgft1_hi xmm12 +%define xgft2_lo xmm11 +%define xgft2_hi xmm10 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xd1 xmm6 +%define xd2 xmm7 +%define xtmpd1 xmm8 +%define xtmpd2 xmm9 + + +align 16 +global gf_2vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_2vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + movdqu xgft1_lo,[tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... 
, Bx{f0} + mov dest2, [dest1+PS] + mov dest1, [dest1] + + XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest + XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + movdqa xtmph1, xgft1_hi ;Reload const array registers + movdqa xtmpl1, xgft1_lo + movdqa xtmph2, xgft2_hi ;Reload const array registers + movdqa xtmpl2, xgft2_lo + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pxor xd1, xtmph1 + + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 + + XSTR [dest1+pos], xd1 ;Store result + XSTR [dest2+pos], xd2 ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + movdqa xd1, xtmpd1 ;Restore xd1 + movdqa xd2, xtmpd2 ;Restore xd2 + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 + +mask0f: + dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_2vect_mad_sse, 00, 01, 0203 diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm new file mode 100644 index 000000000..b006cf13a --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm @@ -0,0 
+1,377 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_reg r12, 6*16 + 0*8 + save_reg r13, 6*16 + 1*8 + save_reg r14, 6*16 + 2*8 + save_reg r15, 6*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + mov r12, [rsp + 6*16 + 0*8] + mov r13, [rsp + 6*16 + 1*8] + mov r14, [rsp + 6*16 + 2*8] + mov r15, [rsp + 6*16 + 3*8] + add rsp, stack_size + 
%endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*2 ;2 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*2 ;2 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 + +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/store (plain aligned moves when NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + 
default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm11 + %define xgft1_lo xmm10 + %define xgft1_hi xmm9 + %define xgft2_lo xmm8 + %define xgft2_hi xmm7 + %define xgft3_lo xmm6 + %define xgft3_hi xmm5 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 +%else + %define xmask0f xmm7 + %define xgft1_lo xmm6 + %define xgft1_hi xmm5 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 +%endif + +align 16 +global gf_3vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %ifidn PS,8 ; 64-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + add tmp, 32 + add vec_i, PS + %endif + XLDR x0, [ptr+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + 
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ; 32-bit code + sal vec, 1 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + sar vec, 1 + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_3vect_dot_prod_avx, 02, 05, 0192 diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm new file mode 100644 index 000000000..38dddcf21 --- /dev/null +++ 
b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm @@ -0,0 +1,397 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + save_reg r12, 6*16 + 0*8 + save_reg r13, 6*16 + 1*8 + save_reg r14, 6*16 + 2*8 + save_reg r15, 6*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + mov r12, [rsp + 6*16 + 0*8] + mov 
r13, [rsp + 6*16 + 1*8] + mov r14, [rsp + 6*16 + 2*8] + mov r15, [rsp + 6*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp.w edx + %define tmp.b dl + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*2 ;2 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*2 ;2 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 + +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define pos return + +%ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m +%endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/store (plain aligned moves when NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define 
XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f ymm11 + %define xmask0fx xmm11 + %define xgft1_lo ymm10 + %define xgft1_hi ymm9 + %define xgft2_lo ymm8 + %define xgft2_hi ymm7 + %define xgft3_lo ymm6 + %define xgft3_hi ymm5 + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 +%else + %define xmask0f ymm7 + %define xmask0fx xmm7 + %define xgft1_lo ymm6 + %define xgft1_hi ymm5 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 + +%endif + +align 16 +global gf_3vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m + sub len, 32 + SSTR len_m, len + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop32: + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo + %ifidn PS,8 ; 64-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo + + add tmp, 32 + add vec_i, PS + %endif + XLDR x0, [ptr+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor 
xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ; 32-bit code + sal vec, 1 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo + sar vec, 1 + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + + SLDR len, len_m + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_3vect_dot_prod_avx2, 04, 05, 0197 diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm new file mode 100644 index 000000000..057cd3730 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm @@ -0,0 +1,270 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa 
xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/store (plain aligned moves when NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%define xmask0f zmm11 +%define xgft1_lo zmm10 +%define xgft1_loy ymm10 +%define xgft1_hi zmm9 +%define xgft2_lo zmm8 +%define xgft2_loy ymm8 +%define xgft2_hi zmm7 +%define xgft3_lo zmm6 +%define xgft3_loy ymm6 +%define xgft3_hi zmm5 + +%define x0 zmm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp2 zmm3 +%define xp3 zmm4 + +default rel +[bits 64] + +section .text + +align 16 +global gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest1, [dest1] + +.loop64: + vpxorq xp1, xp1, xp1 + vpxorq xp2, xp2, xp2 + vpxorq xp3, xp3, xp3 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpxorq xp2, xp2, xgft2_hi ;xp2 += partial + + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials + vpxorq xp3, xp3, xgft3_hi ;xp3 += partial + + cmp vec_i, vec + jl .next_vect + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [dest3+pos], xp3 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je 
.return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_3vect_dot_prod_avx512 +no_gf_3vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm new file mode 100644 index 000000000..da0bdf920 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm @@ -0,0 +1,378 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Three-output GF dot product over vec sources, 16 bytes per pass (SSE).
+;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
+;;; Returns 0 on success, 1 when len < 16.
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0  rdi
+ %define arg1  rsi
+ %define arg2  rdx
+ %define arg3  rcx
+ %define arg4  r8
+ %define arg5  r9
+
+ %define tmp   r11
+ %define tmp2  r10
+ %define tmp3  r13		; must be saved and restored
+ %define tmp4  r12		; must be saved and restored
+ %define return rax
+ %macro  SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push	r12
+	push	r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop	r13
+	pop	r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0   rcx
+ %define arg1   rdx
+ %define arg2   r8
+ %define arg3   r9
+
+ %define arg4   r12		; must be saved, loaded and restored
+ %define arg5   r15		; must be saved and restored
+ %define tmp    r11
+ %define tmp2   r10
+ %define tmp3   r13		; must be saved and restored
+ %define tmp4   r14		; must be saved and restored
+ %define return rax
+ %macro  SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define LOG_PS 3
+ %define stack_size  6*16 + 5*8	; must be an odd multiple of 8
+ %define arg(x)      [rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack	stack_size
+	save_xmm128	xmm6, 0*16
+	save_xmm128	xmm7, 1*16
+	save_xmm128	xmm8, 2*16
+	save_xmm128	xmm9, 3*16
+	save_xmm128	xmm10, 4*16
+	save_xmm128	xmm11, 5*16
+	save_reg	r12,  6*16 + 0*8
+	save_reg	r13,  6*16 + 1*8
+	save_reg	r14,  6*16 + 2*8
+	save_reg	r15,  6*16 + 3*8
+	end_prolog
+	mov	arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	movdqa	xmm6, [rsp + 0*16]
+	movdqa	xmm7, [rsp + 1*16]
+	movdqa	xmm8, [rsp + 2*16]
+	movdqa	xmm9, [rsp + 3*16]
+	movdqa	xmm10, [rsp + 4*16]
+	movdqa	xmm11, [rsp + 5*16]
+	mov	r12,  [rsp + 6*16 + 0*8]
+	mov	r13,  [rsp + 6*16 + 1*8]
+	mov	r14,  [rsp + 6*16 + 2*8]
+	mov	r15,  [rsp + 6*16 + 3*8]
+	add	rsp, stack_size
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;;	arg4
+;;;	arg3
+;;;	arg2
+;;;	arg1
+;;;	arg0
+;;;	return
+;;;<================= esp of caller
+;;;	ebp
+;;;<================= ebp = esp
+;;;	var0
+;;;	var1
+;;;	esi
+;;;	edi
+;;;	ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x:
+ %define arg(x) [ebp + PS*2 + PS*x]
+ %define var(x) [ebp - PS - PS*x]
+
+ %define trans	 ecx
+ %define trans2  esi
+ %define arg0	 trans		;trans and trans2 are for the variables in stack
+ %define arg0_m	 arg(0)
+ %define arg1	 ebx
+ %define arg2	 arg2_m
+ %define arg2_m	 arg(2)
+ %define arg3	 trans
+ %define arg3_m	 arg(3)
+ %define arg4	 trans
+ %define arg4_m	 arg(4)
+ %define arg5	 trans2
+ %define tmp	 edx
+ %define tmp2	 edi
+ %define tmp3	 trans2
+ %define tmp3_m	 var(0)
+ %define tmp4	 trans2
+ %define tmp4_m	 var(1)
+ %define return	 eax
+ %macro SLDR 2	;; stack load/restore: a plain mov here, empty in the 64-bit builds
+	mov %1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+	push	ebp
+	mov	ebp, esp
+	sub	esp, PS*2		;2 local variables
+	push	esi
+	push	edi
+	push	ebx
+	mov	arg1, arg(1)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop	ebx
+	pop	edi
+	pop	esi
+	add	esp, PS*2		;2 local variables
+	pop	ebp
+ %endmacro
+
+%endif	; output formats
+
+%define len    arg0
+%define vec    arg1
+%define mul_array arg2
+%define	src    arg3
+%define dest1  arg4
+%define ptr    arg5
+
+%define vec_i  tmp2
+%define dest2  tmp3
+%define dest3  tmp4
+%define pos    return
+
+ %ifidn PS,4				;32-bit code
+	%define  len_m 	arg0_m
+	%define  src_m 	arg3_m
+	%define  dest1_m arg4_m
+	%define  dest2_m tmp3_m
+	%define  dest3_m tmp4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR movdqa
+  %define XSTR movdqa
+ %else
+  %define XLDR movntdqa
+  %define XSTR movntdq
+ %endif
+%endif
+
+%ifidn PS,8			; 64-bit code
+ default rel
+  [bits 64]
+%endif
+
+
+section .text
+
+%ifidn PS,8			;64-bit code
+ %define xmask0f   xmm11
+ %define xgft1_lo  xmm2
+ %define xgft1_hi  xmm3
+ %define xgft2_lo  xmm4
+ %define xgft2_hi  xmm7
+ %define xgft3_lo  xmm6
+ %define xgft3_hi  xmm5
+
+ %define x0     xmm0
+ %define xtmpa  xmm1
+ %define xp1    xmm10
+ %define xp2    xmm9
+ %define xp3    xmm8
+%else
+ %define xmask0f   xmm7
+ %define xgft1_lo  xmm6
+ %define xgft1_hi  xmm5
+ %define xgft2_lo  xgft1_lo
+ %define xgft2_hi  xgft1_hi
+ %define xgft3_lo  xgft1_lo
+ %define xgft3_hi  xgft1_hi
+
+ %define x0     xmm0
+ %define xtmpa  xmm1
+ %define xp1    xmm2
+ %define xp2    xmm3
+ %define xp3    xmm4
+%endif
+
+align 16
+global gf_3vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
+func(gf_3vect_dot_prod_sse)
+	FUNC_SAVE
+	SLDR	len, len_m
+	sub	len, 16			;len now holds length-16, the last valid 16-byte offset
+	SSTR	len_m, len
+	jl	.return_fail		;Fewer than 16 bytes: return 1
+	xor	pos, pos
+	movdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+	sal	vec, LOG_PS		;vec *= PS. Make vec_i count by PS
+	SLDR	dest1, dest1_m
+	mov	dest2, [dest1+PS]	;dests[1]
+	SSTR	dest2_m, dest2
+	mov	dest3, [dest1+2*PS]	;dests[2]
+	SSTR	dest3_m, dest3
+	mov	dest1, [dest1]		;dests[0], loaded last because it overwrites the array pointer
+	SSTR	dest1_m, dest1
+
+.loop16:
+	pxor	xp1, xp1
+	pxor	xp2, xp2
+	pxor	xp3, xp3
+	mov	tmp, mul_array
+	xor	vec_i, vec_i
+
+.next_vect:
+	SLDR	src, src_m
+	mov	ptr, [src+vec_i]
+
+	movdqu	xgft1_lo, [tmp]		;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+	movdqu	xgft1_hi, [tmp+16]	;     "     Ax{00}, Ax{10}, ..., Ax{f0}
+ %ifidn PS,8				;64-bit code
+	movdqu	xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	movdqu	xgft2_hi, [tmp+vec*(32/PS)+16]	;     "     Bx{00}, Bx{10}, ..., Bx{f0}
+	movdqu	xgft3_lo, [tmp+vec*(64/PS)]	;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+	movdqu	xgft3_hi, [tmp+vec*(64/PS)+16]	;     "     Cx{00}, Cx{10}, ..., Cx{f0}
+	add	tmp, 32
+	add	vec_i, PS
+ %endif
+	XLDR	x0, [ptr+pos]		;Get next source vector
+
+	movdqa	xtmpa, x0		;Keep unshifted copy of src
+	psraw	x0, 4			;Shift to put high nibble into bits 3-0
+	pand	x0, xmask0f		;Mask high src nibble in bits 3-0
+	pand	xtmpa, xmask0f		;Mask low src nibble in bits 3-0
+
+	pshufb	xgft1_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft1_hi, xgft1_lo	;GF add high and low partials
+	pxor	xp1, xgft1_hi		;xp1 += partial
+
+ %ifidn PS,4				;32-bit code
+	movdqu	xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+	movdqu	xgft2_hi, [tmp+vec*(32/PS)+16]	;     "     Bx{00}, Bx{10}, ..., Bx{f0}
+ %endif
+	pshufb	xgft2_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft2_hi, xgft2_lo	;GF add high and low partials
+	pxor	xp2, xgft2_hi		;xp2 += partial
+
+ %ifidn PS,4				;32-bit code
+	sal	vec, 1			;Temporarily double vec to address the third table set
+	movdqu	xgft3_lo, [tmp+vec*(32/PS)]	;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+	movdqu	xgft3_hi, [tmp+vec*(32/PS)+16]	;     "     Cx{00}, Cx{10}, ..., Cx{f0}
+	sar	vec, 1			;Undo the doubling
+	add	tmp, 32
+	add	vec_i, PS
+ %endif
+	pshufb	xgft3_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft3_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft3_hi, xgft3_lo	;GF add high and low partials
+	pxor	xp3, xgft3_hi		;xp3 += partial
+
+	cmp	vec_i, vec
+	jl	.next_vect
+
+	SLDR	dest1, dest1_m
+	SLDR	dest2, dest2_m
+	XSTR	[dest1+pos], xp1
+	XSTR	[dest2+pos], xp2
+	SLDR	dest3, dest3_m
+	XSTR	[dest3+pos], xp3
+
+	SLDR	len, len_m
+	add	pos, 16			;Loop on 16 bytes at a time
+	cmp	pos, len
+	jle	.loop16
+
+	lea	tmp, [len + 16]
+	cmp	pos, tmp
+	je	.return_pass
+
+	;; Tail len
+	mov	pos, len	;Overlapped offset length-16
+	jmp	.loop16		;Do one more overlap pass
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;;       func                 core, ver, snum
+slversion gf_3vect_dot_prod_sse, 00,  06,  0063
diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c
new file mode 100644
index 000000000..32609c757
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c
@@ -0,0 +1,586 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse
+#endif
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE  16
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+#define TEST_MEM  TEST_SIZE
+#define TEST_LOOPS 10000
+#define TEST_TYPE_STR ""
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES  16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls,
+				unsigned char **src, unsigned char **dest);
+
+void dump(unsigned char *buf, int len)	// hex-dump len bytes of buf, 32 per row
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)	// print the first m bytes of each of the k rows s[0..k-1]
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", s[i][j]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)	// print a flat k x m byte matrix, one row per line
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", 0xff & s[j + (i * m)]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int main(int argc, char *argv[])	// returns 0 on "Pass", -1 on the first mismatch or allocation failure
+{
+	int i, j, rtest, srcs;
+	void *buf;
+	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
+	u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
+	u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
+
+	int align, size;
+	unsigned char *efence_buffs[TEST_SOURCES];
+	unsigned int offset;
+	u8 *ubuffs[TEST_SOURCES];
+	u8 *udest_ptrs[3];
+	printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN);
+
+	// Allocate the arrays
+	for (i = 0; i < TEST_SOURCES; i++) {
+		if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail");
+			return -1;
+		}
+		buffs[i] = buf;
+	}
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest3 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref3 = buf;
+
+	dest_ptrs[0] = dest1;
+	dest_ptrs[1] = dest2;
+	dest_ptrs[2] = dest3;
+
+	// Test of all zeros
+	for (i = 0; i < TEST_SOURCES; i++)
+		memset(buffs[i], 0, TEST_LEN);
+
+	memset(dest1, 0, TEST_LEN);
+	memset(dest2, 0, TEST_LEN);
+	memset(dest3, 0, TEST_LEN);
+	memset(dest_ref1, 0, TEST_LEN);
+	memset(dest_ref2, 0, TEST_LEN);
+	memset(dest_ref3, 0, TEST_LEN);
+	memset(g1, 2, TEST_SOURCES);
+	memset(g2, 1, TEST_SOURCES);
+	memset(g3, 7, TEST_SOURCES);
+
+	for (i = 0; i < TEST_SOURCES; i++) {
+		gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
+		gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
+		gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
+	}
+
+	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
+	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
+			      dest_ref2);
+	gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
+			      dest_ref3);
+
+	FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
+
+	if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
+		printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
+		dump_matrix(buffs, 5, TEST_SOURCES);
+		printf("dprod_base:");
+		dump(dest_ref1, 25);
+		printf("dprod_dut:");
+		dump(dest1, 25);
+		return -1;
+	}
+	if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
+		printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
+		dump_matrix(buffs, 5, TEST_SOURCES);
+		printf("dprod_base:");
+		dump(dest_ref2, 25);
+		printf("dprod_dut:");
+		dump(dest2, 25);
+		return -1;
+	}
+	if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
+		printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
+		dump_matrix(buffs, 5, TEST_SOURCES);
+		printf("dprod_base:");
+		dump(dest_ref3, 25);
+		printf("dprod_dut:");
+		dump(dest3, 25);
+		return -1;
+	}
+
+	putchar('.');
+
+	// Rand data test
+
+	for (rtest = 0; rtest < RANDOMS; rtest++) {
+		for (i = 0; i < TEST_SOURCES; i++)
+			for (j = 0; j < TEST_LEN; j++)
+				buffs[i][j] = rand();
+
+		for (i = 0; i < TEST_SOURCES; i++) {
+			g1[i] = rand();
+			g2[i] = rand();
+			g3[i] = rand();
+		}
+
+		for (i = 0; i < TEST_SOURCES; i++) {
+			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
+			gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
+			gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
+		}
+
+		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
+		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
+				      buffs, dest_ref2);
+		gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
+				      buffs, dest_ref3);
+
+		FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
+
+		if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
+			dump_matrix(buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref1, 25);
+			printf("dprod_dut:");
+			dump(dest1, 25);
+			return -1;
+		}
+		if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
+			dump_matrix(buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref2, 25);
+			printf("dprod_dut:");
+			dump(dest2, 25);
+			return -1;
+		}
+		if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
+			dump_matrix(buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref3, 25);
+			printf("dprod_dut:");
+			dump(dest3, 25);
+			return -1;
+		}
+
+		putchar('.');
+	}
+
+	// Rand data test with varied parameters
+	for (rtest = 0; rtest < RANDOMS; rtest++) {
+		for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
+			for (i = 0; i < srcs; i++)
+				for (j = 0; j < TEST_LEN; j++)
+					buffs[i][j] = rand();
+
+			for (i = 0; i < srcs; i++) {
+				g1[i] = rand();
+				g2[i] = rand();
+				g3[i] = rand();
+			}
+
+			for (i = 0; i < srcs; i++) {
+				gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
+				gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
+				gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
+			}
+
+			gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
+			gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
+					      dest_ref2);
+			gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
+					      dest_ref3);
+
+			FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
+
+			if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
+				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
+				       " test1 srcs=%d\n", srcs);
+				dump_matrix(buffs, 5, TEST_SOURCES);
+				printf("dprod_base:");
+				dump(dest_ref1, 25);
+				printf("dprod_dut:");
+				dump(dest1, 25);
+				return -1;
+			}
+			if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
+				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
+				       " test2 srcs=%d\n", srcs);
+				dump_matrix(buffs, 5, TEST_SOURCES);
+				printf("dprod_base:");
+				dump(dest_ref2, 25);
+				printf("dprod_dut:");
+				dump(dest2, 25);
+				return -1;
+			}
+			if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
+				printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
+				       " test3 srcs=%d\n", srcs);
+				dump_matrix(buffs, 5, TEST_SOURCES);
+				printf("dprod_base:");
+				dump(dest_ref3, 25);
+				printf("dprod_dut:");
+				dump(dest3, 25);
+				return -1;
+			}
+
+			putchar('.');
+		}
+	}
+
+	// Run tests at end of buffer for Electric Fence
+	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
+	for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
+		for (i = 0; i < TEST_SOURCES; i++)
+			for (j = 0; j < TEST_LEN; j++)
+				buffs[i][j] = rand();
+
+		for (i = 0; i < TEST_SOURCES; i++)	// Line up the size-byte window with the end of each buffer
+			efence_buffs[i] = buffs[i] + TEST_LEN - size;
+
+		for (i = 0; i < TEST_SOURCES; i++) {
+			g1[i] = rand();
+			g2[i] = rand();
+			g3[i] = rand();
+		}
+
+		for (i = 0; i < TEST_SOURCES; i++) {
+			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
+			gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
+			gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
+		}
+
+		gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
+		gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
+				      efence_buffs, dest_ref2);
+		gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
+				      efence_buffs, dest_ref3);
+
+		FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
+
+		if (0 != memcmp(dest_ref1, dest1, size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", size);	// report the failing size; rtest is stale here
+			dump_matrix(efence_buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref1, align);
+			printf("dprod_dut:");
+			dump(dest1, align);
+			return -1;
+		}
+
+		if (0 != memcmp(dest_ref2, dest2, size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", size);
+			dump_matrix(efence_buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref2, align);
+			printf("dprod_dut:");
+			dump(dest2, align);
+			return -1;
+		}
+
+		if (0 != memcmp(dest_ref3, dest3, size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", size);
+			dump_matrix(efence_buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref3, align);
+			printf("dprod_dut:");
+			dump(dest3, align);
+			return -1;
+		}
+
+		putchar('.');
+	}
+
+	// Test rand ptr alignment if available
+
+	for (rtest = 0; rtest < RANDOMS; rtest++) {
+		size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
+		srcs = rand() % TEST_SOURCES;
+		if (srcs == 0)
+			continue;
+
+		offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
+		// Add random offsets
+		for (i = 0; i < srcs; i++)
+			ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
+
+		udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
+		udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
+		udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
+
+		memset(dest1, 0, TEST_LEN);	// zero pad to check write-over
+		memset(dest2, 0, TEST_LEN);
+		memset(dest3, 0, TEST_LEN);
+
+		for (i = 0; i < srcs; i++)
+			for (j = 0; j < size; j++)
+				ubuffs[i][j] = rand();
+
+		for (i = 0; i < srcs; i++) {
+			g1[i] = rand();
+			g2[i] = rand();
+			g3[i] = rand();
+		}
+
+		for (i = 0; i < srcs; i++) {
+			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
+			gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
+			gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
+		}
+
+		gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
+		gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
+		gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
+
+		FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
+
+		if (memcmp(dest_ref1, udest_ptrs[0], size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
+			       srcs);
+			dump_matrix(ubuffs, srcs < 5 ? srcs : 5, TEST_SOURCES);	// only ubuffs[0..srcs-1] are set
+			printf("dprod_base:");
+			dump(dest_ref1, 25);
+			printf("dprod_dut:");
+			dump(udest_ptrs[0], 25);
+			return -1;
+		}
+		if (memcmp(dest_ref2, udest_ptrs[1], size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
+			       srcs);
+			dump_matrix(ubuffs, srcs < 5 ? srcs : 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref2, 25);
+			printf("dprod_dut:");
+			dump(udest_ptrs[1], 25);
+			return -1;
+		}
+		if (memcmp(dest_ref3, udest_ptrs[2], size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
+			       srcs);
+			dump_matrix(ubuffs, srcs < 5 ? srcs : 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref3, 25);
+			printf("dprod_dut:");
+			dump(udest_ptrs[2], 25);
+			return -1;
+		}
+		// Confirm that padding around dests is unchanged
+		memset(dest_ref1, 0, PTR_ALIGN_CHK_B);	// Make reference zero buff
+		offset = udest_ptrs[0] - dest1;
+
+		if (memcmp(dest1, dest_ref1, offset)) {
+			printf("Fail rand ualign pad1 start\n");
+			return -1;
+		}
+		if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
+			printf("Fail rand ualign pad1 end\n");
+			return -1;
+		}
+
+		offset = udest_ptrs[1] - dest2;
+		if (memcmp(dest2, dest_ref1, offset)) {
+			printf("Fail rand ualign pad2 start\n");
+			return -1;
+		}
+		if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
+			printf("Fail rand ualign pad2 end\n");
+			return -1;
+		}
+
+		offset = udest_ptrs[2] - dest3;
+		if (memcmp(dest3, dest_ref1, offset)) {
+			printf("Fail rand ualign pad3 start\n");
+			return -1;
+		}
+		if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
+			printf("Fail rand ualign pad3 end\n");
+			return -1;
+		}
+
+		putchar('.');
+	}
+
+	// Test all size alignment
+	align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
+
+	for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
+		srcs = TEST_SOURCES;
+
+		for (i = 0; i < srcs; i++)
+			for (j = 0; j < size; j++)
+				buffs[i][j] = rand();
+
+		for (i = 0; i < srcs; i++) {
+			g1[i] = rand();
+			g2[i] = rand();
+			g3[i] = rand();
+		}
+
+		for (i = 0; i < srcs; i++) {
+			gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
+			gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
+			gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
+		}
+
+		gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
+		gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
+		gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
+
+		FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
+
+		if (memcmp(dest_ref1, dest_ptrs[0], size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
+			       size);
+			dump_matrix(buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref1, 25);
+			printf("dprod_dut:");
+			dump(dest_ptrs[0], 25);
+			return -1;
+		}
+		if (memcmp(dest_ref2, dest_ptrs[1], size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
+			       size);
+			dump_matrix(buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref2, 25);
+			printf("dprod_dut:");
+			dump(dest_ptrs[1], 25);
+			return -1;
+		}
+		if (memcmp(dest_ref3, dest_ptrs[2], size)) {
+			printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
+			       size);
+			dump_matrix(buffs, 5, TEST_SOURCES);
+			printf("dprod_base:");
+			dump(dest_ref3, 25);
+			printf("dprod_dut:");
+			dump(dest_ptrs[2], 25);
+			return -1;
+		}
+	}
+
+	printf("Pass\n");
+	return 0;
+
+}
diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx.asm b/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx.asm
new file mode 100644
index 000000000..1f40eb780
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx.asm
@@ -0,0 +1,288 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2015
Intel Corporation All rights reserved.
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;    * Redistributions of source code must retain the above copyright
+;      notice, this list of conditions and the following disclaimer.
+;    * Redistributions in binary form must reproduce the above copyright
+;      notice, this list of conditions and the following disclaimer in
+;      the documentation and/or other materials provided with the
+;      distribution.
+;    * Neither the name of Intel Corporation nor the names of its
+;      contributors may be used to endorse or promote products derived
+;      from this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; Multiply one source by three GF table sets and XOR the results into three dests (AVX).
+;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
+;;; Returns 0 on success, 1 when len < 16.
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0  rcx
+ %define arg0.w ecx
+ %define arg1  rdx
+ %define arg2  r8
+ %define arg3  r9
+ %define arg4  r12
+ %define arg5  r15
+ %define tmp   r11
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x)      [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+	sub	rsp, stack_size
+	vmovdqa	[rsp+16*0],xmm6
+	vmovdqa	[rsp+16*1],xmm7
+	vmovdqa	[rsp+16*2],xmm8
+	vmovdqa	[rsp+16*3],xmm9
+	vmovdqa	[rsp+16*4],xmm10
+	vmovdqa	[rsp+16*5],xmm11
+	vmovdqa	[rsp+16*6],xmm12
+	vmovdqa	[rsp+16*7],xmm13
+	vmovdqa	[rsp+16*8],xmm14
+	vmovdqa	[rsp+16*9],xmm15
+	save_reg	r12,  10*16 + 0*8
+	save_reg	r15,  10*16 + 1*8
+	end_prolog
+	mov	arg4, arg(4)
+	mov	arg5, arg(5)
+%endmacro
+
+%macro FUNC_RESTORE 0
+	vmovdqa	xmm6, [rsp+16*0]
+	vmovdqa	xmm7, [rsp+16*1]
+	vmovdqa	xmm8, [rsp+16*2]
+	vmovdqa	xmm9, [rsp+16*3]
+	vmovdqa	xmm10, [rsp+16*4]
+	vmovdqa	xmm11, [rsp+16*5]
+	vmovdqa	xmm12, [rsp+16*6]
+	vmovdqa	xmm13, [rsp+16*7]
+	vmovdqa	xmm14, [rsp+16*8]
+	vmovdqa	xmm15, [rsp+16*9]
+	mov	r12,  [rsp + 10*16 + 0*8]
+	mov	r15,  [rsp + 10*16 + 1*8]
+	add	rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0  rdi
+ %define arg0.w edi
+ %define arg1  rsi
+ %define arg2  rdx
+ %define arg3  rcx
+ %define arg4  r8
+ %define arg5  r9
+ %define tmp   r11
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
+%define len   arg0
+%define len.w arg0.w
+%define vec    arg1
+%define vec_i    arg2
+%define mul_array arg3
+%define	src   arg4
+%define dest1  arg5
+%define pos   return
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR vmovdqa
+  %define XSTR vmovdqa
+ %else
+  %define XLDR vmovntdqa
+  %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f  xmm15
+%define xgft1_lo  xmm14
+%define xgft1_hi  xmm13
+%define xgft2_lo  xmm12
+%define xgft2_hi  xmm11
+%define xgft3_lo  xmm10
+%define xgft3_hi  xmm9
+
+%define x0      xmm0
+%define xtmpa   xmm1
+%define xtmph1  xmm2
+%define xtmpl1  xmm3
+%define xtmph2  xmm4
+%define xtmpl2  xmm5
+%define xtmph3  xmm6
+%define xtmpl3  xmm7
+%define xd1     xmm8
+%define xd2     xtmpl1
+%define xd3     xtmph1
+
+align 16
+global gf_3vect_mad_avx:ISAL_SYM_TYPE_FUNCTION
+func(gf_3vect_mad_avx)
+	FUNC_SAVE
+	sub	len, 16			;len now holds length-16, the last valid 16-byte offset
+	jl	.return_fail		;Fewer than 16 bytes: return 1
+	xor	pos, pos
+	vmovdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+
+	sal	vec_i, 5		;Multiply by 32
+	sal	vec, 5			;vec *= 32: byte stride between the A, B and C table sets
+	lea	tmp, [mul_array + vec_i]
+	vmovdqu	xgft1_lo, [tmp]		;Load array Ax{00}, Ax{01}, Ax{02}, ...
+	vmovdqu	xgft1_hi, [tmp+16]	; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+	vmovdqu	xgft2_lo, [tmp+vec]	;Load array Bx{00}, Bx{01}, Bx{02}, ...
+	vmovdqu	xgft2_hi, [tmp+vec+16]	; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+	vmovdqu	xgft3_lo, [tmp+2*vec]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+	vmovdqu	xgft3_hi, [tmp+2*vec+16]; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+	mov	dest2, [dest1+PS]	; reuse mul_array
+	mov	dest3, [dest1+2*PS]	; reuse vec_i
+	mov	dest1, [dest1]		; loaded last because it overwrites the dest array pointer
+
+.loop16:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+
+	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+	; dest1
+	vpshufb	xtmph1, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl1, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph1, xtmph1, xtmpl1	;GF add high and low partials
+	vpxor	xd1, xd1, xtmph1	;xd1 += partial
+
+	XLDR	xd2, [dest2+pos]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest3+pos]	;reuse xtmph1. Get next dest vector
+
+	; dest2
+	vpshufb	xtmph2, xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl2, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph2, xtmph2, xtmpl2	;GF add high and low partials
+	vpxor	xd2, xd2, xtmph2	;xd2 += partial
+
+	; dest3
+	vpshufb	xtmph3, xgft3_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl3, xgft3_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph3, xtmph3, xtmpl3	;GF add high and low partials
+	vpxor	xd3, xd3, xtmph3	;xd3 += partial
+
+	XSTR	[dest1+pos], xd1
+	XSTR	[dest2+pos], xd2
+	XSTR	[dest3+pos], xd3
+
+	add	pos, 16			;Loop on 16 bytes at a time
+	cmp	pos, len
+	jle	.loop16
+
+	lea	tmp, [len + 16]
+	cmp	pos, tmp
+	je	.return_pass
+
+.lessthan16:
+	;; Tail len
+	;; Do one more overlap pass
+	mov	tmp, len		;Overlapped offset length-16
+	XLDR	x0, [src+tmp]		;Get next source vector
+	XLDR	xd1, [dest1+tmp]	;Get next dest vector
+	XLDR	xd2, [dest2+tmp]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest3+tmp]	;reuse xtmph1. Get next dest vector
+
+	sub	len, pos
+
+	vmovdqa	xtmph3, [constip16]	;Load const of i + 16 (VEX form, like the rest of this routine)
+	vpinsrb	xtmpl3, xtmpl3, len.w, 15
+	vpshufb	xtmpl3, xtmpl3, xmask0f	;Broadcast len to all bytes
+	vpcmpgtb	xtmpl3, xtmpl3, xtmph3	;Byte mask: 0xff where len-pos > constip16 byte; gates the partials below
+
+	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+
+	; dest1
+	vpshufb	xgft1_hi, xgft1_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft1_lo, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xgft1_hi, xgft1_hi, xgft1_lo	;GF add high and low partials
+	vpand	xgft1_hi, xgft1_hi, xtmpl3
+	vpxor	xd1, xd1, xgft1_hi
+
+	; dest2
+	vpshufb	xgft2_hi, xgft2_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft2_lo, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xgft2_hi, xgft2_hi, xgft2_lo	;GF add high and low partials
+	vpand	xgft2_hi, xgft2_hi, xtmpl3
+	vpxor	xd2, xd2, xgft2_hi
+
+	; dest3
+	vpshufb	xgft3_hi, xgft3_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft3_lo, xgft3_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xgft3_hi, xgft3_hi, xgft3_lo	;GF add high and low partials
+	vpand	xgft3_hi, xgft3_hi, xtmpl3
+	vpxor	xd3, xd3, xgft3_hi
+
+	XSTR	[dest1+tmp], xd1
+	XSTR	[dest2+tmp], xd2
+	XSTR	[dest3+tmp], xd3
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f:	dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+	dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;;       func             core, ver, snum
+slversion gf_3vect_mad_avx, 02,  01,  0207
diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx2.asm b/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx2.asm
new file mode 100644
index 000000000..0b3666171
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx2.asm
@@ -0,0 +1,317 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2015 Intel
Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; GF multiply-accumulate of one source buffer into three destination buffers, 32 bytes per pass:
+;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
+;;; Returns 0 on success, 1 when len < 32. dest is read as an array of three buffer pointers.
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved, loaded and restored
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+	sub	rsp, stack_size		;room for xmm6-xmm15 plus r12 and r15
+	vmovdqa	[rsp+16*0],xmm6
+	vmovdqa	[rsp+16*1],xmm7
+	vmovdqa	[rsp+16*2],xmm8
+	vmovdqa	[rsp+16*3],xmm9
+	vmovdqa	[rsp+16*4],xmm10
+	vmovdqa	[rsp+16*5],xmm11
+	vmovdqa	[rsp+16*6],xmm12
+	vmovdqa	[rsp+16*7],xmm13
+	vmovdqa	[rsp+16*8],xmm14
+	vmovdqa	[rsp+16*9],xmm15
+	save_reg	r12, 10*16 + 0*8
+	save_reg	r15, 10*16 + 1*8
+	end_prolog
+	mov	arg4, arg(4)		;src is read from the caller's stack
+	mov	arg5, arg(5)		;dest is read from the caller's stack
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa	xmm6, [rsp+16*0]
+	vmovdqa	xmm7, [rsp+16*1]
+	vmovdqa	xmm8, [rsp+16*2]
+	vmovdqa	xmm9, [rsp+16*3]
+	vmovdqa	xmm10, [rsp+16*4]
+	vmovdqa	xmm11, [rsp+16*5]
+	vmovdqa	xmm12, [rsp+16*6]
+	vmovdqa	xmm13, [rsp+16*7]
+	vmovdqa	xmm14, [rsp+16*8]
+	vmovdqa	xmm15, [rsp+16*9]
+	mov	r12, [rsp + 10*16 + 0*8]
+	mov	r15, [rsp + 10*16 + 1*8]
+	add	rsp, stack_size
+ %endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
+%define len arg0	; byte count; holds len-32 after the entry check
+%define len.w arg0.w
+%define vec arg1	; scaled by 32 at entry: byte stride between the three tables
+%define vec_i arg2	; scaled by 32 at entry: byte offset of the first table
+%define mul_array arg3	; base address of the 32-byte lookup tables
+%define src arg4
+%define dest1 arg5	; pointer array at entry, then replaced by its first element
+%define pos return	; running byte offset; shares the register with the return value
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use aligned (NO_NT_LDST) or non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR vmovdqa
+  %define XSTR vmovdqa
+ %else
+  %define XLDR vmovntdqa
+  %define XSTR vmovntdq
+ %endif
+%endif
+
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f ymm15
+%define xmask0fx xmm15
+%define xgft1_lo ymm14
+%define xgft1_hi ymm13
+%define xgft2_lo ymm12
+%define xgft3_lo ymm11
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xtmph1 ymm2
+%define xtmpl1 ymm3
+%define xtmph2 ymm4
+%define xtmpl2 ymm5
+%define xtmpl2x xmm5
+%define xtmph3 ymm6
+%define xtmpl3 ymm7
+%define xtmpl3x xmm7
+%define xd1 ymm8
+%define xd2 ymm9
+%define xd3 ymm10
+
+align 16
+global gf_3vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
+func(gf_3vect_mad_avx2)
+	FUNC_SAVE
+	sub	len, 32			;len now holds the offset of the last full 32-byte window
+	jl	.return_fail		;fewer than 32 bytes: fail
+	xor	pos, pos
+	mov	tmp.b, 0x0f
+	vpinsrb	xmask0fx, xmask0fx, tmp.w, 0
+	vpbroadcastb xmask0f, xmask0fx	;Construct mask 0x0f0f0f...
+
+	sal	vec_i, 5		;Multiply by 32
+	sal	vec, 5			;Multiply by 32
+	lea	tmp, [mul_array + vec_i]
+
+	vmovdqu	xgft1_lo, [tmp]		;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+					; " Ax{00}, Ax{10}, ..., Ax{f0}
+	vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
+	vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
+
+	vmovdqu	xgft2_lo, [tmp+vec]	;Load array Bx{00}, Bx{01}, Bx{02}, ...
+					; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+	vmovdqu	xgft3_lo, [tmp+2*vec]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+					; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+	mov	dest2, [dest1+PS]	; reuse mul_array
+	mov	dest3, [dest1+2*PS]	; reuse vec_i
+	mov	dest1, [dest1]
+
+.loop32:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+	XLDR	xd2, [dest2+pos]	;Get next dest vector
+	XLDR	xd3, [dest3+pos]	;Get next dest vector
+	vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+	vperm2i128 xtmpl2, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+
+	vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
+	vperm2i128 xtmpl3, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
+
+	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	; dest1
+	vpshufb	xtmph1, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl1, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph1, xtmph1, xtmpl1	;GF add high and low partials
+	vpxor	xd1, xd1, xtmph1	;xd1 += partial
+
+	; dest2
+	vpshufb	xtmph2, x0		;Lookup mul table of high nibble
+	vpshufb	xtmpl2, xtmpa		;Lookup mul table of low nibble
+	vpxor	xtmph2, xtmpl2		;GF add high and low partials
+	vpxor	xd2, xtmph2		;xd2 += partial
+
+	; dest3
+	vpshufb	xtmph3, x0		;Lookup mul table of high nibble
+	vpshufb	xtmpl3, xtmpa		;Lookup mul table of low nibble
+	vpxor	xtmph3, xtmpl3		;GF add high and low partials
+	vpxor	xd3, xtmph3		;xd3 += partial
+
+	XSTR	[dest1+pos], xd1
+	XSTR	[dest2+pos], xd2
+	XSTR	[dest3+pos], xd3
+
+	add	pos, 32			;Loop on 32 bytes at a time
+	cmp	pos, len
+	jle	.loop32
+
+	lea	tmp, [len + 32]		;tmp = original len
+	cmp	pos, tmp
+	je	.return_pass		;length was a multiple of 32: no tail
+
+.lessthan32:
+	;; Tail len
+	;; Do one more overlap pass
+	mov	tmp.b, 0x1f
+	vpinsrb	xtmpl2x, xtmpl2x, tmp.w, 0
+	vpbroadcastb xtmpl2, xtmpl2x	;Construct mask 0x1f1f1f...
+
+	mov	tmp, len		;Overlapped offset length-32
+
+	XLDR	x0, [src+tmp]		;Get next source vector
+	XLDR	xd1, [dest1+tmp]	;Get next dest vector
+	XLDR	xd2, [dest2+tmp]	;Get next dest vector
+	XLDR	xd3, [dest3+tmp]	;Get next dest vector
+
+	sub	len, pos		;len = -(bytes of this window already processed)
+
+	vmovdqa	xtmph3, [constip32]	;Load per-byte constants 0xff-i (i = byte index)
+	vpinsrb	xtmpl3x, xtmpl3x, len.w, 15
+	vinserti128 xtmpl3, xtmpl3, xtmpl3x, 1 ;swapped to xtmpl3x | xtmpl3x
+	vpshufb	xtmpl3, xtmpl3, xtmpl2	;Broadcast len to all bytes. xtmpl2=0x1f1f1f...
+	vpcmpgtb xtmpl3, xtmpl3, xtmph3	;0xff for window bytes not yet processed, else 0
+
+	vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
+	vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
+
+	vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
+	vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
+
+	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	; dest1
+	vpshufb	xtmph1, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl1, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph1, xtmph1, xtmpl1	;GF add high and low partials
+	vpand	xtmph1, xtmph1, xtmpl3	;Zero the partial for bytes already processed
+	vpxor	xd1, xd1, xtmph1	;xd1 += partial
+
+	; dest2
+	vpshufb	xtmph2, xtmph2, x0	;Lookup mul table of high nibble
+	vpshufb	xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
+	vpxor	xtmph2, xtmph2, xgft2_lo ;GF add high and low partials
+	vpand	xtmph2, xtmph2, xtmpl3	;Zero the partial for bytes already processed
+	vpxor	xd2, xd2, xtmph2	;xd2 += partial
+
+	; dest3
+	vpshufb	xtmph3, xtmph3, x0	;Lookup mul table of high nibble
+	vpshufb	xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
+	vpxor	xtmph3, xtmph3, xgft3_lo ;GF add high and low partials
+	vpand	xtmph3, xtmph3, xtmpl3	;Zero the partial for bytes already processed
+	vpxor	xd3, xd3, xtmph3	;xd3 += partial
+
+	XSTR	[dest1+tmp], xd1
+	XSTR	[dest2+tmp], xd2
+	XSTR	[dest3+tmp], xd3
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 32
+constip32:
+	dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+	dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7
+
+;;; func core, ver, snum
+slversion gf_3vect_mad_avx2, 04, 01, 0208
diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx512.asm b/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx512.asm
new file mode 100644
index 000000000..dcafbc791
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_3vect_mad_avx512.asm
@@ -0,0 +1,247 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; GF multiply-accumulate of one source buffer into three destination buffers, 64 bytes per pass:
+;;; gf_3vect_mad_avx512(len, vec, vec_i, mul_array, src, dest);
+;;; Returns 0 on success, 1 when len < 64. dest is read as an array of three buffer pointers.
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12 ; must be saved, loaded and restored
+ %define arg5 r15 ; must be saved, loaded and restored
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+ %macro FUNC_SAVE 0
+	sub	rsp, stack_size		;room for xmm6-xmm15 plus r12 and r15
+	vmovdqa	[rsp+16*0],xmm6
+	vmovdqa	[rsp+16*1],xmm7
+	vmovdqa	[rsp+16*2],xmm8
+	vmovdqa	[rsp+16*3],xmm9
+	vmovdqa	[rsp+16*4],xmm10
+	vmovdqa	[rsp+16*5],xmm11
+	vmovdqa	[rsp+16*6],xmm12
+	vmovdqa	[rsp+16*7],xmm13
+	vmovdqa	[rsp+16*8],xmm14
+	vmovdqa	[rsp+16*9],xmm15
+	save_reg	r12, 10*16 + 0*8
+	save_reg	r15, 10*16 + 1*8
+	end_prolog
+	mov	arg4, arg(4)		;src is read from the caller's stack
+	mov	arg5, arg(5)		;dest is read from the caller's stack
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa	xmm6, [rsp+16*0]
+	vmovdqa	xmm7, [rsp+16*1]
+	vmovdqa	xmm8, [rsp+16*2]
+	vmovdqa	xmm9, [rsp+16*3]
+	vmovdqa	xmm10, [rsp+16*4]
+	vmovdqa	xmm11, [rsp+16*5]
+	vmovdqa	xmm12, [rsp+16*6]
+	vmovdqa	xmm13, [rsp+16*7]
+	vmovdqa	xmm14, [rsp+16*8]
+	vmovdqa	xmm15, [rsp+16*9]
+	mov	r12, [rsp + 10*16 + 0*8]
+	mov	r15, [rsp + 10*16 + 1*8]
+	add	rsp, stack_size
+ %endmacro
+%endif
+
+%define PS 8
+%define len arg0	; byte count; holds len-64 after the entry check
+%define vec arg1	; scaled by 32 at entry: byte stride between the three tables
+%define vec_i arg2	; scaled by 32 at entry: byte offset of the first table
+%define mul_array arg3	; base address of the 32-byte lookup tables
+%define src arg4
+%define dest1 arg5	; pointer array at entry, then replaced by its first element
+%define pos return	; running byte offset; shares the register with the return value
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu8
+ %define XSTR vmovdqu8
+%else
+;;; Use aligned (NO_NT_LDST) or non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR vmovdqa64	; zmm operands need the EVEX mnemonic; plain vmovdqa has no 512-bit form
+  %define XSTR vmovdqa64
+ %else
+  %define XLDR vmovntdqa
+  %define XSTR vmovntdq
+ %endif
+%endif
+
+default rel
+[bits 64]
+section .text
+
+%define x0 zmm0
+%define xtmpa zmm1
+%define xtmph1 zmm2
+%define xtmpl1 zmm3
+%define xtmph2 zmm4
+%define xtmpl2 zmm5
+%define xtmph3 zmm6
+%define xtmpl3 zmm7
+%define xgft1_hi zmm8
+%define xgft1_lo zmm9
+%define xgft1_loy ymm9
+%define xgft2_hi zmm10
+%define xgft2_lo zmm11
+%define xgft2_loy ymm11
+%define xgft3_hi zmm12
+%define xgft3_lo zmm13
+%define xgft3_loy ymm13
+%define xd1 zmm14
+%define xd2 zmm15
+%define xd3 zmm16
+%define xmask0f zmm17
+
+align 16
+global gf_3vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION
+func(gf_3vect_mad_avx512)
+	FUNC_SAVE
+	sub	len, 64			;len now holds the offset of the last full 64-byte window
+	jl	.return_fail		;fewer than 64 bytes: fail
+	xor	pos, pos
+	mov	tmp, 0x0f
+	vpbroadcastb xmask0f, tmp	;Construct mask 0x0f0f0f...
+	sal	vec_i, 5		;Multiply by 32
+	sal	vec, 5			;Multiply by 32
+	lea	tmp, [mul_array + vec_i]
+	vmovdqu	xgft1_loy, [tmp]	;Load array Ax{00}..{0f}, Ax{00}..{f0}
+	vmovdqu	xgft2_loy, [tmp+vec]	;Load array Bx{00}..{0f}, Bx{00}..{f0}
+	vmovdqu	xgft3_loy, [tmp+2*vec]	;Load array Cx{00}..{0f}, Cx{00}..{f0}
+	vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55	;Replicate 128-bit lane 1 (high-nibble table)
+	vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00	;Replicate 128-bit lane 0 (low-nibble table)
+	vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+	vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+	vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+	vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+	mov	dest2, [dest1+PS]	; reuse mul_array
+	mov	dest3, [dest1+2*PS]	; reuse vec_i
+	mov	dest1, [dest1]
+	mov	tmp, -1
+	kmovq	k1, tmp			;All 64 byte lanes enabled for the main loop
+
+.loop64:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+	XLDR	xd2, [dest2+pos]	;Get next dest vector
+	XLDR	xd3, [dest3+pos]	;Get next dest vector
+
+	vpandq	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpandq	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	; dest1
+	vpshufb	xtmph1 {k1}{z}, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl1 {k1}{z}, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph1, xtmph1, xtmpl1	;GF add high and low partials
+	vpxorq	xd1, xd1, xtmph1	;xd1 += partial
+
+	; dest2
+	vpshufb	xtmph2 {k1}{z}, xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl2 {k1}{z}, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph2, xtmph2, xtmpl2	;GF add high and low partials
+	vpxorq	xd2, xd2, xtmph2	;xd2 += partial
+
+	; dest3
+	vpshufb	xtmph3 {k1}{z}, xgft3_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl3 {k1}{z}, xgft3_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph3, xtmph3, xtmpl3	;GF add high and low partials
+	vpxorq	xd3, xd3, xtmph3	;xd3 += partial
+
+	XSTR	[dest1+pos], xd1
+	XSTR	[dest2+pos], xd2
+	XSTR	[dest3+pos], xd3
+
+	add	pos, 64			;Loop on 64 bytes at a time
+	cmp	pos, len
+	jle	.loop64
+
+	lea	tmp, [len + 64]		;tmp = original len
+	cmp	pos, tmp
+	je	.return_pass		;length was a multiple of 64 (or tail pass done)
+
+	;; Tail len
+	mov	pos, (1 << 63)
+	lea	tmp, [len + 64 - 1]
+	and	tmp, 63			;tmp = (remaining bytes) - 1
+	sarx	pos, pos, tmp		;Arithmetic shift: one top mask bit per remaining byte
+	kmovq	k1, pos			;Lanes already processed get a zero partial
+	mov	pos, len		;Overlapped offset length-64
+	jmp	.loop64			;Do one more overlap pass
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_3vect_mad_avx512
+no_gf_3vect_mad_avx512:
+%endif
+%endif ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/spdk/isa-l/erasure_code/gf_3vect_mad_sse.asm b/src/spdk/isa-l/erasure_code/gf_3vect_mad_sse.asm
new file mode 100644
index 000000000..0d9028bc8
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_3vect_mad_sse.asm
@@ -0,0 +1,298 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; GF multiply-accumulate of one source buffer into three destination buffers, 16 bytes per pass:
+;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;; Returns 0 on success, 1 when len < 16. dest is read as an array of three buffer pointers.
+
+%include "reg_sizes.asm"
+
+%define PS 8
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 3*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+%macro FUNC_SAVE 0
+	sub	rsp, stack_size		;room for xmm6-xmm15 plus r12 and r15
+	movdqa	[rsp+16*0],xmm6
+	movdqa	[rsp+16*1],xmm7
+	movdqa	[rsp+16*2],xmm8
+	movdqa	[rsp+16*3],xmm9
+	movdqa	[rsp+16*4],xmm10
+	movdqa	[rsp+16*5],xmm11
+	movdqa	[rsp+16*6],xmm12
+	movdqa	[rsp+16*7],xmm13
+	movdqa	[rsp+16*8],xmm14
+	movdqa	[rsp+16*9],xmm15
+	save_reg	r12, 10*16 + 0*8
+	save_reg	r15, 10*16 + 1*8
+	end_prolog
+	mov	arg4, arg(4)		;src is read from the caller's stack
+	mov	arg5, arg(5)		;dest is read from the caller's stack
+%endmacro
+
+%macro FUNC_RESTORE 0
+	movdqa	xmm6, [rsp+16*0]
+	movdqa	xmm7, [rsp+16*1]
+	movdqa	xmm8, [rsp+16*2]
+	movdqa	xmm9, [rsp+16*3]
+	movdqa	xmm10, [rsp+16*4]
+	movdqa	xmm11, [rsp+16*5]
+	movdqa	xmm12, [rsp+16*6]
+	movdqa	xmm13, [rsp+16*7]
+	movdqa	xmm14, [rsp+16*8]
+	movdqa	xmm15, [rsp+16*9]
+	mov	r12, [rsp + 10*16 + 0*8]
+	mov	r15, [rsp + 10*16 + 1*8]
+	add	rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0	; byte count; holds len-16 after the entry check
+%define len.w arg0.w
+%define vec arg1	; scaled by 32 at entry: byte stride between the three tables
+%define vec_i arg2	; scaled by 32 at entry: byte offset of the first table
+%define mul_array arg3	; base address of the 32-byte lookup tables
+%define src arg4
+%define dest1 arg5	; pointer array at entry, then replaced by its first element
+%define pos return	; running byte offset; shares the register with the return value
+%define pos.w return.w
+
+%define dest2 mul_array
+%define dest3 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use aligned (NO_NT_LDST) or non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR movdqa
+  %define XSTR movdqa
+ %else
+  %define XLDR movntdqa
+  %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f xmm15
+%define xgft1_lo xmm14
+%define xgft1_hi xmm13
+%define xgft2_lo xmm12
+%define xgft2_hi xmm11
+%define xgft3_lo xmm10
+%define xgft3_hi xmm9
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xd1 xmm8
+%define xd2 xtmpl1
+%define xd3 xtmph1
+
+align 16
+global gf_3vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
+func(gf_3vect_mad_sse)
+	FUNC_SAVE
+	sub	len, 16			;len now holds the offset of the last full 16-byte window
+	jl	.return_fail		;fewer than 16 bytes: fail
+	xor	pos, pos
+	movdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+	sal	vec_i, 5		;Multiply by 32
+	sal	vec, 5			;Multiply by 32
+	lea	tmp, [mul_array + vec_i]
+
+	movdqu	xgft1_lo, [tmp]		;Load array Ax{00}, Ax{01}, Ax{02}, ...
+	movdqu	xgft1_hi, [tmp+16]	; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+	movdqu	xgft2_lo, [tmp+vec]	;Load array Bx{00}, Bx{01}, Bx{02}, ...
+	movdqu	xgft2_hi, [tmp+vec+16]	; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+	movdqu	xgft3_lo, [tmp+2*vec]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+	movdqu	xgft3_hi, [tmp+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+	mov	dest2, [dest1+PS]	; reuse mul_array
+	mov	dest3, [dest1+2*PS]	; reuse vec_i
+	mov	dest1, [dest1]
+
+.loop16:
+	XLDR	x0, [src+pos]		;Get next source vector
+	movdqa	xtmph1, xgft1_hi	;Reload const array registers
+	movdqa	xtmpl1, xgft1_lo
+	movdqa	xtmph2, xgft2_hi	;Reload const array registers
+	movdqa	xtmpl2, xgft2_lo
+	movdqa	xtmph3, xgft3_hi	;Reload const array registers
+	movdqa	xtmpl3, xgft3_lo
+
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+
+	movdqa	xtmpa, x0		;Keep unshifted copy of src
+	psraw	x0, 4			;Shift to put high nibble into bits 4-0
+	pand	x0, xmask0f		;Mask high src nibble in bits 4-0
+	pand	xtmpa, xmask0f		;Mask low src nibble in bits 4-0
+
+	; dest1
+	pshufb	xtmph1, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl1, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph1, xtmpl1		;GF add high and low partials
+	pxor	xd1, xtmph1		;xd1 += partial
+
+	XLDR	xd2, [dest2+pos]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest3+pos]	;reuse xtmph1. Get next dest vector
+
+	; dest2
+	pshufb	xtmph2, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl2, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph2, xtmpl2		;GF add high and low partials
+	pxor	xd2, xtmph2		;xd2 += partial
+
+	; dest3
+	pshufb	xtmph3, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl3, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph3, xtmpl3		;GF add high and low partials
+	pxor	xd3, xtmph3		;xd3 += partial
+
+	XSTR	[dest1+pos], xd1	;Store result
+	XSTR	[dest2+pos], xd2	;Store result
+	XSTR	[dest3+pos], xd3	;Store result
+
+	add	pos, 16			;Loop on 16 bytes at a time
+	cmp	pos, len
+	jle	.loop16
+
+	lea	tmp, [len + 16]		;tmp = original len
+	cmp	pos, tmp
+	je	.return_pass		;length was a multiple of 16: no tail
+
+.lessthan16:
+	;; Tail len
+	;; Do one more overlap pass
+	mov	tmp, len		;Overlapped offset length-16
+
+	XLDR	x0, [src+tmp]		;Get next source vector
+	XLDR	xd1, [dest1+tmp]	;Get next dest vector
+	XLDR	xd2, [dest2+tmp]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest3+tmp]	;reuse xtmph1. Get next dest vector
+
+	sub	len, pos		;len = -(bytes of this window already processed)
+
+	movdqa	xtmph3, [constip16]	;Load per-byte constants 0xff-i (i = byte index)
+	pinsrb	xtmpl3, len.w, 15
+	pshufb	xtmpl3, xmask0f		;Broadcast len to all bytes
+	pcmpgtb	xtmpl3, xtmph3		;0xff for window bytes not yet processed, else 0
+
+	movdqa	xtmpa, x0		;Keep unshifted copy of src
+	psraw	x0, 4			;Shift to put high nibble into bits 4-0
+	pand	x0, xmask0f		;Mask high src nibble in bits 4-0
+	pand	xtmpa, xmask0f		;Mask low src nibble in bits 4-0
+
+	; dest1
+	pshufb	xgft1_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft1_hi, xgft1_lo	;GF add high and low partials
+	pand	xgft1_hi, xtmpl3	;Zero the partial for bytes already processed
+	pxor	xd1, xgft1_hi		;xd1 += partial
+
+	; dest2
+	pshufb	xgft2_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft2_hi, xgft2_lo	;GF add high and low partials
+	pand	xgft2_hi, xtmpl3	;Zero the partial for bytes already processed
+	pxor	xd2, xgft2_hi		;xd2 += partial
+
+	; dest3
+	pshufb	xgft3_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft3_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft3_hi, xgft3_lo	;GF add high and low partials
+	pand	xgft3_hi, xtmpl3	;Zero the partial for bytes already processed
+	pxor	xd3, xgft3_hi		;xd3 += partial
+
+	XSTR	[dest1+tmp], xd1	;Store result
+	XSTR	[dest2+tmp], xd2	;Store result
+	XSTR	[dest3+tmp], xd3	;Store result
+
+.return_pass:
+	FUNC_RESTORE
+	mov	return, 0
+	ret
+
+.return_fail:
+	FUNC_RESTORE
+	mov	return, 1
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+	dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+constip16:
+	dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_3vect_mad_sse, 00, 01, 0206
diff --git a/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm
new file mode 100644
index 000000000..ccfc8ce53
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm
@@ -0,0 +1,441 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, 
arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; var2 +;;; var3 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define tmp5 trans2 + %define tmp5_m var(2) + %define tmp6 trans2 + %define tmp6_m var(3) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*4 ;4 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*4 ;4 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define 
mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %define dest4_m tmp5_m + %define vskip3_m tmp6_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm14 + %define xgft1_lo xmm13 + %define xgft1_hi xmm12 + %define xgft2_lo xmm11 + %define xgft2_hi xmm10 + %define xgft3_lo xmm9 + %define xgft3_hi xmm8 + %define xgft4_lo xmm7 + %define xgft4_hi xmm6 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 + %define xp4 xmm5 +%else + %define xmm_trans xmm7 ;xmm7 is shared by xmask0f and xgft1_lo + %define xmask0f xmm_trans + %define xgft1_lo xmm_trans + %define xgft1_hi xmm6 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + %define xgft4_lo xgft1_lo + %define xgft4_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 + %define xp4 xmm5 +%endif +align 16 +global gf_4vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m + sub len, 16 ;len now holds the offset of the last full 16-byte block + SSTR len_m, len + jl .return_fail ;Fail if the input length is below 16 + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip3, vec + imul vskip3, 96 ;vskip3 = vec*96: byte offset of the 4th (D) table set + SSTR vskip3_m, vskip3 + sal vec, LOG_PS ;vec *= PS.
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest4, [dest1+3*PS] + SSTR dest4_m, dest4 + mov dest1, [dest1] ;Load dest1 last: this overwrites the dests pointer + SSTR dest1_m, dest1 + +.loop16: + vpxor xp1, xp1 ;Clear the four output accumulators + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + mov tmp, mul_array ;tmp = tables of the first source + xor vec_i, vec_i ;vec_i = 0 + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + %ifidn PS,8 ;64-bit code + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + XLDR x0, [ptr+pos] ;Get next source vector + add tmp, 32 + add vec_i, PS + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + %else ;32-bit code + XLDR x0, [ptr+pos] ;Get next source vector + vmovdqa xmask0f, [mask0f] ;Reload nibble mask: xmm7 is reused for tables below + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %endif + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ;32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load
array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ;32-bit code + sal vec, 1 ;Double vec: 64/PS would exceed the maximum index scale of 8 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + sar vec, 1 ;Restore vec + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + %ifidn PS,4 ;32-bit code + SLDR vskip3, vskip3_m + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_lo ;GF add high and low partials + vpxor xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, vec + jl .next_vect ;Loop over all source vectors + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + SLDR dest4, dest4_m + XSTR [dest4+pos], xp4 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass ;Done once pos equals the original length + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_4vect_dot_prod_avx, 02, 05, 0193 diff --git
a/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm new file mode 100644 index 000000000..181a18d9d --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm @@ -0,0 +1,460 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; Returns 0 on success, 1 when len is below 32. + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %macro SLDR 2 ;no-op: 64-bit builds keep every variable in a register + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %macro SLDR 2 ;no-op: 64-bit builds keep every variable in a register + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 (keeps rsp 16-byte aligned for the vmovdqa saves) + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg
r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, arg(4) ;Load the 5th argument (dests) from the stack + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; var2 +;;; var3 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are scratch registers for variables kept on the stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp.w edx + %define tmp.b dl + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define tmp5 trans2 + %define tmp5_m var(2) + %define tmp6 trans2 + %define tmp6_m var(3) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*4 ;4 local variables + push esi + push edi + push ebx + mov arg1, arg(1) ;Keep vec in ebx for the whole function + %endmacro + + %macro FUNC_RESTORE 0 + pop
ebx + pop edi + pop esi + add esp, PS*4 ;4 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %define dest4_m tmp5_m + %define vskip3_m tmp6_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f ymm14 + %define xmask0fx xmm14 + %define xgft1_lo ymm13 + %define xgft1_hi ymm12 + %define xgft2_lo ymm11 + %define xgft2_hi ymm10 + %define xgft3_lo ymm9 + %define xgft3_hi ymm8 + %define xgft4_lo ymm7 + %define xgft4_hi ymm6 + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 + %define xp4 ymm5 +%else + %define ymm_trans ymm7 ;ymm7 is shared by xmask0f and xgft1_hi + %define xmask0f ymm_trans + %define xmask0fx xmm7 + %define xgft1_lo ymm6 + %define xgft1_hi ymm_trans + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + %define xgft4_lo xgft1_lo + %define xgft4_hi xgft1_hi + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 + %define xp4 ymm5 +%endif +align 16 +global gf_4vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m + sub len, 32 ;len now holds the offset of the last full 32-byte block + SSTR len_m, len + jl .return_fail ;Fail if the input length is below 32 + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx,
xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + mov vskip3, vec + imul vskip3, 96 ;vskip3 = vec*96: byte offset of the 4th (D) table set + SSTR vskip3_m, vskip3 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest4, [dest1+3*PS] + SSTR dest4_m, dest4 + mov dest1, [dest1] ;Load dest1 last: this overwrites the dests pointer + SSTR dest1_m, dest1 + +.loop32: + vpxor xp1, xp1 ;Clear the four output accumulators + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + mov tmp, mul_array ;tmp = tables of the first source + xor vec_i, vec_i ;vec_i = 0 + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + + add vec_i, PS + %ifidn PS,8 ;64-bit code + vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;xtmpa = low nibbles of lane 1 | high nibbles of lane 2 + vperm2i128 x0, xgft4_lo, x0, 0x12 ;x0 = high nibbles of lane 1 | low nibbles of lane 2 + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + add tmp, 32 + %else ;32-bit code + mov cl, 0x0f ;use ecx as a temp variable (src is reloaded at .next_vect) + vpinsrb xmask0fx, xmask0fx, ecx, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+ + vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;xtmpa = low nibbles of lane 1 | high nibbles of lane 2 + vperm2i128 x0, xgft4_lo, x0, 0x12 ;x0 = high nibbles of lane 1 | low nibbles of lane 2 + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + %endif + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ; 32-bit code + sal vec, 1 ;Double vec: 64/PS would exceed the maximum index scale of 8 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + sar vec, 1 ;Restore vec + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + %ifidn PS,4 ; 32-bit code + SLDR vskip3, vskip3_m + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + add tmp, 32 + %endif + vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_lo ;GF
add high and low partials + vpxor xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, vec + jl .next_vect ;Loop over all source vectors + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + SLDR dest4, dest4_m + XSTR [dest4+pos], xp4 + + SLDR len, len_m + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass ;Done once pos equals the original length + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_4vect_dot_prod_avx2, 04, 05, 0198 diff --git a/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm new file mode 100644 index 000000000..92886782d --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm @@ -0,0 +1,301 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission.
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and 
restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 (keeps rsp 16-byte aligned for the vmovdqa saves) + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, arg(4) ;Load the 5th argument (dests) from the stack + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa64 ;EVEX form: plain vmovdqa has no zmm encoding + %define XSTR vmovdqa64 ;EVEX form: plain vmovdqa has no zmm encoding + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%define xmask0f zmm14 +%define xgft1_lo zmm13 +%define xgft1_loy ymm13 +%define
xgft1_hi zmm12 +%define xgft2_lo zmm11 +%define xgft2_loy ymm11 +%define xgft2_hi zmm10 +%define xgft3_lo zmm9 +%define xgft3_loy ymm9 +%define xgft3_hi zmm8 +%define xgft4_lo zmm7 +%define xgft4_loy ymm7 +%define xgft4_hi zmm6 + +%define x0 zmm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp2 zmm3 +%define xp3 zmm4 +%define xp4 zmm5 + +default rel +[bits 64] + +section .text + +align 16 +global gf_4vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 ;len now holds the offset of the last full 64-byte block + jl .return_fail ;Fail if the input length is below 64 + + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + mov vskip3, vec + imul vskip3, 96 ;vskip3 = vec*96: byte offset of the 4th (D) table set + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] + mov dest1, [dest1] ;Load dest1 last: this overwrites the dests pointer + +.loop64: + vpxorq xp1, xp1, xp1 ;Clear the four output accumulators + vpxorq xp2, xp2, xp2 + vpxorq xp3, xp3, xp3 + vpxorq xp4, xp4, xp4 + mov tmp, mul_array ;tmp = tables of the first source + xor vec_i, vec_i ;vec_i = 0 + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vmovdqu8 xgft4_loy, [tmp+vskip3] ;Load array Dx{00}..{0f}, Dx{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 ;Broadcast 128-bit lane 1 (high-nibble table) + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ;Broadcast 128-bit lane 0 (low-nibble table) + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + vpshufb
xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpxorq xp2, xp2, xgft2_hi ;xp2 += partial + + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 ;Broadcast 128-bit lane 1 (high-nibble table) + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ;Broadcast 128-bit lane 0 (low-nibble table) + vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55 + vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00 + + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials + vpxorq xp3, xp3, xgft3_hi ;xp3 += partial + + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpxorq xp4, xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, vec + jl .next_vect ;Loop over all source vectors + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [dest3+pos], xp3 + XSTR [dest4+pos], xp4 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass ;Done once pos equals the original length + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_4vect_dot_prod_avx512 +no_gf_4vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm new file mode 100644 index 000000000..b32962490 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm @@ -0,0 +1,443 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, 
arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; var2 +;;; var3 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define tmp5 trans2 + %define tmp5_m var(2) + %define tmp6 trans2 + %define tmp6_m var(3) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*4 ;4 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*4 ;4 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 
+%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %define dest4_m tmp5_m + %define vskip3_m tmp6_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm14 + %define xgft1_lo xmm2 + %define xgft1_hi xmm3 + %define xgft2_lo xmm11 + %define xgft2_hi xmm4 + %define xgft3_lo xmm9 + %define xgft3_hi xmm5 + %define xgft4_lo xmm7 + %define xgft4_hi xmm6 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm8 + %define xp2 xmm10 + %define xp3 xmm12 + %define xp4 xmm13 +%else + %define xmm_trans xmm7 ;reuse xmask0f and xgft1_lo + %define xmask0f xmm_trans + %define xgft1_lo xmm_trans + %define xgft1_hi xmm6 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + %define xgft4_lo xgft1_lo + %define xgft4_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 + %define xp4 xmm5 +%endif +align 16 +global gf_4vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip3, vec + imul vskip3, 96 + SSTR vskip3_m, vskip3 + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest4, [dest1+3*PS] + SSTR dest4_m, dest4 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + pxor xp1, xp1 + pxor xp2, xp2 + pxor xp3, xp3 + pxor xp4, xp4 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + %ifidn PS,8 ;64-bit code + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + XLDR x0, [ptr+pos] ;Get next source vector + add tmp, 32 + add vec_i, PS + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + %else ;32-bit code + XLDR x0, [ptr+pos] ;Get next source vector + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %endif + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ;32-bit code + 
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + %endif + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ;32-bit code + sal vec, 1 + movdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + sar vec, 1 + %endif + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp3, xgft3_hi ;xp3 += partial + + %ifidn PS,4 ;32-bit code + SLDR vskip3, vskip3_m + movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + add tmp, 32 + add vec_i, PS + %endif + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pxor xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + SLDR dest4, dest4_m + XSTR [dest4+pos], xp4 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_4vect_dot_prod_sse, 00, 06, 0064 diff --git 
a/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c new file mode 100644 index 000000000..0352eefa8 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c @@ -0,0 +1,695 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> // for printf, putchar +#include <stdlib.h> // for posix_memalign, rand +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse +#endif +#ifndef TEST_MIN_SIZE +# define TEST_MIN_SIZE 16 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) +#define TEST_MEM TEST_SIZE +#define TEST_LOOPS 10000 +#define TEST_TYPE_STR "" + +#ifndef TEST_SOURCES +# define TEST_SOURCES 16 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +typedef unsigned char u8; + +extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + +// Print len bytes of buf in hex, starting a new row after every 32 bytes +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +// Print the first m bytes of each of the k buffers in s, one buffer per row +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +// Print a k x m byte matrix stored contiguously (row-major) in s +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +// Compare FUNCTION_UNDER_TEST against gf_vect_dot_prod_base; returns 0 on pass, -1 on failure +int main(int argc, char *argv[]) +{ + int i, j, rtest, srcs; + void *buf; + u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; + u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES]; + u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3; + u8 *dest_ref4, *dest_ptrs[4]; + + int align, size; + unsigned char
*efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *udest_ptrs[4]; + printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest4 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref4 = buf; + + dest_ptrs[0] = dest1; + dest_ptrs[1] = dest2; + dest_ptrs[2] = dest3; + dest_ptrs[3] = dest4; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest1, 0, TEST_LEN); + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(dest_ref3, 0, TEST_LEN); + memset(dest_ref4, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + memset(g3, 7, TEST_SOURCES); + memset(g4, 3, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g3[i], 
&g_tbls[64 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, + dest_ref4); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], 
&g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + buffs, dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + buffs, dest_ref4); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + 
g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, + dest_ref4); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test3 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test4 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 32; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + efence_buffs, dest_ref3); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + efence_buffs, dest_ref4); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + // On failure report the size being swept: rtest is not this loop's variable + // and still holds the final value left by the previous loop. + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + if (0 != memcmp(dest_ref3, dest3, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, align); + printf("dprod_dut:"); + dump(dest3, align); + return -1; + } + + if (0
!= memcmp(dest_ref4, dest4, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, align); + printf("dprod_dut:"); + dump(dest4, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + // Mask for the random offsets below is PTR_ALIGN_CHK_B - 1, or 0 when alignment checks are off + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand "
xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + // Only the first srcs entries of ubuffs are assigned in this iteration, so never dump more rows than that + dump_matrix(ubuffs, srcs < 5 ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, srcs < 5 ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, udest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, srcs < 5 ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(udest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, udest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, srcs < 5 ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(udest_ptrs[3], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + // offset is unsigned int, so it is printed with %u + printf("size=%d offset=%u srcs=%d\n", size, offset, srcs); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + offset = udest_ptrs[2] - dest3; + if (memcmp(dest3, dest_ref1, offset)) { + printf("Fail rand ualign pad3 start\n"); + return -1; + } + if (memcmp(dest3 + offset + size, dest_ref1,
PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad3 end\n"); + return -1; + } + + offset = udest_ptrs[3] - dest4; + if (memcmp(dest4, dest_ref1, offset)) { + printf("Fail rand ualign pad4 start\n"); + return -1; + } + if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad4 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, dest_ptrs[2], size)) { + printf("Fail rand " 
xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, dest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest_ptrs[3], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx.asm b/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx.asm new file mode 100644 index 000000000..62441c192 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx.asm @@ -0,0 +1,336 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r15, 10*16 + 2*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r15, [rsp + 10*16 + 2*8] + add rsp, stack_size +%endmacro + +%elifidn 
__OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 ;preserve r12, which this build uses as tmp3 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +;;; dest2 and dest4 take over the mul_array and vec_i registers; both are +;;; consumed (into tmp3) before the destination pointers are loaded. +%define dest2 mul_array +%define dest3 tmp2 +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/store (plain aligned moves when NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft3_hi xmm14 +%define xgft4_hi xmm13 +%define xgft4_lo xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph4 xmm8 +%define xtmpl4 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmph1 +%define xd4 xtmpl1 + +;;; Entry point. Returns 1 when len < 16, otherwise 0. +align 16 +global gf_4vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_avx) + FUNC_SAVE + sub len, 16 ;len now holds length-16, the offset of the last 16-byte block + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + mov tmp, vec + + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] ;tmp3 = A table entry for source vec_i + + sal tmp, 6 ;Multiply by 64 + vmovdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec ;tmp = vec*96, offset from the A tables to the D tables + vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ...
, Dx{f0} + + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 + + XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 + + ; dest3 + vpshufb xtmph3, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xd3, xtmph3 + + ; dest4 + vpshufb xtmph4, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl4, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph4, xtmph4, xtmpl4 ;GF add high and low partials + vpxor xd4, xd4, xtmph4 + + XSTR [dest1+pos], xd1 ;Store result + XSTR [dest2+pos], xd2 ;Store result + XSTR [dest3+pos], xd3 ;Store result + XSTR [dest4+pos], xd4 ;Store result + + 
add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + + mov tmp, len ;Overlapped offset length-16 + + XLDR x0, [src+tmp] ;Get next source vector + + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xtmph4, [dest3+tmp] ;Get next dest vector + + sub len, pos + + vmovdqa xtmpl4, [constip16] ;Load const of i + 16 + vpinsrb xtmph3, xtmph3, len.w, 15 + vpshufb xtmph3, xtmph3, xmask0f ;Broadcast len to all bytes + vpcmpgtb xtmph3, xtmph3, xtmpl4 + + XLDR xtmpl4, [dest4+tmp] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpand xtmph1, xtmph1, xtmph3 + vpxor xd1, xd1, xtmph1 + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpand xtmph2, xtmph2, xtmph3 + vpxor xd2, xd2, xtmph2 + + ; dest3 + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_hi, xtmpl3 ;GF add high and low partials + vpand xgft3_hi, xgft3_hi, xtmph3 + vpxor xtmph4, xtmph4, xgft3_hi + + ; 
dest4 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmph3 + vpxor xtmpl4, xtmpl4, xgft4_hi + + XSTR [dest1+tmp], xd1 ;Store result + XSTR [dest2+tmp], xd2 ;Store result + XSTR [dest3+tmp], xtmph4 ;Store result + XSTR [dest4+tmp], xtmpl4 ;Store result + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_4vect_mad_avx, 02, 01, 020a diff --git a/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx2.asm b/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx2.asm new file mode 100644 index 000000000..9a7b7d94b --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx2.asm @@ -0,0 +1,342 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + 
movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + + +;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 vec +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use aligned load/store: non-temporal unless NO_NT_LDST + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft2_lo ymm13 +%define xgft3_lo ymm12 +%define xgft4_lo ymm11 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmpl ymm2 +%define xtmplx xmm2 +%define xtmph1 ymm3 +%define xtmph1x xmm3 +%define xtmph2 ymm4 +%define xtmph3 ymm5 +%define xtmph4 ymm6 +%define xd1 ymm7 +%define xd2 ymm8 +%define xd3 ymm9 +%define xd4 ymm10 + +align 16 +global gf_4vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail ;Fail if len < 32 + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 ;Multiply by 32 + lea tmp, [mul_array + vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + add tmp, vec ;tmp+2*vec now addresses table D (3*vec) + vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; reuse vec + mov dest4, [dest1+3*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop32: + XLDR x0, [src+pos] ;Get next source vector + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + XLDR xd4, [dest4+pos] ;Get next dest vector + + vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of
low nibble + vpxor xtmph3, xtmph3, xtmpl ;GF add high and low partials + vpxor xd3, xd3, xtmph3 ;xd3 += partial + + ; dest4 + vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph4, xtmph4, xtmpl ;GF add high and low partials + vpxor xd4, xd4, xtmph4 ;xd4 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + XSTR [dest4+pos], xd4 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass ;Done if pos == original len (no tail) + +.lessthan32: + ;; Tail len + ;; Do one more overlap pass + mov tmp.b, 0x1f + vpinsrb xtmph1x, xtmph1x, tmp.w, 0 + vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f... + + mov tmp, len ;Overlapped offset length-32 + + XLDR x0, [src+tmp] ;Get next source vector + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xd3, [dest3+tmp] ;Get next dest vector + XLDR xd4, [dest4+tmp] ;Get next dest vector + + sub len, pos ;len = tail byte count - 32 (negative) + + vmovdqa xtmph2, [constip32] ;Load const: byte i = -(i+1), i = 0..31 + vpinsrb xtmplx, xtmplx, len.w, 15 + vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx + vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f...
+ vpcmpgtb xtmpl, xtmpl, xtmph2 ;0xff in bytes not yet processed (tail) + + vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl ;Zero partials of already-processed bytes + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials + vpand xtmph2, xtmph2, xtmpl ;Zero partials of already-processed bytes + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xgft3_lo ;GF add high and low partials + vpand xtmph3, xtmph3, xtmpl ;Zero partials of already-processed bytes + vpxor xd3, xd3, xtmph3 ;xd3 += partial + + ; dest4 + vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph4, xtmph4, xgft4_lo ;GF add high and low partials + vpand xtmph4, xtmph4, xtmpl ;Zero partials of already-processed bytes + vpxor xd4, xd4, xtmph4 ;xd4 += partial + + XSTR [dest1+tmp], xd1 + XSTR [dest2+tmp], xd2 + XSTR [dest3+tmp], xd3 + XSTR [dest4+tmp], xd4 + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data +align 32 +constip32: + dq
0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7 + +;;; func core, ver, snum +slversion gf_4vect_mad_avx2, 04, 01, 020b diff --git a/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx512.asm b/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx512.asm new file mode 100644 index 000000000..bc836af6b --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_mad_avx512.asm @@ -0,0 +1,267 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, 
[rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro +%endif + +%define PS 8 +%define len arg0 +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define dest2 mul_array +%define dest3 vec +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use aligned load/store: non-temporal unless NO_NT_LDST + %ifdef NO_NT_LDST + %define XLDR vmovdqa64 ;EVEX form, needed for zmm operands + %define XSTR vmovdqa64 ;EVEX form, needed for zmm operands + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmpl1 zmm2 +%define xtmph1 zmm3 +%define xtmph2 zmm4 +%define xtmph3 zmm5 +%define xtmph4 zmm6 +%define xgft1_hi zmm7 +%define xgft1_lo zmm8 +%define xgft1_loy ymm8 +%define xgft2_hi zmm9 +%define xgft2_lo zmm10 +%define xgft2_loy ymm10 +%define xgft3_hi zmm11 +%define xgft3_lo zmm12 +%define xgft3_loy ymm12 +%define xgft4_hi zmm13 +%define xgft4_lo zmm14 +%define xgft4_loy ymm14 +%define xd1 zmm15 +%define xd2 zmm16 +%define xd3 zmm17 +%define xd4 zmm18 +%define xmask0f zmm19 +%define xtmpl2 zmm20 +%define xtmpl3 zmm21 +%define xtmpl4 zmm22 +%define xtmpl5 zmm23 + +align 16 +global gf_4vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail ;Fail if len < 64 + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f...
+ sal vec_i, 5 ;Multiply by 32 + sal vec, 5 ;Multiply by 32 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu xgft3_loy, [tmp+2*vec] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + add tmp, vec ;tmp+2*vec now addresses table D (3*vec) + vmovdqu xgft4_loy, [tmp+2*vec] ;Load array Dx{00}..{0f}, Dx{00}..{f0} + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 ;Broadcast 128-bit lane 1 (high-nibble table) + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ;Broadcast 128-bit lane 0 (low-nibble table) + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55 + vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00 + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; reuse vec + mov dest4, [dest1+3*PS] ; reuse vec_i + mov dest1, [dest1] + mov tmp, -1 + kmovq k1, tmp ;k1 = all ones: every byte enabled in main loop + +.loop64: + XLDR x0, [src+pos] ;Get next source vector + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + XLDR xd4, [dest4+pos] ;
Get next dest vector + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxorq xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxorq xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3 {k1}{z}, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3 {k1}{z}, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxorq xd3, xd3, xtmph3 ;xd3 += partial + + ; dest4 + vpshufb xtmph4 {k1}{z}, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl4 {k1}{z}, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph4, xtmph4, xtmpl4 ;GF add high and low partials + vpxorq xd4, xd4, xtmph4 ;xd4 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + XSTR [dest4+pos], xd4 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass ;Done if pos == original len (no tail) + + ;; Tail len: k1 = mask of the last (len mod 64) bytes + mov pos, (1 << 63) + lea tmp, [len + 64 - 1] + and tmp, 63 + sarx pos, pos, tmp + kmovq k1, pos + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_4vect_mad_avx512 +no_gf_4vect_mad_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git
a/src/spdk/isa-l/erasure_code/gf_4vect_mad_sse.asm b/src/spdk/isa-l/erasure_code/gf_4vect_mad_sse.asm new file mode 100644 index 000000000..c3d4c5d77 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_4vect_mad_sse.asm @@ -0,0 +1,342 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r15, 10*16 + 2*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r15, [rsp + 10*16 + 2*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define 
src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 tmp2 +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use aligned load/store: non-temporal unless NO_NT_LDST + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft3_hi xmm14 +%define xgft4_hi xmm13 +%define xgft4_lo xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph4 xmm8 +%define xtmpl4 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmph1 +%define xd4 xtmpl1 + +align 16 +global gf_4vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail ;Fail if len < 16 + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + + sal tmp, 6 ;Multiply by 64 + + movdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec ;tmp = 3*32*vec, offset of table D + movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + movdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; dest3 lives in tmp2 + mov dest4, [dest1+3*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ + movdqa xtmph3, xgft3_hi ;Copy preloaded tables: pshufb overwrites its destination + movdqa xtmpl4, xgft4_lo + movdqa xtmph4, xgft4_hi + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pxor xd1, xtmph1 ;xd1 += partial + + XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 ;xd2 += partial + + ; dest3 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pxor xd3, xtmph3 ;xd3 += partial + + ; dest4 + pshufb xtmph4, x0 ;Lookup mul table of high nibble + pshufb xtmpl4, xtmpa ;Lookup mul table of low nibble + pxor xtmph4, xtmpl4 ;GF add high and low partials + pxor xd4, xtmph4 ;xd4 += partial + + XSTR [dest1+pos], xd1 ;Store result + XSTR [dest2+pos], xd2 ;Store result + XSTR [dest3+pos], xd3 ;Store result + XSTR [dest4+pos], xd4 ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass ;Done if pos == original len (no tail) + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + + XLDR x0, [src+tmp] ;Get next source vector + + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
+ movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xtmph4, [dest3+tmp] ;dest3 data in xtmph4 (xd3 aliases xtmph1, holding a table) + + sub len, pos ;len = tail byte count - 16 (negative) + + movdqa xtmpl4, [constip16] ;Load const: byte i = -(i+1), i = 0..15 + pinsrb xtmph3, len.w, 15 + pshufb xtmph3, xmask0f ;Broadcast len to all bytes + pcmpgtb xtmph3, xtmpl4 ;0xff in bytes not yet processed (tail) + + XLDR xtmpl4, [dest4+tmp] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pand xtmph1, xtmph3 ;Zero partials of already-processed bytes + pxor xd1, xtmph1 ;dest1 += partial + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pand xtmph2, xtmph3 ;Zero partials of already-processed bytes + pxor xd2, xtmph2 ;dest2 += partial + + ; dest3 + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xtmpl3 ;GF add high and low partials + pand xgft3_hi, xtmph3 ;Zero partials of already-processed bytes + pxor xtmph4, xgft3_hi ;dest3 += partial + + ; dest4 + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pand xgft4_hi, xtmph3 ;Zero partials of already-processed bytes + pxor xtmpl4, xgft4_hi ;dest4 += partial + + XSTR [dest1+tmp], xd1 ;Store result + XSTR [dest2+tmp], xd2 ;Store result + XSTR [dest3+tmp], xtmph4 ;Store result + XSTR [dest4+tmp], xtmpl4 ;Store result + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 + +mask0f: + dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum
+slversion gf_4vect_mad_sse, 00, 01, 0209 diff --git a/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm new file mode 100644 index 000000000..eb1c15ed4 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm @@ -0,0 +1,303 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, 
[rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/store (plain aligned moves if NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm14 +%define xgft1_hi xmm13 +%define xgft2_lo xmm12 +%define xgft2_hi xmm11 +%define xgft3_lo xmm10 +%define xgft3_hi xmm9 +%define xgft4_lo xmm8 +%define xgft4_hi xmm7 + + +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm2 +%define xp2 xmm3 +%define xp3 xmm4 +%define xp4 xmm5 +%define xp5 xmm6 + +align 16 +global gf_5vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_dot_prod_avx) + FUNC_SAVE + sub len, 16 ;len -= 16; fail if fewer than 16 bytes were requested + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 ;vskip1 = vec*32: bytes of tables per dest + mov vskip3, vec + imul vskip3, 96 ;vskip3 = vec*96 = 3*vskip1 (offset of dest 4 tables) + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array + xor vec_i, vec_i + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + vpxor xp5, xp5 + + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + vmovdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + add tmp, 32 ;Advance to the next source's tables (32 bytes each) + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_lo ;GF add high and 
low partials + vpxor xp4, xgft4_hi ;xp4 += partial + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp5, xgft1_hi ;xp5 += partial + + cmp vec_i, vec + jl .next_vect + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_5vect_dot_prod_avx, 02, 04, 0194 diff --git a/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm new file mode 100644 index 000000000..dfafd8ab5 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm @@ -0,0 +1,315 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. 
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + 
%define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal 
load/store (plain aligned moves if NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft1_hi ymm13 +%define xgft2_lo ymm12 +%define xgft2_hi ymm11 +%define xgft3_lo ymm10 +%define xgft3_hi ymm9 +%define xgft4_lo ymm8 +%define xgft4_hi ymm7 + + +%define x0 ymm0 +%define xtmpa ymm1 +%define xp1 ymm2 +%define xp2 ymm3 +%define xp3 ymm4 +%define xp4 ymm5 +%define xp5 ymm6 + +align 16 +global gf_5vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_dot_prod_avx2) + FUNC_SAVE + sub len, 32 ;len -= 32; fail if fewer than 32 bytes were requested + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + mov vskip1, vec + imul vskip1, 32 ;vskip1 = vec*32: bytes of tables per dest + mov vskip3, vec + imul vskip3, 96 ;vskip3 = vec*96 = 3*vskip1 (offset of dest 4 tables) + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop32: + mov tmp, mul_array + xor vec_i, vec_i + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + vpxor xp5, xp5 + + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + 
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + ; " Ex{00}, Ex{10}, ..., Ex{f0} + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + add tmp, 32 ;Advance to the next source's tables (32 bytes each) + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_lo ;GF add high and low partials + vpxor xp4, xgft4_hi ;xp4 += partial + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp5, xgft1_hi ;xp5 += partial + + cmp vec_i, vec + jl .next_vect + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + 
FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_5vect_dot_prod_avx2, 04, 04, 0199 diff --git a/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm new file mode 100644 index 000000000..59b0ac2bc --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm @@ -0,0 +1,304 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + 
alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/store (plain aligned moves if NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm2 +%define xgft1_hi xmm3 +%define xgft2_lo xmm4 +%define xgft2_hi xmm5 +%define xgft3_lo xmm10 +%define xgft3_hi xmm6 +%define xgft4_lo xmm8 +%define xgft4_hi xmm7 + + +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm9 +%define xp2 xmm11 +%define xp3 xmm12 +%define xp4 xmm13 +%define xp5 xmm14 + +align 16 +global 
gf_5vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_dot_prod_sse) + FUNC_SAVE + sub len, 16 ;len -= 16; fail if fewer than 16 bytes were requested + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 ;vskip1 = vec*32: bytes of tables per dest + mov vskip3, vec + imul vskip3, 96 ;vskip3 = vec*96 = 3*vskip1 (offset of dest 4 tables) + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array + xor vec_i, vec_i + pxor xp1, xp1 + pxor xp2, xp2 + pxor xp3, xp3 + pxor xp4, xp4 + pxor xp5, xp5 + + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 3-0 + pand x0, xmask0f ;Mask high src nibble in bits 3-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0 + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + movdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + movdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + add tmp, 32 ;Advance to the next source's tables (32 bytes each) + + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + 
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp3, xgft3_hi ;xp3 += partial + + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pxor xp4, xgft4_hi ;xp4 += partial + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp5, xgft1_hi ;xp5 += partial + + cmp vec_i, vec + jl .next_vect + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_5vect_dot_prod_sse, 00, 05, 0065 diff --git a/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c new file mode 100644 index 000000000..977054cbe --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c @@ -0,0 +1,805 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse +#endif +#ifndef TEST_MIN_SIZE +# define TEST_MIN_SIZE 16 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) +#define TEST_MEM TEST_SIZE +#define TEST_LOOPS 20000 +#define TEST_TYPE_STR "" + +#ifndef TEST_SOURCES +# define TEST_SOURCES 16 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) // Print len bytes of buf in hex, 32 per row +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) // Print the first m bytes of each of the k rows s[i] in hex +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) // Print a flat, row-major k x m byte array in hex +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int i, j, rtest, srcs; + void *buf; + u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; + u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls; + u8 *dest1, *dest2, *dest3, *dest4, *dest5, *buffs[TEST_SOURCES]; + u8 *dest_ref1, *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5; + u8 *dest_ptrs[5]; + + int align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *udest_ptrs[5]; + 
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) { + printf("alloc error: Fail"); + return -1; + } + g_tbls = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest4 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest5 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref4 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref5 = buf; + + dest_ptrs[0] = dest1; + dest_ptrs[1] = dest2; + dest_ptrs[2] = dest3; + dest_ptrs[3] = dest4; + dest_ptrs[4] = dest5; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest1, 0, TEST_LEN); + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(dest_ref3, 0, TEST_LEN); + memset(dest_ref4, 0, TEST_LEN); + 
memset(dest_ref5, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + memset(g3, 7, TEST_SOURCES); + memset(g4, 9, TEST_SOURCES); + memset(g5, 4, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, + dest_ref5); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); + dump_matrix(buffs, 5, TEST_SOURCES); 
+ printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + buffs, dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + buffs, dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + buffs, dest_ref5); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, 
TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], 
buffs, + dest_ref5); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test3 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test4 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test5 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + efence_buffs, dest_ref3); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + efence_buffs, dest_ref4); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + efence_buffs, dest_ref5); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + if (0 != memcmp(dest_ref3, dest3, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 
test3 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, align); + printf("dprod_dut:"); + dump(dest3, align); + return -1; + } + + if (0 != memcmp(dest_ref4, dest4, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, align); + printf("dprod_dut:"); + dump(dest4, align); + return -1; + } + + if (0 != memcmp(dest_ref5, dest5, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, align); + printf("dprod_dut:"); + dump(dest5, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " 
xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, udest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(udest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, udest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(udest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, udest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(udest_ptrs[4], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + offset = udest_ptrs[2] - dest3; + if (memcmp(dest3, dest_ref1, offset)) { + printf("Fail rand ualign pad3 start\n"); + return -1; + } + if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad3 end\n"); + 
return -1; + } + + offset = udest_ptrs[3] - dest4; + if (memcmp(dest4, dest_ref1, offset)) { + printf("Fail rand ualign pad4 start\n"); + return -1; + } + if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad4 end\n"); + return -1; + } + + offset = udest_ptrs[4] - dest5; + if (memcmp(dest5, dest_ref1, offset)) { + printf("Fail rand ualign pad5 start\n"); + return -1; + } + if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad5 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + + return -1; + } + 
if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, dest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, dest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, dest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest_ptrs[4], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/spdk/isa-l/erasure_code/gf_5vect_mad_avx.asm b/src/spdk/isa-l/erasure_code/gf_5vect_mad_avx.asm new file mode 100644 index 000000000..696b6a0dc --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_5vect_mad_avx.asm @@ -0,0 +1,365 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; XORs nibble-table products of src into the five buffers pointed to by dest; returns 0, or 1 when len < 16. + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define tmp4 r14 + %define return rax + %define return.w eax + %define stack_size 16*10 + 5*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define tmp4 r13 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array,
src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp4 +%define dest3 mul_array +%define dest4 tmp2 +%define dest5 vec_i + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use aligned (NO_NT_LDST) or non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft5_hi xmm14 +%define xgft4_lo xmm13 +%define xgft4_hi xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph5 xmm8 +%define xtmpl5 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmpl1 +%define xd4 xtmph1 +%define xd5 xtmpl2 + + +align 16 +global gf_5vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + sal tmp, 6 ;Multiply by 64 + vmovdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec + vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + + mov dest3, [dest1+2*PS] ; reuse mul_array + mov dest4, [dest1+3*PS] + mov dest5, [dest1+4*PS] ; reuse vec_i + mov dest2, [dest1+PS] + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
+ vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 + + XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmph1, Get next dest vector + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 + + XLDR xd5, [dest5+pos] ;Reuse xtmpl2.
Get next dest vector + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xd3, xtmph3 + + ; dest4 + vpshufb xtmph2, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl3 ;GF add high and low partials + vpxor xd4, xd4, xtmph2 + + ; dest5 + vpshufb xtmph5, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble + vpxor xtmph5, xtmph5, xtmpl5 ;GF add high and low partials + vpxor xd5, xd5, xtmph5 + + XSTR [dest1+pos], xd1 ;Store result into dest1 + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + XSTR [dest4+pos], xd4 ;Store result into dest4 + XSTR [dest5+pos], xd5 ;Store result into dest5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + XLDR x0, [src+tmp] ;Get next source vector + + sub len, pos + + vmovdqa xtmph1, [constip16] ;Load const of i + 16 + vpinsrb xtmph5, len.w, 15 + vpshufb xtmph5, xmask0f ;Broadcast len (now len - pos) to all bytes + vpcmpgtb xtmph5, xtmph5, xtmph1 ;0xff in byte lanes not yet processed by .loop16 + + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
+ vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpand xtmph1, xtmph1, xtmph5 + vpxor xd1, xd1, xtmph1 + + XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+tmp] ;Reuse xtmph1, Get next dest vector + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpand xtmph2, xtmph2, xtmph5 + vpxor xd2, xd2, xtmph2 + + XLDR xd5, [dest5+tmp] ;Reuse xtmpl2.
Get next dest vector + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpand xtmph3, xtmph3, xtmph5 + vpxor xd3, xd3, xtmph3 + + ; dest4 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmph5 + vpxor xd4, xd4, xgft4_hi + + ; dest5 + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble + vpxor xgft5_hi, xgft5_hi, xtmpl5 ;GF add high and low partials + vpand xgft5_hi, xgft5_hi, xtmph5 + vpxor xd5, xd5, xgft5_hi + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + XSTR [dest4+tmp], xd4 ;Store result into dest4 + XSTR [dest5+tmp], xd5 ;Store result into dest5 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_5vect_mad_avx, 02, 01, 020d diff --git a/src/spdk/isa-l/erasure_code/gf_5vect_mad_avx2.asm b/src/spdk/isa-l/erasure_code/gf_5vect_mad_avx2.asm new file mode 100644 index 000000000..3c65c05ea --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_5vect_mad_avx2.asm @@ -0,0 +1,363 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define 
pos.w return.w + +%define dest2 tmp2 +%define dest3 mul_array +%define dest4 vec +%define dest5 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft2_lo ymm13 +%define xgft3_lo ymm12 +%define xgft4_lo ymm11 +%define xgft5_lo ymm10 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmpl ymm2 +%define xtmplx xmm2 +%define xtmph1 ymm3 +%define xtmph1x xmm3 +%define xtmph2 ymm4 +%define xd1 ymm5 +%define xd2 ymm6 +%define xd3 ymm7 +%define xd4 ymm8 +%define xd5 ymm9 + +align 16 +global gf_5vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... 
+ + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 ;Multiply by 32 + lea tmp, [mul_array + vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft5_lo, [tmp+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + ; " Ex{00}, Ex{10}, ..., Ex{f0} + add tmp, vec + vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + + mov dest3, [dest1+2*PS] ; reuse mul_array + mov dest4, [dest1+3*PS] ; reuse vec + mov dest5, [dest1+4*PS] ; reuse vec_i + mov dest2, [dest1+PS] + mov dest1, [dest1] + +.loop32: + XLDR x0, [src+pos] ;Get next source vector + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + XLDR xd4, [dest4+pos] ;Get next dest vector + XLDR xd5, [dest5+pos] ;Get next dest vector + + vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor 
xtmph2, xtmph2, xtmpl ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + ; dest3 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd3, xd3, xtmph1 ;xd3 += partial + + vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + ; dest4 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials + vpxor xd4, xd4, xtmph2 ;xd4 += partial + + ; dest5 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd5, xd5, xtmph1 ;xd5 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + XSTR [dest4+pos], xd4 + XSTR [dest5+pos], xd5 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + +.lessthan32: + ;; Tail len + ;; Do one more overlap pass + mov tmp.b, 0x1f + vpinsrb xtmph1x, xtmph1x, tmp.w, 0 + vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f... + + mov tmp, len ;Overlapped offset length-32 + + XLDR x0, [src+tmp] ;Get next source vector + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xd3, [dest3+tmp] ;Get next dest vector + XLDR xd4, [dest4+tmp] ;Get next dest vector + XLDR xd5, [dest5+tmp] ;Get next dest vector + + sub len, pos + + vmovdqa xtmph2, [constip32] ;Load const of i + 32 + vpinsrb xtmplx, xtmplx, len.w, 15 + vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx + vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f... 
+ vpcmpgtb xtmpl, xtmpl, xtmph2 + + vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials + vpand xtmph2, xtmph2, xtmpl + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + ; dest3 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft3_lo ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl + vpxor xd3, xd3, xtmph1 ;xd3 += partial + + vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + ; dest4 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xgft4_lo ;GF add high and low partials + vpand xtmph2, xtmph2, xtmpl + vpxor xd4, xd4, xtmph2 ;xd4 += partial + + ; dest5 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft5_lo ;GF add high and low partials + 
vpand xtmph1, xtmph1, xtmpl + vpxor xd5, xd5, xtmph1 ;xd5 += partial + + XSTR [dest1+tmp], xd1 + XSTR [dest2+tmp], xd2 + XSTR [dest3+tmp], xd3 + XSTR [dest4+tmp], xd4 + XSTR [dest5+tmp], xd5 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data +align 32 +constip32: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7 + +;;; func core, ver, snum +slversion gf_5vect_mad_avx2, 04, 01, 020e diff --git a/src/spdk/isa-l/erasure_code/gf_5vect_mad_sse.asm b/src/spdk/isa-l/erasure_code/gf_5vect_mad_sse.asm new file mode 100644 index 000000000..b16f4058c --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_5vect_mad_sse.asm @@ -0,0 +1,373 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define tmp4 r14 + %define return rax + %define return.w eax + %define stack_size 16*10 + 5*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp 
+ 10*16 + 3*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define tmp4 r13 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp4 +%define dest3 mul_array +%define dest4 tmp2 +%define dest5 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft5_hi xmm14 +%define xgft4_lo xmm13 +%define xgft4_hi xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph5 xmm8 +%define xtmpl5 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmpl1 +%define xd4 xtmph1 +%define xd5 xtmpl2 + + +align 16 +global gf_5vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + sal tmp, 6 ;Multiply by 64 + movdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec + movdqu xgft4_hi,
[tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + + mov dest3, [dest1+2*PS] ; reuse mul_array + mov dest4, [dest1+3*PS] + mov dest5, [dest1+4*PS] ; reuse vec_i + mov dest2, [dest1+PS] + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + movdqa xtmph5, xgft5_hi ;Reload const array registers + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pxor xd1, xtmph1 + + XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmph1. Get next dest vector + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 + + XLDR xd5, [dest5+pos] ;Reuse xtmpl2. 
Get next dest vector + + ; dest3 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pxor xd3, xtmph3 + + movdqa xtmph2, xgft4_hi ;Reload const array registers + movdqa xtmpl3, xgft4_lo ;Reload const array registers + + ; dest5 + pshufb xtmph5, x0 ;Lookup mul table of high nibble + pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble + pxor xtmph5, xtmpl5 ;GF add high and low partials + pxor xd5, xtmph5 + + ; dest4 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl3 ;GF add high and low partials + pxor xd4, xtmph2 + + XSTR [dest1+pos], xd1 ;Store result into dest1 + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + XSTR [dest4+pos], xd4 ;Store result into dest4 + XSTR [dest5+pos], xd5 ;Store result into dest5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + XLDR x0, [src+tmp] ;Get next source vector + + sub len, pos + + movdqa xtmpl1, [constip16] ;Load const of i + 16 + pinsrb xtmph5, len.w, 15 + pshufb xtmph5, xmask0f ;Broadcast len to all bytes + pcmpgtb xtmph5, xtmpl1 + + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... 
+ movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pand xtmph1, xtmph5 + pxor xd1, xtmph1 + + XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+tmp] ;Reuse xtmph1. Get next dest vector + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pand xtmph2, xtmph5 + pxor xd2, xtmph2 + + XLDR xd5, [dest5+tmp] ;Reuse xtmpl2. Get next dest vector + + ; dest3 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pand xtmph3, xtmph5 + pxor xd3, xtmph3 + + ; dest4 + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pand xgft4_hi, xtmph5 + pxor xd4, xgft4_hi + + ; dest5 + pshufb xgft5_hi, x0 ;Lookup mul table of high nibble + pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble + pxor xgft5_hi, xtmpl5 ;GF add high and low partials + pand xgft5_hi, xtmph5 + pxor xd5, xgft5_hi + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + XSTR [dest4+tmp], xd4 ;Store result into dest4 + XSTR [dest5+tmp], xd5 ;Store result into dest5 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret 
+ +endproc_frame + +section .data + +align 16 + +mask0f: + dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_5vect_mad_sse, 00, 01, 020c diff --git a/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm new file mode 100644 index 000000000..a519d5224 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm @@ -0,0 +1,315 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + 
alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm14 +%define xgft1_hi xmm13 +%define xgft2_lo xmm12 +%define xgft2_hi xmm11 +%define xgft3_lo xmm10 +%define xgft3_hi xmm9 +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm2 +%define xp2 xmm3 +%define xp3 xmm4 +%define xp4 xmm5 +%define xp5 xmm6 +%define xp6 xmm7 + +align 16 +global gf_6vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
+func(gf_6vect_dot_prod_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array + xor vec_i, vec_i + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + vpxor xp5, xp5 + vpxor xp6, xp6 + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + + vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., 
Dx{f0} + vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + vmovdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + vmovdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0} + add tmp, 32 + + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp4, xgft1_hi ;xp4 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp5, xgft2_hi ;xp5 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp6, xgft3_hi ;xp6 += partial + + cmp vec_i, vec + jl .next_vect + + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + mov tmp, [dest+5*PS] + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + XSTR [tmp+pos], xp6 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_6vect_dot_prod_avx, 02, 04, 0195 diff --git a/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm new file mode 100644 index 000000000..a57c52a00 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm @@ -0,0 +1,326 @@ 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 
10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft1_hi ymm13 +%define xgft2_lo ymm12 +%define xgft2_hi ymm11 +%define xgft3_lo ymm10 +%define xgft3_hi ymm9 +%define x0 ymm0 +%define xtmpa ymm1 +%define xp1 ymm2 +%define xp2 ymm3 +%define xp3 ymm4 +%define xp4 ymm5 +%define xp5 ymm6 +%define xp6 ymm7 + +align 16 +global gf_6vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_dot_prod_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS.
Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop32: + mov tmp, mul_array + xor vec_i, vec_i + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + vpxor xp5, xp5 + vpxor xp6, xp6 + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpand xgft3_lo, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xgft3_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xgft3_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 + + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + + vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + vmovdqu xgft2_lo, [tmp+vskip1*4] 
;Load array Ex{00}, Ex{01}, ..., Ex{0f} + ; " Ex{00}, Ex{10}, ..., Ex{f0} + vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + ; " Fx{00}, Fx{10}, ..., Fx{f0} + add tmp, 32 + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp4, xgft1_hi ;xp4 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp5, xgft2_hi ;xp5 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp6, xgft3_hi ;xp6 += partial + + cmp vec_i, vec + jl .next_vect + + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + mov tmp, [dest+5*PS] + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + XSTR [tmp+pos], xp6 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_6vect_dot_prod_avx2, 04, 04, 019a diff --git a/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm new file mode 100644 index 000000000..b62881136 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm
@@ -0,0 +1,315 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Accumulates six GF dot products over the same vec source buffers, 16 bytes per pass. +;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; Returns 0 on success, or 1 when len is less than 16. + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 ; r12-r15 hold tmp3..tmp6 and must be preserved + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 ; pops mirror the pushes in FUNC_SAVE, in reverse order + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) ; dests is fetched from the stack slot addressed by arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp 
+ 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 ; byte count per buffer; reduced by 16 in the prologue +%define vec arg1 ; number of source buffers; scaled by PS in the prologue +%define mul_array arg2 ; multiplication tables (g_tbls) +%define src arg3 ; array of source buffer pointers (buffs) +%define dest arg4 ; array of six destination pointers (dests) +%define ptr arg5 ; scratch register, not an incoming argument +%define vec_i tmp2 ; byte offset into src[]; reused for the fifth dest pointer when storing +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 ; vec*32: table stride between consecutive outputs +%define vskip3 tmp6 ; vec*96, i.e. 3*vskip1 +%define pos return ; current byte offset; rax is overwritten with the status on exit + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use aligned (NO_NT_LDST) or non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm2 +%define xgft1_hi xmm3 +%define xgft2_lo xmm4 +%define xgft2_hi xmm5 +%define xgft3_lo xmm6 +%define xgft3_hi xmm7 +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm8 +%define xp2 xmm9 +%define xp3 xmm10 +%define xp4 xmm11 +%define xp5 xmm12 +%define xp6 xmm13 + +align 16 +global gf_6vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_dot_prod_sse) + FUNC_SAVE + sub len, 16 ; from here on, pos <= len means a full 16-byte block remains + jl .return_fail ; fewer than 16 bytes requested + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + mov dest1, [dest] ; first two destination pointers stay in registers + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array ; restart at the tables of source 0 + xor vec_i, vec_i + pxor xp1, xp1 + pxor xp2, xp2 + pxor xp3, xp3 + pxor xp4, xp4 + pxor xp5, xp5 + pxor xp6, xp6 + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 3-0 + pand x0, xmask0f ;Mask high src nibble in bits 3-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0 + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp3, xgft3_hi ;xp3 += partial + + + movdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + movdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + movdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + movdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + movdqu xgft3_hi, [tmp+ptr+16] ; " 
Fx{00}, Fx{10}, ..., Fx{f0} + add tmp, 32 ; step to the next source's 32-byte table pair + + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp4, xgft1_hi ;xp4 += partial + + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp5, xgft2_hi ;xp5 += partial + + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp6, xgft3_hi ;xp6 += partial + + cmp vec_i, vec ; both are byte offsets because vec was scaled by PS + jl .next_vect + + + mov tmp, [dest+2*PS] ; remaining destination pointers are reloaded on every pass + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + mov tmp, [dest+5*PS] + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + XSTR [tmp+pos], xp6 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] ; tmp = length as passed by the caller + cmp pos, tmp + je .return_pass ; every byte has been written + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_6vect_dot_prod_sse, 00, 05, 0066 diff --git a/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c new file mode 100644 index 000000000..96f67f19a --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c @@ -0,0 +1,911 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse +#endif +#ifndef TEST_MIN_SIZE +# define TEST_MIN_SIZE 16 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) +#define TEST_MEM TEST_SIZE +#define TEST_LOOPS 20000 +#define TEST_TYPE_STR "" + +#ifndef TEST_SOURCES +# define TEST_SOURCES 16 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) // Print len bytes of buf as hex, 32 per row +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) // Print the first m bytes of each of the k buffers in s +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) // Print a k-by-m row-major byte matrix as hex +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +int main(int argc, char *argv[]) // Returns 0 when every check passes, -1 on the first failure +{ + int i, j, rtest, srcs; + void *buf; + u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; + u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls; + u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1; + u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6; + u8 *dest_ptrs[6], *buffs[TEST_SOURCES]; + + int align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 
*ubuffs[TEST_SOURCES]; + u8 *udest_ptrs[6]; + printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) { // twice the 6 * TEST_SOURCES * 32 table bytes indexed below + printf("alloc error: Fail"); + return -1; + } + g_tbls = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest4 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest5 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest6 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref4 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref5 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref6 = buf; + + dest_ptrs[0] = dest1; + dest_ptrs[1] = dest2; + dest_ptrs[2] = dest3; + dest_ptrs[3] = dest4; + dest_ptrs[4] = dest5; + dest_ptrs[5] = dest6; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + 
+ memset(dest1, 0, TEST_LEN); + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + memset(dest6, 0, TEST_LEN); + memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(dest_ref3, 0, TEST_LEN); + memset(dest_ref4, 0, TEST_LEN); + memset(dest_ref5, 0, TEST_LEN); + memset(dest_ref6, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + memset(g3, 7, TEST_SOURCES); + memset(g4, 9, TEST_SOURCES); + memset(g5, 4, TEST_SOURCES); + memset(g6, 0xe6, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g6[i], &g_tbls[160 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, + dest_ref5); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs, + dest_ref6); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + 
dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test6\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(dest6, 25); + return -1; + } + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], 
buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + buffs, dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + buffs, dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + buffs, dest_ref5); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], + buffs, dest_ref6); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { + printf("Fail rand " 
xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(dest6, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs, + dest_ref5); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[160 * srcs], buffs, + dest_ref6); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + 
dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test3 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test4 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test5 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test6 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(dest6, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + efence_buffs, dest_ref3); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + efence_buffs, dest_ref4); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + efence_buffs, dest_ref5); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], + efence_buffs, dest_ref6); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", size); // report the length under test + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); 
+ dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + if (0 != memcmp(dest_ref3, dest3, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, align); + printf("dprod_dut:"); + dump(dest3, align); + return -1; + } + + if (0 != memcmp(dest_ref4, dest4, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, align); + printf("dprod_dut:"); + dump(dest4, align); + return -1; + } + + if (0 != memcmp(dest_ref5, dest5, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, align); + printf("dprod_dut:"); + dump(dest5, align); + return -1; + } + + if (0 != memcmp(dest_ref6, dest6, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", size); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, align); + printf("dprod_dut:"); + dump(dest6, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[5] = dest6 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + memset(dest6, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); + gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], ubuffs, dest_ref6); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + 
srcs); + dump_matrix(ubuffs, (srcs < 5) ? srcs : 5, TEST_SOURCES); // only ubuffs[0..srcs-1] are initialized + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, (srcs < 5) ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, udest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, (srcs < 5) ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(udest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, udest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, (srcs < 5) ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(udest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, udest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, (srcs < 5) ? srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(udest_ptrs[4], 25); + return -1; + } + if (memcmp(dest_ref6, udest_ptrs[5], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, (srcs < 5) ? 
srcs : 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(udest_ptrs[5], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + + 
offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + offset = udest_ptrs[2] - dest3; + if (memcmp(dest3, dest_ref1, offset)) { + printf("Fail rand ualign pad3 start\n"); + return -1; + } + if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad3 end\n"); + return -1; + } + + offset = udest_ptrs[3] - dest4; + if (memcmp(dest4, dest_ref1, offset)) { + printf("Fail rand ualign pad4 start\n"); + return -1; + } + if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad4 end\n"); + return -1; + } + + offset = udest_ptrs[4] - dest5; + if (memcmp(dest5, dest_ref1, offset)) { + printf("Fail rand ualign pad5 start\n"); + return -1; + } + if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad5 end\n"); + return -1; + } + + offset = udest_ptrs[5] - dest6; + if (memcmp(dest6, dest_ref1, offset)) { + printf("Fail rand ualign pad6 start\n"); + return -1; + } + if (memcmp(dest6 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad6 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); + gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], buffs, dest_ref6); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, dest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 
25); + printf("dprod_dut:"); + dump(dest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, dest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, dest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest_ptrs[4], 25); + return -1; + } + if (memcmp(dest_ref6, dest_ptrs[5], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(dest_ptrs[5], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/spdk/isa-l/erasure_code/gf_6vect_mad_avx.asm b/src/spdk/isa-l/erasure_code/gf_6vect_mad_avx.asm new file mode 100644 index 000000000..f2e04cd70 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_6vect_mad_avx.asm @@ -0,0 +1,394 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. 
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define tmp4 r14 + %define tmp5 rdi + %define return rax + %define return.w eax + %define stack_size 16*10 + 5*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 
10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define tmp4 r13 + %define tmp5 r14 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + %endmacro + %macro FUNC_RESTORE 0 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp4 +%define dest3 tmp2 +%define dest4 mul_array +%define dest5 tmp5 +%define dest6 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft4_lo xmm14 +%define xgft4_hi xmm13 +%define xgft5_lo xmm12 +%define xgft5_hi xmm11 +%define xgft6_lo xmm10 +%define xgft6_hi xmm9 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2
xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xd1 xmm8 +%define xd2 xtmpl1 +%define xd3 xtmph1 + + +align 16 +global gf_6vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + sal tmp, 6 ;Multiply by 64 + + sal vec, 5 ;Multiply by 32 + lea vec_i, [tmp + vec] ;vec_i = vec*96 + lea mul_array, [tmp + vec_i] ;mul_array = vec*160 + + vmovdqu xgft5_lo, [tmp3+2*tmp] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + vmovdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + vmovdqu xgft4_lo, [tmp3+vec_i] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + vmovdqu xgft4_hi, [tmp3+vec_i+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + vmovdqu xgft6_lo, [tmp3+mul_array] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + vmovdqu xgft6_hi, [tmp3+mul_array+16] ; " Fx{00}, Fx{10}, ..., Fx{f0} + + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] ; reuse mul_array + mov dest5, [dest1+4*PS] + mov dest6, [dest1+5*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0} + XLDR xd1, [dest1+pos] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + + ;dest1 + vpshufb xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xtmph1 + + XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector + + ;dest2 + vpshufb xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xtmph2 + + ;dest3 + vpshufb xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xtmph3 + + XSTR [dest1+pos], xd1 ;Store result into dest1 + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + + ;dest4 + XLDR xd1, [dest4+pos] ;Get next dest vector + vpshufb xtmph1, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 + + XLDR xd2, [dest5+pos] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest6+pos] ;reuse xtmph1. 
Get next dest vector + + ;dest5 + vpshufb xtmph2, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 + + ;dest6 + vpshufb xtmph3, xgft6_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xd3, xtmph3 + + XSTR [dest4+pos], xd1 ;Store result into dest4 + XSTR [dest5+pos], xd2 ;Store result into dest5 + XSTR [dest6+pos], xd3 ;Store result into dest6 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + ;; Overlapped offset length-16 + mov tmp, len ;Backup len as len=rdi + + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest4+tmp] ;Get next dest vector + XLDR xd2, [dest5+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest6+tmp] ;reuse xtmph1. 
Get next dest vector + + sub len, pos + + vmovdqa xtmph3, [constip16] ;Load const of i + 16 + vpinsrb xtmpl3, len.w, 15 + vpshufb xtmpl3, xmask0f ;Broadcast len to all bytes + vpcmpgtb xtmpl3, xtmpl3, xtmph3 + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ;dest4 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmpl3 + vpxor xd1, xd1, xgft4_hi + + ;dest5 + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials + vpand xgft5_hi, xgft5_hi, xtmpl3 + vpxor xd2, xd2, xgft5_hi + + ;dest6 + vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials + vpand xgft6_hi, xgft6_hi, xtmpl3 + vpxor xd3, xd3, xgft6_hi + + XSTR [dest4+tmp], xd1 ;Store result into dest4 + XSTR [dest5+tmp], xd2 ;Store result into dest5 + XSTR [dest6+tmp], xd3 ;Store result into dest6 + + vmovdqu xgft4_lo, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xgft4_hi, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xgft5_lo, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xgft5_hi, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xgft6_lo, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xgft6_hi, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+tmp] ;reuse xtmph1. 
Get next dest3 vector + + ;dest1 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmpl3 + vpxor xd1, xd1, xgft4_hi + + ;dest2 + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials + vpand xgft5_hi, xgft5_hi, xtmpl3 + vpxor xd2, xd2, xgft5_hi + + ;dest3 + vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials + vpand xgft6_hi, xgft6_hi, xtmpl3 + vpxor xd3, xd3, xgft6_hi + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_6vect_mad_avx, 02, 01, 0210 diff --git a/src/spdk/isa-l/erasure_code/gf_6vect_mad_avx2.asm b/src/spdk/isa-l/erasure_code/gf_6vect_mad_avx2.asm new file mode 100644 index 000000000..b344532fe --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_6vect_mad_avx2.asm @@ -0,0 +1,400 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r15, 10*16 + 2*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r15, [rsp + 10*16 + 2*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r12 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 
+%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp3 +%define dest3 tmp2 +%define dest4 mul_array +%define dest5 vec +%define dest6 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined) + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft2_lo ymm13 +%define xgft3_lo ymm12 +%define xgft4_lo ymm11 +%define xgft5_lo ymm10 +%define xgft6_lo ymm9 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmpl ymm2 +%define xtmplx xmm2 +%define xtmph ymm3 +%define xtmphx xmm3 +%define xd1 ymm4 +%define xd2 ymm5 +%define xd3 ymm6 +%define xd4 ymm7 +%define xd5 ymm8 +%define xd6 xd1 + +align 16 +global gf_6vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
+ + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 ;Multiply by 32 + lea tmp, [mul_array + vec_i] + mov vec_i, vec + mov mul_array, vec + sal vec_i, 1 + sal mul_array, 1 + add vec_i, vec ;vec_i=vec*96 + add mul_array, vec_i ;mul_array=vec*160 + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vec_i] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + vmovdqu xgft5_lo, [tmp+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + ; " Ex{00}, Ex{10}, ..., Ex{f0} + vmovdqu xgft6_lo, [tmp+mul_array] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + ; " Fx{00}, Fx{10}, ..., Fx{f0} + + mov dest2, [dest1+PS] ; reuse tmp3 + mov dest3, [dest1+2*PS] ; reuse tmp2 + mov dest4, [dest1+3*PS] ; reuse mul_array + mov dest5, [dest1+4*PS] ; reuse vec + mov dest6, [dest1+5*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop32: + XLDR x0, [src+pos] ;Get next source vector + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + XLDR xd4, [dest4+pos] ;Get next dest vector + XLDR xd5, [dest5+pos] ;Get next dest vector + + vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + ;dest1 + vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd1, xd1, xtmph ;xd1 += partial + +
XSTR [dest1+pos], xd1 ;Store result into dest1 + + ;dest2 + vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd2, xd2, xtmph ;xd2 += partial + + ;dest3 + vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd3, xd3, xtmph ;xd3 += partial + + XLDR xd6, [dest6+pos] ;reuse xd1. Get next dest vector + + ;dest4 + vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd4, xd4, xtmph ;xd4 += partial + + ;dest5 + vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd5, xd5, xtmph ;xd5 += partial + + ;dest6 + vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd6, xd6, xtmph ;xd6 += partial + + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + XSTR [dest4+pos], xd4 ;Store result into dest4 + XSTR [dest5+pos], xd5 ;Store result into dest5 + XSTR [dest6+pos], xd6 ;Store result into dest6 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + +.lessthan32: 
+ ;; Tail len + ;; Do one more overlap pass + mov tmp.b, 0x1f + vpinsrb xtmphx, xtmphx, tmp.w, 0 + vpbroadcastb xtmph, xtmphx ;Construct mask 0x1f1f1f... + + mov tmp, len ;Overlapped offset length-32 + + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xd3, [dest3+tmp] ;Get next dest vector + XLDR xd4, [dest4+tmp] ;Get next dest vector + XLDR xd5, [dest5+tmp] ;Get next dest vector + + sub len, pos + + vpinsrb xtmplx, xtmplx, len.w, 15 + vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx + vpshufb xtmpl, xtmpl, xtmph ;Broadcast len to all bytes. xtmph=0x1f1f1f... + vpcmpgtb xtmpl, xtmpl, [constip32] + + vpand xtmph, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xtmph, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmph, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + ;dest1 + vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft1_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd1, xd1, xtmph ;xd1 += partial + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + + ;dest2 + vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft2_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd2, xd2, xtmph ;xd2 += partial + + ;dest3 + vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft3_lo ;GF add high 
and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd3, xd3, xtmph ;xd3 += partial + + XLDR xd6, [dest6+tmp] ;reuse xd1. Get next dest vector + + ;dest4 + vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft4_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd4, xd4, xtmph ;xd4 += partial + + ;dest5 + vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft5_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd5, xd5, xtmph ;xd5 += partial + + ;dest6 + vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft6_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd6, xd6, xtmph ;xd6 += partial + + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + XSTR [dest4+tmp], xd4 ;Store result into dest4 + XSTR [dest5+tmp], xd5 ;Store result into dest5 + XSTR [dest6+tmp], xd6 ;Store result into dest6 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data +align 32 +constip32: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7 + +;;; func core, ver, snum +slversion gf_6vect_mad_avx2, 04, 01, 0211 diff --git a/src/spdk/isa-l/erasure_code/gf_6vect_mad_sse.asm b/src/spdk/isa-l/erasure_code/gf_6vect_mad_sse.asm new file mode 100644 index 000000000..4fed2aad9 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_6vect_mad_sse.asm @@ -0,0 +1,406 @@ 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;   * Redistributions of source code must retain the above copyright
+;     notice, this list of conditions and the following disclaimer.
+;   * Redistributions in binary form must reproduce the above copyright
+;     notice, this list of conditions and the following disclaimer in
+;     the documentation and/or other materials provided with the
+;     distribution.
+;   * Neither the name of Intel Corporation nor the names of its
+;     contributors may be used to endorse or promote products derived
+;     from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
+;;;
+;;; Multiply-and-add over six destinations: for every 16-byte chunk of src,
+;;; each byte is split into its low and high nibble, both nibbles are looked
+;;; up with pshufb in a 32-byte table (16 bytes low-nibble products followed
+;;; by 16 bytes high-nibble products), the two partial products are XORed and
+;;; the result is XORed into the corresponding chunk of one destination.
+;;;
+;;;   len       - number of bytes; the function returns 1 without touching
+;;;               memory when len < 16
+;;;   vec       - the six tables used here are vec*32 bytes apart
+;;;   vec_i     - the first table is at mul_array + vec_i*32
+;;;   mul_array - base address of the lookup tables
+;;;   src       - source buffer
+;;;   dest      - array of six destination buffer pointers
+;;;
+;;; Returns 0 on success, 1 when len < 16.
+;;;
+
+%include "reg_sizes.asm"
+
+%define PS 8
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg0.w ecx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define arg4 r12
+ %define arg5 r15
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp2 r10
+ %define tmp3 r13
+ %define tmp4 r14
+ %define tmp5 rdi
+ %define return rax
+ %define return.w eax
+ %define stack_size 16*10 + 5*8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+
+;; win64 prologue: spill xmm6-xmm15 and r12-r15/rdi (all overwritten below)
+;; into the local frame, then load the 5th and 6th arguments from the stack.
+%macro FUNC_SAVE 0
+	sub	rsp, stack_size
+	movdqa	[rsp+16*0],xmm6
+	movdqa	[rsp+16*1],xmm7
+	movdqa	[rsp+16*2],xmm8
+	movdqa	[rsp+16*3],xmm9
+	movdqa	[rsp+16*4],xmm10
+	movdqa	[rsp+16*5],xmm11
+	movdqa	[rsp+16*6],xmm12
+	movdqa	[rsp+16*7],xmm13
+	movdqa	[rsp+16*8],xmm14
+	movdqa	[rsp+16*9],xmm15
+	save_reg	r12, 10*16 + 0*8
+	save_reg	r13, 10*16 + 1*8
+	save_reg	r14, 10*16 + 2*8
+	save_reg	r15, 10*16 + 3*8
+	save_reg	rdi, 10*16 + 4*8
+	end_prolog
+	mov	arg4, arg(4)
+	mov	arg5, arg(5)
+%endmacro
+
+;; win64 epilogue: reload everything FUNC_SAVE spilled and release the frame.
+%macro FUNC_RESTORE 0
+	movdqa	xmm6, [rsp+16*0]
+	movdqa	xmm7, [rsp+16*1]
+	movdqa	xmm8, [rsp+16*2]
+	movdqa	xmm9, [rsp+16*3]
+	movdqa	xmm10, [rsp+16*4]
+	movdqa	xmm11, [rsp+16*5]
+	movdqa	xmm12, [rsp+16*6]
+	movdqa	xmm13, [rsp+16*7]
+	movdqa	xmm14, [rsp+16*8]
+	movdqa	xmm15, [rsp+16*9]
+	mov	r12, [rsp + 10*16 + 0*8]
+	mov	r13, [rsp + 10*16 + 1*8]
+	mov	r14, [rsp + 10*16 + 2*8]
+	mov	r15, [rsp + 10*16 + 3*8]
+	mov	rdi, [rsp + 10*16 + 4*8]
+	add	rsp, stack_size
+%endmacro
+
+%elifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg0.w edi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp2 r10
+ %define tmp3 r12
+ %define tmp4 r13
+ %define tmp5 r14
+ %define return rax
+ %define return.w eax
+
+ %define func(x) x:
+ ;; elf64: only r12-r14 (tmp3..tmp5) need to be preserved
+ %macro FUNC_SAVE 0
+	push	r12
+	push	r13
+	push	r14
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop	r14
+	pop	r13
+	pop	r12
+ %endmacro
+%endif
+
+;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
+%define len arg0
+%define len.w arg0.w
+%define vec arg1
+%define vec_i arg2
+%define mul_array arg3
+%define src arg4
+%define dest1 arg5
+%define pos return
+%define pos.w return.w
+
+;; dest2 and dest6 alias argument registers; those arguments are consumed
+;; (folded into tmp3 and the xgft* registers) before the aliases are loaded.
+%define dest2 mul_array
+%define dest3 tmp2
+%define dest4 tmp4
+%define dest5 tmp5
+%define dest6 vec_i
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR movdqu
+ %define XSTR movdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR movdqa
+  %define XSTR movdqa
+ %else
+  %define XLDR movntdqa
+  %define XSTR movntdq
+ %endif
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+;; Tables 4..6 stay resident in registers for the whole main loop
+%define xmask0f xmm15
+%define xgft4_lo xmm14
+%define xgft4_hi xmm13
+%define xgft5_lo xmm12
+%define xgft5_hi xmm11
+%define xgft6_lo xmm10
+%define xgft6_hi xmm9
+
+;; Scratch registers; xd2/xd3 deliberately share registers with xtmpl1/xtmph1
+%define x0 xmm0
+%define xtmpa xmm1
+%define xtmph1 xmm2
+%define xtmpl1 xmm3
+%define xtmph2 xmm4
+%define xtmpl2 xmm5
+%define xtmph3 xmm6
+%define xtmpl3 xmm7
+%define xd1 xmm8
+%define xd2 xtmpl1
+%define xd3 xtmph1
+
+
+align 16
+global gf_6vect_mad_sse:ISAL_SYM_TYPE_FUNCTION
+func(gf_6vect_mad_sse)
+	FUNC_SAVE
+	sub	len, 16			;len now holds length-16 (offset of the last full chunk)
+	jl	.return_fail
+
+	xor	pos, pos
+	movdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+
+	mov	tmp, vec
+	sal	vec_i, 5		;Multiply by 32
+	lea	tmp3, [mul_array + vec_i]	;tmp3 = address of the first table
+	sal	tmp, 6			;Multiply by 64
+
+	sal	vec, 5			;Multiply by 32
+	lea	vec_i, [tmp + vec]	;vec_i = 96 * original vec
+	lea	mul_array, [tmp + vec_i]	;mul_array = 160 * original vec
+
+	movdqu	xgft5_lo, [tmp3+2*tmp]	;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+	movdqu	xgft5_hi, [tmp3+2*tmp+16]	; " Ex{00}, Ex{10}, ..., Ex{f0}
+	movdqu	xgft4_lo, [tmp3+vec_i]	;Load array Dx{00}, Dx{01}, Dx{02}, ...
+	movdqu	xgft4_hi, [tmp3+vec_i+16]	; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+	movdqu	xgft6_lo, [tmp3+mul_array]	;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+	movdqu	xgft6_hi, [tmp3+mul_array+16]	; " Fx{00}, Fx{10}, ..., Fx{f0}
+
+	mov	dest2, [dest1+PS]	; reuse mul_array
+	mov	dest3, [dest1+2*PS]
+	mov	dest4, [dest1+3*PS]
+	mov	dest5, [dest1+4*PS]
+	mov	dest6, [dest1+5*PS]	; reuse vec_i
+	mov	dest1, [dest1]		; last: overwrites the pointer-array base
+
+.loop16:
+	XLDR	x0, [src+pos]		;Get next source vector
+
+	movdqu	xtmpl1, [tmp3]		;Load array Ax{00}, Ax{01}, Ax{02}, ...
+	movdqu	xtmph1, [tmp3+16]	; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+	movdqu	xtmpl2, [tmp3+vec]	;Load array Bx{00}, Bx{01}, Bx{02}, ...
+	movdqu	xtmph2, [tmp3+vec+16]	; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+	movdqu	xtmpl3, [tmp3+2*vec]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+	movdqu	xtmph3, [tmp3+2*vec+16]	; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+
+	movdqa	xtmpa, x0		;Keep unshifted copy of src
+	psraw	x0, 4			;Shift to put high nibble into bits 4-0
+	pand	x0, xmask0f		;Mask high src nibble in bits 4-0
+	pand	xtmpa, xmask0f		;Mask low src nibble in bits 4-0
+
+	;dest1
+	pshufb	xtmph1, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl1, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph1, xtmpl1		;GF add high and low partials
+	pxor	xd1, xtmph1
+
+	XLDR	xd2, [dest2+pos]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest3+pos]	;reuse xtmph1. Get next dest3 vector
+
+	;dest2
+	pshufb	xtmph2, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl2, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph2, xtmpl2		;GF add high and low partials
+	pxor	xd2, xtmph2
+
+	;dest3
+	pshufb	xtmph3, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl3, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph3, xtmpl3		;GF add high and low partials
+	pxor	xd3, xtmph3
+
+	XSTR	[dest1+pos], xd1	;Store result into dest1
+	XSTR	[dest2+pos], xd2	;Store result into dest2
+	XSTR	[dest3+pos], xd3	;Store result into dest3
+
+	movdqa	xtmph1, xgft4_hi	;Reload const array registers
+	movdqa	xtmpl1, xgft4_lo	;Reload const array registers
+	movdqa	xtmph2, xgft5_hi	;Reload const array registers
+	movdqa	xtmpl2, xgft5_lo	;Reload const array registers
+	movdqa	xtmph3, xgft6_hi	;Reload const array registers
+	movdqa	xtmpl3, xgft6_lo	;Reload const array registers
+
+	;dest4
+	XLDR	xd1, [dest4+pos]	;Get next dest vector
+	pshufb	xtmph1, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl1, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph1, xtmpl1		;GF add high and low partials
+	pxor	xd1, xtmph1
+
+	XLDR	xd2, [dest5+pos]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest6+pos]	;reuse xtmph1. Get next dest vector
+
+	;dest5
+	pshufb	xtmph2, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl2, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph2, xtmpl2		;GF add high and low partials
+	pxor	xd2, xtmph2
+
+	;dest6
+	pshufb	xtmph3, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl3, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph3, xtmpl3		;GF add high and low partials
+	pxor	xd3, xtmph3
+
+	XSTR	[dest4+pos], xd1	;Store result into dest4
+	XSTR	[dest5+pos], xd2	;Store result into dest5
+	XSTR	[dest6+pos], xd3	;Store result into dest6
+
+	add	pos, 16			;Loop on 16 bytes at a time
+	cmp	pos, len
+	jle	.loop16
+
+	lea	tmp, [len + 16]		;tmp = original length
+	cmp	pos, tmp
+	je	.return_pass		;length was a multiple of 16: nothing left
+
+.lessthan16:
+	;; Tail len
+	;; Do one more overlap pass
+	;; Overlapped offset length-16
+	mov	tmp, len		;tmp = length-16; len itself is consumed below
+
+	XLDR	x0, [src+tmp]		;Get next source vector
+	XLDR	xd1, [dest4+tmp]	;Get next dest vector
+	XLDR	xd2, [dest5+tmp]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest6+tmp]	;reuse xtmph1. Get next dest vector
+
+	sub	len, pos		;len = (length-16) - pos, a value in -15..-1
+
+	;; Build a byte mask that is 0xff only for bytes the main loop has not
+	;; already updated: byte i of constip16 is -(i+1), so byte i of the
+	;; mask is set when (len - pos) > -(i+1).
+	movdqa	xtmph3, [constip16]	;Load const of i + 16
+	pinsrb	xtmpl3, len.w, 15
+	pshufb	xtmpl3, xmask0f		;Broadcast len to all bytes
+	pcmpgtb	xtmpl3, xtmph3
+
+	movdqa	xtmpa, x0		;Keep unshifted copy of src
+	psraw	x0, 4			;Shift to put high nibble into bits 4-0
+	pand	x0, xmask0f		;Mask high src nibble in bits 4-0
+	pand	xtmpa, xmask0f		;Mask low src nibble in bits 4-0
+
+	;; The xgft4..6 registers are used destructively from here on
+	;dest4
+	pshufb	xgft4_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft4_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft4_hi, xgft4_lo	;GF add high and low partials
+	pand	xgft4_hi, xtmpl3	;Drop bytes already processed
+	pxor	xd1, xgft4_hi
+
+	;dest5
+	pshufb	xgft5_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft5_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft5_hi, xgft5_lo	;GF add high and low partials
+	pand	xgft5_hi, xtmpl3
+	pxor	xd2, xgft5_hi
+
+	;dest6
+	pshufb	xgft6_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft6_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft6_hi, xgft6_lo	;GF add high and low partials
+	pand	xgft6_hi, xtmpl3
+	pxor	xd3, xgft6_hi
+
+	XSTR	[dest4+tmp], xd1	;Store result into dest4
+	XSTR	[dest5+tmp], xd2	;Store result into dest5
+	XSTR	[dest6+tmp], xd3	;Store result into dest6
+
+	;; Reuse the xgft4..6 registers for tables A..C (dest1..dest3)
+	movdqu	xgft4_lo, [tmp3]	;Load array Ax{00}, Ax{01}, Ax{02}, ...
+	movdqu	xgft4_hi, [tmp3+16]	; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+	movdqu	xgft5_lo, [tmp3+vec]	;Load array Bx{00}, Bx{01}, Bx{02}, ...
+	movdqu	xgft5_hi, [tmp3+vec+16]	; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+	movdqu	xgft6_lo, [tmp3+2*vec]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+	movdqu	xgft6_hi, [tmp3+2*vec+16]	; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+	XLDR	xd1, [dest1+tmp]	;Get next dest vector
+	XLDR	xd2, [dest2+tmp]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest3+tmp]	;reuse xtmph1. Get next dest3 vector
+
+	;dest1
+	pshufb	xgft4_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft4_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft4_hi, xgft4_lo	;GF add high and low partials
+	pand	xgft4_hi, xtmpl3
+	pxor	xd1, xgft4_hi
+
+	;dest2
+	pshufb	xgft5_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft5_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft5_hi, xgft5_lo	;GF add high and low partials
+	pand	xgft5_hi, xtmpl3
+	pxor	xd2, xgft5_hi
+
+	;dest3
+	pshufb	xgft6_hi, x0		;Lookup mul table of high nibble
+	pshufb	xgft6_lo, xtmpa		;Lookup mul table of low nibble
+	pxor	xgft6_hi, xgft6_lo	;GF add high and low partials
+	pand	xgft6_hi, xtmpl3
+	pxor	xd3, xgft6_hi
+
+	XSTR	[dest1+tmp], xd1	;Store result into dest1
+	XSTR	[dest2+tmp], xd2	;Store result into dest2
+	XSTR	[dest3+tmp], xd3	;Store result into dest3
+
+.return_pass:
+	FUNC_RESTORE
+	mov	return, 0
+	ret
+
+.return_fail:
+	FUNC_RESTORE
+	mov	return, 1
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:	dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+;; bytes -1, -2, ..., -16 in memory order; compared against the tail length
+constip16:
+	dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7
+
+;;; func core, ver, snum
+slversion gf_6vect_mad_sse, 00, 01, 020f
diff --git a/src/spdk/isa-l/erasure_code/gf_inverse_test.c b/src/spdk/isa-l/erasure_code/gf_inverse_test.c
new file mode 100644
index 000000000..418d7c195
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_inverse_test.c
@@ -0,0 +1,225 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include + +#include "erasure_code.h" + +#define TEST_LEN 8192 + +#ifndef TEST_SOURCES +# define TEST_SOURCES 128 +#endif +#ifndef RANDOMS +# define RANDOMS 200 +#endif + +#define KMAX TEST_SOURCES + +typedef unsigned char u8; + +void matrix_mult(u8 * a, u8 * b, u8 * c, int n) +{ + int i, j, k; + u8 d; + + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + d = 0; + for (k = 0; k < n; k++) { + d ^= gf_mul(a[n * i + k], b[n * k + j]); + } + c[i * n + j] = d; + } + } +} + +void print_matrix(u8 * a, int n) +{ + int i, j; + + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + printf(" %2x", a[i * n + j]); + } + printf("\n"); + } + printf("\n"); +} + +int is_ident(u8 * a, const int n) +{ + int i, j; + u8 c; + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + c = *a++; + if (i == j) + c--; + if (c != 0) + return -1; + } + } + return 0; +} + +int inv_test(u8 * in, u8 * inv, u8 * sav, int n) +{ + memcpy(sav, in, n * n); + + if (gf_invert_matrix(in, inv, n)) { + printf("Given singular matrix\n"); + print_matrix(sav, n); + return -1; + } + + matrix_mult(inv, sav, in, n); + + if (is_ident(in, n)) { + printf("fail\n"); + print_matrix(sav, n); + print_matrix(inv, n); + print_matrix(in, n); + return -1; + } + putchar('.'); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int i, k, t; + u8 *test_mat, *save_mat, *invr_mat; + + u8 test1[] = { 1, 1, 6, + 1, 1, 1, + 7, 1, 9 + }; + + u8 test2[] = { 0, 1, 6, + 1, 0, 1, + 0, 1, 9 + }; + + u8 test3[] = { 0, 0, 1, + 1, 0, 0, + 0, 1, 1 + }; + + u8 test4[] = { 0, 1, 6, 7, + 1, 1, 0, 0, + 0, 1, 2, 3, + 3, 2, 2, 3 + }; // = row3+3*row2 + + printf("gf_inverse_test: max=%d ", KMAX); + + test_mat = malloc(KMAX * KMAX); + save_mat = malloc(KMAX * KMAX); + invr_mat = malloc(KMAX * KMAX); + + if (NULL == test_mat || NULL == save_mat || NULL == invr_mat) + return -1; + + // Test with lots of leading 
1's + k = 3; + memcpy(test_mat, test1, k * k); + if (inv_test(test_mat, invr_mat, save_mat, k)) + return -1; + + // Test with leading zeros + k = 3; + memcpy(test_mat, test2, k * k); + if (inv_test(test_mat, invr_mat, save_mat, k)) + return -1; + + // Test 3 + k = 3; + memcpy(test_mat, test3, k * k); + if (inv_test(test_mat, invr_mat, save_mat, k)) + return -1; + + // Test 4 - try a singular matrix + k = 4; + memcpy(test_mat, test4, k * k); + if (!gf_invert_matrix(test_mat, invr_mat, k)) { + printf("Fail: didn't catch singular matrix\n"); + print_matrix(test4, 4); + return -1; + } + // Do random test of size KMAX + k = KMAX; + + for (i = 0; i < k * k; i++) + test_mat[i] = save_mat[i] = rand(); + + if (gf_invert_matrix(test_mat, invr_mat, k)) { + printf("rand picked a singular matrix, try again\n"); + return -1; + } + + matrix_mult(invr_mat, save_mat, test_mat, k); + + if (is_ident(test_mat, k)) { + printf("fail\n"); + print_matrix(save_mat, k); + print_matrix(invr_mat, k); + print_matrix(test_mat, k); + return -1; + } + // Do Randoms. Random size and coefficients + for (t = 0; t < RANDOMS; t++) { + k = rand() % KMAX; + + for (i = 0; i < k * k; i++) + test_mat[i] = save_mat[i] = rand(); + + if (gf_invert_matrix(test_mat, invr_mat, k)) + continue; + + matrix_mult(invr_mat, save_mat, test_mat, k); + + if (is_ident(test_mat, k)) { + printf("fail rand k=%d\n", k); + print_matrix(save_mat, k); + print_matrix(invr_mat, k); + print_matrix(test_mat, k); + return -1; + } + if (0 == (t % 8)) + putchar('.'); + } + + printf(" Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c new file mode 100644 index 000000000..1d23eb67f --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c @@ -0,0 +1,152 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "test.h" +#include "erasure_code.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_SOURCES 10 +# define TEST_LEN 8*1024 +# define TEST_TYPE_STR "_warm" +#else +# ifndef TEST_CUSTOM +// Uncached test. Pull from large mem base. 
+# define TEST_SOURCES 10 +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN GT_L3_CACHE / TEST_SOURCES +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +typedef unsigned char u8; + +// Global GF(256) tables +u8 gff[256]; +u8 gflog[256]; +u8 gf_mul_table[256 * 256]; + +void mk_gf_field(void) +{ + int i; + u8 s = 1; + gflog[0] = 0; + + for (i = 0; i < 256; i++) { + gff[i] = s; + gflog[s] = i; + s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0); // mult by GF{2} + } +} + +void mk_gf_mul_table(u8 * table) +{ + // Populate a single table with all multiply combinations for a fast, + // single-table lookup of GF(2^8) multiply at the expense of memory. + int i, j; + for (i = 0; i < 256; i++) + for (j = 0; j < 256; j++) + table[i * 256 + j] = gf_mul(i, j); +} + +void gf_vect_dot_prod_ref(int len, int vlen, u8 * v, u8 ** src, u8 * dest) +{ + int i, j; + u8 s; + for (i = 0; i < len; i++) { + s = 0; + for (j = 0; j < vlen; j++) + s ^= gf_mul(src[j][i], v[j]); + + dest[i] = s; + } +} + +void gf_vect_dot_prod_mult(int len, int vlen, u8 * v, u8 ** src, u8 * dest) +{ + int i, j; + u8 s; + for (i = 0; i < len; i++) { + s = 0; + for (j = 0; j < vlen; j++) { + s ^= gf_mul_table[v[j] * 256 + src[j][i]]; + } + dest[i] = s; + } + +} + +int main(void) +{ + int i, j; + u8 vec[TEST_SOURCES], dest1[TEST_LEN], dest2[TEST_LEN]; + u8 *matrix[TEST_SOURCES]; + struct perf start; + + mk_gf_field(); + mk_gf_mul_table(gf_mul_table); + + //generate random vector and matrix/data + for (i = 0; i < TEST_SOURCES; i++) { + vec[i] = rand(); + + if (!(matrix[i] = malloc(TEST_LEN))) { + fprintf(stderr, "Error failure\n\n"); + return -1; + } + for (j = 0; j < TEST_LEN; j++) + matrix[i][j] = rand(); + + } + + BENCHMARK(&start, BENCHMARK_TIME, + gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1)); + printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); + + 
BENCHMARK(&start, BENCHMARK_TIME, + gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2)); + printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); + + // Compare with reference function + if (0 != memcmp(dest1, dest2, TEST_LEN)) { + printf("Error, different results!\n\n"); + return -1; + } + + printf("Pass functional test\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx.asm b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx.asm new file mode 100644 index 000000000..c123a3d90 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx.asm @@ -0,0 +1,271 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest);
+;;;
+;;; For every 16-byte chunk, XORs together the table-multiplied chunks of
+;;; `vec` source buffers and stores the result in dest. Source j uses the
+;;; 32-byte table at g_tbls + 32*j (16 bytes of low-nibble products, then
+;;; 16 bytes of high-nibble products).
+;;;
+;;;   len    - number of bytes; returns 1 without touching memory if len < 16
+;;;   vec    - number of source buffers
+;;;   g_tbls - lookup tables, 32 bytes per source
+;;;   buffs  - array of `vec` source buffer pointers
+;;;   dest   - output buffer
+;;;
+;;; Returns 0 on success, 1 when len < 16. When len is not a multiple of 16
+;;; the last chunk is recomputed at offset len-16, overlapping the previous
+;;; one.
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 r9
+ %define return rax
+ ;; All arguments live in registers here, so the stack load/store helpers
+ ;; expand to nothing.
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved and loaded
+ %define tmp r11
+ %define tmp2 r10
+ %define tmp3 rdi ; must be saved and loaded
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define frame_size 2*8
+ %define arg(x) [rsp + frame_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ ;; Push the two registers this function overwrites (frame_size bytes in
+ ;; total), then load the 5th argument from the caller's stack.
+ %macro FUNC_SAVE 0
+	rex_push_reg	r12
+	push_reg	rdi
+	end_prolog
+	mov	arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop	rdi
+	pop	r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x:
+ %define arg(x) [ebp + PS*2 + PS*x]
+
+ ;; arg0, arg1 and arg4 all share the single register `trans`; the code
+ ;; reloads whichever one it needs from its stack slot (argN_m) with SLDR
+ ;; right before use. arg2 is used directly as a memory operand.
+ %define trans ecx ;trans is for the variables in stack
+ %define arg0 trans
+ %define arg0_m arg(0)
+ %define arg1 trans
+ %define arg1_m arg(1)
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 ebx
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp2 edi
+ %define tmp3 esi
+ %define return eax
+ %macro SLDR 2 ;; stack load/restore
+	mov	%1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+	push	ebp
+	mov	ebp, esp
+	push	esi
+	push	edi
+	push	ebx
+	mov	arg3, arg(3)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop	ebx
+	pop	edi
+	pop	esi
+	mov	esp, ebp
+	pop	ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define pos return
+
+ %ifidn PS,4 ;32-bit code
+ %define vec_m arg1_m
+ %define len_m arg0_m
+ %define dest_m arg4_m
+ %endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR vmovdqa
+  %define XSTR vmovdqa
+ %else
+  %define XLDR vmovntdqa
+  %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ; 64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%define xmask0f xmm5
+%define xgft_lo xmm4
+%define xgft_hi xmm3
+
+%define x0 xmm0
+%define xtmpa xmm1
+%define xp xmm2
+
+align 16
+global gf_vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION
+func(gf_vect_dot_prod_avx)
+	FUNC_SAVE
+	SLDR	len, len_m
+	sub	len, 16			;len now holds length-16
+	SSTR	len_m, len		;(a mov at most: flags from sub survive)
+	jl	.return_fail
+	xor	pos, pos
+	vmovdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+
+.loop16:
+	vpxor	xp, xp			;Clear the accumulator for this chunk
+	mov	tmp, mul_array		;tmp walks the tables, 32 bytes per source
+	xor	vec_i, vec_i
+
+.next_vect:
+
+	mov	ptr, [src+vec_i*PS]
+	vmovdqu	xgft_lo, [tmp]		;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+	vmovdqu	xgft_hi, [tmp+16]	; " Cx{00}, Cx{10}, ..., Cx{f0}
+	XLDR	x0, [ptr+pos]		;Get next source vector
+
+	add	tmp, 32
+	add	vec_i, 1
+
+	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	vpshufb	xgft_hi, xgft_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xgft_lo, xgft_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xgft_hi, xgft_hi, xgft_lo	;GF add high and low partials
+	vpxor	xp, xp, xgft_hi		;xp += partial
+
+	SLDR	vec, vec_m
+	cmp	vec_i, vec
+	jl	.next_vect
+
+	SLDR	dest, dest_m
+	XSTR	[dest+pos], xp
+
+	add	pos, 16			;Loop on 16 bytes at a time
+	SLDR	len, len_m
+	cmp	pos, len
+	jle	.loop16
+
+	lea	tmp, [len + 16]		;tmp = original length
+	cmp	pos, tmp
+	je	.return_pass		;length was a multiple of 16
+
+	;; Tail len
+	mov	pos, len		;Overlapped offset length-16
+	jmp	.loop16			;Do one more overlap pass
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_dot_prod_avx, 02, 05, 0061
diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm
new file mode 100644
index 000000000..f84dd4756
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm
@@ -0,0 +1,280 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;   * Redistributions of source code must retain the above copyright
+;     notice, this list of conditions and the following disclaimer.
+;   * Redistributions in binary form must reproduce the above copyright
+;     notice, this list of conditions and the following disclaimer in
+;     the documentation and/or other materials provided with the
+;     distribution.
+;   * Neither the name of Intel Corporation nor the names of its
+;     contributors may be used to endorse or promote products derived
+;     from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest);
+;;;
+;;; For every 32-byte chunk, XORs together the table-multiplied chunks of
+;;; `vec` source buffers and stores the result in dest. Source j uses the
+;;; 32-byte table at g_tbls + 32*j; its low 16 bytes (low-nibble products)
+;;; and high 16 bytes (high-nibble products) are each duplicated into both
+;;; 128-bit lanes before the per-lane vpshufb lookups.
+;;;
+;;;   len    - number of bytes; returns 1 without touching memory if len < 32
+;;;   vec    - number of source buffers
+;;;   g_tbls - lookup tables, 32 bytes per source
+;;;   buffs  - array of `vec` source buffer pointers
+;;;   dest   - output buffer
+;;;
+;;; Returns 0 on success, 1 when len < 32. When len is not a multiple of 32
+;;; the last chunk is recomputed at offset len-32, overlapping the previous
+;;; one.
+;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 r9
+ %define return rax
+ ;; All arguments live in registers here, so the stack load/store helpers
+ ;; expand to nothing.
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r12 ; must be saved and loaded
+ %define tmp r11
+ %define tmp.w r11d
+ %define tmp.b r11b
+ %define tmp2 r10
+ %define tmp3 rdi ; must be saved and loaded
+ %define return rax
+ %macro SLDR 2
+ %endmacro
+ %define SSTR SLDR
+ %define PS 8
+ %define frame_size 2*8
+ %define arg(x) [rsp + frame_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ ;; Push the two registers this function overwrites (frame_size bytes in
+ ;; total), then load the 5th argument from the caller's stack.
+ %macro FUNC_SAVE 0
+	rex_push_reg	r12
+	push_reg	rdi
+	end_prolog
+	mov	arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop	rdi
+	pop	r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, elf32
+
+;;;================== High Address;
+;;; arg4
+;;; arg3
+;;; arg2
+;;; arg1
+;;; arg0
+;;; return
+;;;<================= esp of caller
+;;; ebp
+;;;<================= ebp = esp
+;;; esi
+;;; edi
+;;; ebx
+;;;<================= esp of callee
+;;;
+;;;================== Low Address;
+
+ %define PS 4
+ %define LOG_PS 2
+ %define func(x) x:
+ %define arg(x) [ebp + PS*2 + PS*x]
+
+ ;; arg0, arg1 and arg4 all share the single register `trans`; the code
+ ;; reloads whichever one it needs from its stack slot (argN_m) with SLDR
+ ;; right before use. arg2 is used directly as a memory operand.
+ %define trans ecx ;trans is for the variables in stack
+ %define arg0 trans
+ %define arg0_m arg(0)
+ %define arg1 trans
+ %define arg1_m arg(1)
+ %define arg2 arg2_m
+ %define arg2_m arg(2)
+ %define arg3 ebx
+ %define arg4 trans
+ %define arg4_m arg(4)
+ %define tmp edx
+ %define tmp.w edx
+ %define tmp.b dl
+ %define tmp2 edi
+ %define tmp3 esi
+ %define return eax
+ %macro SLDR 2 ;stack load/restore
+	mov	%1, %2
+ %endmacro
+ %define SSTR SLDR
+
+ %macro FUNC_SAVE 0
+	push	ebp
+	mov	ebp, esp
+	push	esi
+	push	edi
+	push	ebx
+	mov	arg3, arg(3)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	pop	ebx
+	pop	edi
+	pop	esi
+	mov	esp, ebp
+	pop	ebp
+ %endmacro
+
+%endif ; output formats
+
+%define len arg0
+%define vec arg1
+%define mul_array arg2
+%define src arg3
+%define dest arg4
+
+%define vec_i tmp2
+%define ptr tmp3
+%define pos return
+
+%ifidn PS,4 ;32-bit code
+ %define vec_m arg1_m
+ %define len_m arg0_m
+ %define dest_m arg4_m
+%endif
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR vmovdqu
+ %define XSTR vmovdqu
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR vmovdqa
+  %define XSTR vmovdqa
+ %else
+  %define XLDR vmovntdqa
+  %define XSTR vmovntdq
+ %endif
+%endif
+
+%ifidn PS,8 ;64-bit code
+ default rel
+ [bits 64]
+%endif
+
+section .text
+
+%define xmask0f ymm3
+%define xmask0fx xmm3
+%define xgft_lo ymm4
+%define xgft_hi ymm5
+
+%define x0 ymm0
+%define xtmpa ymm1
+%define xp ymm2
+
+align 16
+global gf_vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
+func(gf_vect_dot_prod_avx2)
+	FUNC_SAVE
+	SLDR	len, len_m
+	sub	len, 32			;len now holds length-32
+	SSTR	len_m, len		;(a mov at most: flags from sub survive)
+	jl	.return_fail
+	xor	pos, pos
+	mov	tmp.b, 0x0f
+	vpinsrb	xmask0fx, xmask0fx, tmp.w, 0
+	vpbroadcastb	xmask0f, xmask0fx	;Construct mask 0x0f0f0f...
+
+.loop32:
+	vpxor	xp, xp			;Clear the accumulator for this chunk
+	mov	tmp, mul_array		;tmp walks the tables, 32 bytes per source
+	xor	vec_i, vec_i
+
+.next_vect:
+
+	mov	ptr, [src+vec_i*PS]
+
+	vmovdqu	xgft_lo, [tmp]		;Load array Cx{00}, Cx{01}, Cx{02}, ...
+					; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+	vperm2i128	xgft_hi, xgft_lo, xgft_lo, 0x11	; swapped to hi | hi
+	vperm2i128	xgft_lo, xgft_lo, xgft_lo, 0x00	; swapped to lo | lo
+
+	XLDR	x0, [ptr+pos]		;Get next source vector
+
+	add	tmp, 32
+	add	vec_i, 1
+
+	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	vpshufb	xgft_hi, xgft_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xgft_lo, xgft_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xgft_hi, xgft_hi, xgft_lo	;GF add high and low partials
+	vpxor	xp, xp, xgft_hi		;xp += partial
+
+	SLDR	vec, vec_m
+	cmp	vec_i, vec
+	jl	.next_vect
+
+	SLDR	dest, dest_m
+	XSTR	[dest+pos], xp
+
+	add	pos, 32			;Loop on 32 bytes at a time
+	SLDR	len, len_m
+	cmp	pos, len
+	jle	.loop32
+
+	lea	tmp, [len + 32]		;tmp = original length
+	cmp	pos, tmp
+	je	.return_pass		;length was a multiple of 32
+
+	;; Tail len
+	mov	pos, len		;Overlapped offset length-32
+	jmp	.loop32			;Do one more overlap pass
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+;;; func core, ver, snum
+slversion gf_vect_dot_prod_avx2, 04, 05, 0190
diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm
new file mode 100644
index 000000000..ad01fcf89
--- /dev/null
+++
b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm @@ -0,0 +1,240 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, *dest); +;;; Returns 0 on success, or 1 without writing dest when len < 32. + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 0*16 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + ; Save slots must lie inside the stack_size bytes allocated above + save_reg r12, 0*8 + save_reg r15, 1*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + mov r12, [rsp + 0*8] + mov r15, [rsp + 1*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%define xmask0f zmm5 +%define xgft1_lo zmm4 +%define xgft1_loy ymm4 +%define xgft1_hi zmm3 +%define x0 zmm0 +%define x0y ymm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp1y ymm2 + +default rel +[bits 64] +section .text + +align 16 +global
gf_vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_dot_prod_avx512) + FUNC_SAVE + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + sub len, 64 + jl .len_lt_64 + +.loop64: + vpxorq xp1, xp1, xp1 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 3-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 ;Broadcast high-nibble table (lane 1) to all lanes + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ;Broadcast low-nibble table (lane 0) to all lanes + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + cmp vec_i, vec + jl .next_vect + + XSTR [dest1+pos], xp1 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + + +.len_lt_64: ; 32-byte version + add len, 32 + jl .return_fail + +.loop32: + vpxorq xp1, xp1, xp1 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect2: + mov ptr, [src+vec_i] + XLDR x0y, [ptr+pos] ;Get next source vector 32B + add vec_i, PS + vpsraw xtmpa, x0, 4 ;Shift to put high nibble into bits 3-0 + vshufi64x2 x0, x0, xtmpa, 0x44 ;put x0 = xl:xh + vpandq x0, x0, xmask0f ;Mask bits 3-0 + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + add tmp, 32 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x50 ;lanes lo->hi = Al,Al,Ah,Ah + vpshufb xgft1_lo, xgft1_lo, x0 ;Lookup mul table + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x0e ;Move high-nibble products to low 256 bits + vpxorq xgft1_hi,
xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + cmp vec_i, vec + jl .next_vect2 + + XSTR [dest1+pos], xp1y + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_vect_dot_prod_avx512 +no_gf_vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_base_test.c b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_base_test.c new file mode 100644 index 000000000..2b4dfbbe0 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_base_test.c @@ -0,0 +1,290 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) + +#ifndef TEST_SOURCES +# define TEST_SOURCES 250 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) /* hex-dump len bytes of buf, 32 per row */ +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) /* hex-dump the first m bytes of each of the k buffers s[i] */ +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) /* hex-dump a row-major k x m byte matrix */ +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +int main(int argc, char *argv[]) /* returns 0 when every recovery matches, -1 on allocation failure or mismatch */ +{ + int i, j, rtest, m, k, nerrs, r, err; + void *buf; + u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; + u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES]; +
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX]; + u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; + + printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buff = buf; + + // Init + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest, 0, TEST_LEN); + memset(temp_buff, 0, TEST_LEN); + memset(dest_ref, 0, TEST_LEN); + memset(g, 0, TEST_SOURCES); + + // Test erasure code using gf_vect_dot_prod + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + gf_gen_cauchy1_matrix(a, m, k); + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Make parity vects + for (i = k; i < m; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); + + gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]); + } + + // Random buffers in erasure + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) + src_err_list[nerrs++] = i; + } + + // construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) + printf("BAD MATRIX\n"); + + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + recov[i] = buffs[r]; + } + + // Recover data + for
(i = 0; i < nerrs; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); + + gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff); + + if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf("recov %d:", src_err_list[i]); + dump(temp_buff, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Do more random tests + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + gf_gen_cauchy1_matrix(a, m, k); + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Make parity vects + for (i = k; i < m; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); + + gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]); + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) + src_err_list[nerrs++] = i; + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= k) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + } + // construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) + printf("BAD MATRIX\n"); + + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + recov[i] = buffs[r]; + } + + // Recover data + for (i = 0; i < nerrs; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); + + gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff); + + if (0 != memcmp(temp_buff,
buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) /* use j: i must keep indexing the failing erasure below */ + printf(" %d", src_err_list[j]); + printf("\na:\n"); + dump_u8xu8((u8 *) a, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) d, k, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buff, 25); + return -1; + } + } + putchar('.'); + } + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_perf.c b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_perf.c new file mode 100644 index 000000000..bd2b555b0 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_perf.c @@ -0,0 +1,174 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "test.h" + +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST gf_vect_dot_prod +#endif + +#define str(s) #s +#define xstr(s) str(s) + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_SOURCES 10 +# define TEST_LEN (8*1024) +# define TEST_TYPE_STR "_warm" +#else +# ifndef TEST_CUSTOM +// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10 +# define GT_L3_CACHE (32*1024*1024) /* some number > last level cache */ +# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) /* hex-dump len bytes of buf, 32 per row */ +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) /* hex-dump the first m bytes of each of the k buffers s[i] */ +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void vect_dot_prod_perf(void (*fun_ptr) /* one timed unit: rebuild the 32-byte table per coefficient, then run fun_ptr once */ + (int, int, unsigned char *, unsigned char **, unsigned char *), + u8 * g, u8 * g_tbls, u8 ** buffs, u8 * dest_ref) +{ + int j; + for (j = 0; j < TEST_SOURCES; j++) + gf_vect_mul_init(g[j], &g_tbls[j * 32]); + + (*fun_ptr) (TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); +} + +int main(int argc, char *argv[]) /* returns 0 when FUNCTION_UNDER_TEST matches the base result, -1 otherwise */ +{ + int i, j; + void *buf; + u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref; + u8 *temp_buff, *buffs[TEST_SOURCES]; + struct perf start; + + printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buff = buf; + + // Performance test + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + memset(dest, 0, TEST_LEN); + memset(temp_buff, 0, TEST_LEN); + memset(dest_ref, 0, TEST_LEN); + memset(g, 0, TEST_SOURCES); + + for
(i = 0; i < TEST_SOURCES; i++) + g[i] = rand(); + +#ifdef DO_REF_PERF + BENCHMARK(&start, BENCHMARK_TIME, + vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref) + ); + printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); +#else + vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref); +#endif + + BENCHMARK(&start, BENCHMARK_TIME, + vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest)); + printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } + + printf("pass perf check\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_sse.asm b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_sse.asm new file mode 100644 index 000000000..108fa36a4 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_sse.asm @@ -0,0 +1,271 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution.
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest); +;;; Returns 0 on success, or 1 without writing dest when len < 16. + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r9 + %define return rax + %macro SLDR 2 ;; empty: no stack load/store emitted for this format + %endmacro + %define SSTR SLDR + %define PS 8 + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved and loaded + %define tmp r11 + %define tmp2 r10 + %define tmp3 rdi ; must be saved and loaded + %define return rax + %macro SLDR 2 ;; empty: no stack load/store emitted for this format + %endmacro + %define SSTR SLDR + %define PS 8 + %define frame_size 2*8 + %define arg(x) [rsp + frame_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + rex_push_reg r12 + push_reg rdi +
end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + pop rdi + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + + %define trans ecx ;trans is for the variables in stack + %define arg0 trans + %define arg0_m arg(0) + %define arg1 trans + %define arg1_m arg(1) + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 ebx + %define arg4 trans + %define arg4_m arg(4) + %define tmp edx + %define tmp2 edi + %define tmp3 esi + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + push esi + push edi + push ebx + mov arg3, arg(3) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + mov esp, ebp + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 + +%define vec_i tmp2 +%define ptr tmp3 +%define pos return + + %ifidn PS,4 ;32-bit code + %define vec_m arg1_m + %define len_m arg0_m + %define dest_m arg4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/store + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + +section .text + +%define xmask0f xmm5 +%define xgft_lo xmm4 +%define xgft_hi xmm3 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xp xmm2 + +align 16 +global gf_vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION
+func(gf_vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + +.loop16: + pxor xp, xp + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + + mov ptr, [src+vec_i*PS] + movdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + XLDR x0, [ptr+pos] ;Get next source vector + + add tmp, 32 + add vec_i, 1 + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 3-0 + pand x0, xmask0f ;Mask high src nibble in bits 3-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0 + + pshufb xgft_hi, x0 ;Lookup mul table of high nibble + pshufb xgft_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft_hi, xgft_lo ;GF add high and low partials + pxor xp, xgft_hi ;xp += partial + + SLDR vec, vec_m + cmp vec_i, vec + jl .next_vect + + SLDR dest, dest_m + XSTR [dest+pos], xp + + add pos, 16 ;Loop on 16 bytes at a time + SLDR len, len_m + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 + +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_vect_dot_prod_sse, 00, 05, 0060 diff --git a/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_test.c b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_test.c new file mode 100644 index 000000000..1c0232cca --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_dot_prod_test.c @@ -0,0 +1,525 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST gf_vect_dot_prod +#endif +#ifndef TEST_MIN_SIZE +# define TEST_MIN_SIZE 32 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) + +#ifndef TEST_SOURCES +# define TEST_SOURCES 16 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) /* hex-dump len bytes of buf, 32 per row */ +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) /* hex-dump the first m bytes of each of the k buffers s[i] */ +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) /* hex-dump a row-major k x m byte matrix */ +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int i, j, rtest, srcs, m, k, nerrs, r, err; + void *buf; + u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; + u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES]; + u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX]; + u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; + + int align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *udest_ptr; + +
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buff = buf; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest, 0, TEST_LEN); + memset(temp_buff, 0, TEST_LEN); + memset(dest_ref, 0, TEST_LEN); + memset(g, 0, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } else + putchar('.'); + + // Rand data test + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) + g[i] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } 
+ + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) + g[i] = rand(); + + for (i = 0; i < srcs; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref); + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n"); + dump_matrix(buffs, 5, srcs); + printf("dprod_base:"); + dump(dest_ref, 5); + printf("dprod:"); + dump(dest, 5); + return -1; + } + + putchar('.'); + } + } + + // Test erasure code using gf_vect_dot_prod + + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + gf_gen_rs_matrix(a, m, k); + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Make parity vects + for (i = k; i < m; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); +#ifndef USEREF + FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); +#else + gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); +#endif + } + + // Random buffers in erasure + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) + src_err_list[nerrs++] = i; + } + + // construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) + printf("BAD MATRIX\n"); + + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + recov[i] = buffs[r]; + } + + // Recover data + for (i = 0; i < nerrs; i++) { + for (j = 
0; j < k; j++) + gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); +#ifndef USEREF + FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); +#else + gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); +#endif + + if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf("recov %d:", src_err_list[i]); + dump(temp_buff, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Do more random tests + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + gf_gen_rs_matrix(a, m, k); + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Make parity vects + for (i = k; i < m; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); +#ifndef USEREF + FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); +#else + gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); +#endif + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) + src_err_list[nerrs++] = i; + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= k) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + } + // construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) + printf("BAD MATRIX\n"); + + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + recov[i] = buffs[r]; + } + + // Recover data + for (i = 0; i < nerrs; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(d[k * 
src_err_list[i] + j], &g_tbls[j * 32]); +#ifndef USEREF + FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); +#else + gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); +#endif + if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (i = 0; i < nerrs; i++) + printf(" %d", src_err_list[i]); + printf("\na:\n"); + dump_u8xu8((u8 *) a, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) d, k, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buff, 25); + return -1; + } + } + putchar('.'); + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) + g[i] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref); + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest); + + if (0 != memcmp(dest_ref, dest, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n"); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, align); + printf("dprod:"); + dump(dest, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest, 0, TEST_LEN); // zero pad to check write-over + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) + g[i] = rand(); + + for (i = 0; i < srcs; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr); + + if (memcmp(dest_ref, udest_ptr, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(udest_ptr, 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptr - dest; + + if (memcmp(dest, dest_ref, offset)) { + printf("Fail rand ualign pad start\n"); + return -1; + } + if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) + g[i] = rand(); + + for (i = 0; i < srcs; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest); + + if (memcmp(dest_ref, dest, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } + } + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mad_avx.asm b/src/spdk/isa-l/erasure_code/gf_vect_mad_avx.asm new file mode 100644 index 000000000..f444d113b --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_mad_avx.asm @@ -0,0 +1,196 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define return.w eax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define return.w eax + + 
%define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return +%define pos.w return.w + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm8 +%define xgft_lo xmm7 +%define xgft_hi xmm6 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph xmm2 +%define xtmpl xmm3 +%define xd xmm4 +%define xtmpd xmm5 + +align 16 +global gf_vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + sal vec_i, 5 ;Multiply by 32 + vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0} + + XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd, [dest+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd, xd, xtmph ;xd += partial + + XSTR [dest+pos], xd + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + vmovdqa xd, xtmpd ;Restore xd + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 + +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_vect_mad_avx, 02, 01, 0201 diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mad_avx2.asm b/src/spdk/isa-l/erasure_code/gf_vect_mad_avx2.asm new file mode 100644 index 000000000..b65d0aae3 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_mad_avx2.asm @@ -0,0 +1,203 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 ; must be saved and loaded + %define arg5 r15 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size + %endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + + +;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return +%define pos.w return.w + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm8 
+%define xmask0fx xmm8 +%define xgft_lo ymm7 +%define xgft_hi ymm6 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmph ymm2 +%define xtmpl ymm3 +%define xd ymm4 +%define xtmpd ymm5 + +align 16 +global gf_vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec_i, 5 ;Multiply by 32 + vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo + + XLDR xtmpd, [dest+len] ;backup the last 32 bytes in dest + +.loop32: + XLDR xd, [dest+pos] ;Get next dest vector +.loop32_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd, xd, xtmph ;xd += partial + + XSTR [dest+pos], xd + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + vmovdqa xd, xtmpd ;Restore xd + jmp .loop32_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_vect_mad_avx2, 04, 01, 0202 diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mad_avx512.asm b/src/spdk/isa-l/erasure_code/gf_vect_mad_avx512.asm new file mode 100644 index 000000000..44fb653f5 --- /dev/null +++ 
b/src/spdk/isa-l/erasure_code/gf_vect_mad_avx512.asm @@ -0,0 +1,193 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 ; must be saved and loaded + %define arg5 r15 + %define tmp r11 + %define return rax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size + %endmacro +%endif + +;;; gf_vect_mad_avx512(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmph zmm2 +%define xtmpl zmm3 +%define xd zmm4 +%define xtmpd zmm5 +%define xgft_hi zmm6 +%define xgft_lo zmm7 +%define xgft_loy ymm7 +%define xmask0f zmm8 + 
+align 16 +global gf_vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec_i, 5 ;Multiply by 32 + vmovdqu8 xgft_loy, [vec_i+mul_array] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vshufi64x2 xgft_hi, xgft_lo, xgft_lo, 0x55 + vshufi64x2 xgft_lo, xgft_lo, xgft_lo, 0x00 + mov tmp, -1 + kmovq k1, tmp + +.loop64: + XLDR xd, [dest+pos] ;Get next dest vector + XLDR x0, [src+pos] ;Get next source vector + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph {k1}{z}, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl {k1}{z}, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph, xtmph, xtmpl ;GF add high and low partials + vpxorq xd, xd, xtmph ;xd += partial + + XSTR [dest+pos], xd + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, (1 << 63) + lea tmp, [len + 64 - 1] + and tmp, 63 + sarx pos, pos, tmp + kmovq k1, pos + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_vect_mad_avx512 +no_gf_vect_mad_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mad_sse.asm b/src/spdk/isa-l/erasure_code/gf_vect_mad_sse.asm new file mode 100644 index 000000000..8d7e5eecd --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_mad_sse.asm @@ -0,0 +1,197 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define return.w eax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return +%define pos.w return.w + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm8 +%define xgft_lo xmm7 +%define xgft_hi xmm6 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph xmm2 +%define xtmpl xmm3 +%define xd xmm4 
+%define xtmpd xmm5 + + +align 16 +global gf_vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec_i, 5 ;Multiply by 32 + movdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + + XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd, [dest+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + movdqa xtmph, xgft_hi ;Reload const array registers + movdqa xtmpl, xgft_lo + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + pshufb xtmph, x0 ;Lookup mul table of high nibble + pshufb xtmpl, xtmpa ;Lookup mul table of low nibble + pxor xtmph, xtmpl ;GF add high and low partials + + pxor xd, xtmph + XSTR [dest+pos], xd ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + movdqa xd, xtmpd ;Restore xd + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 + +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_vect_mad_sse, 00, 01, 0200 diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mad_test.c b/src/spdk/isa-l/erasure_code/gf_vect_mad_test.c new file mode 100644 index 000000000..cecbc1669 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_mad_test.c @@ -0,0 +1,519 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation 
All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef ALIGN_SIZE +# define ALIGN_SIZE 32 +#endif + +#ifndef FUNCTION_UNDER_TEST +//By default, test multi-binary version +# define FUNCTION_UNDER_TEST gf_vect_mad +# define REF_FUNCTION gf_vect_dot_prod +# define VECT 1 +#endif + +#ifndef TEST_MIN_SIZE +# define TEST_MIN_SIZE 64 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) +#define TEST_MEM TEST_SIZE +#define TEST_LOOPS 20000 +#define TEST_TYPE_STR "" + +#ifndef TEST_SOURCES +# define TEST_SOURCES 16 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B ALIGN_SIZE +# define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only +#endif + +#define str(s) #s +#define xstr(s) str(s) + +typedef unsigned char u8; + +#if (VECT == 1) +# define LAST_ARG *dest +#else +# define LAST_ARG **dest +#endif + +extern void FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char LAST_ARG); +extern void REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src, + unsigned char LAST_ARG); + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff 
& s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int i, j, rtest, srcs; + void *buf; + u8 gf[6][TEST_SOURCES]; + u8 *g_tbls; + u8 *dest_ref[VECT]; + u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES]; + int vector = VECT; + + int align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *udest_ptrs[VECT]; + printf("test" xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) { + printf("alloc error: Fail"); + return -1; + } + g_tbls = buf; + + for (i = 0; i < vector; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ptrs[i] = buf; + memset(dest_ptrs[i], 0, TEST_LEN); + } + + for (i = 0; i < vector; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref[i] = buf; + memset(dest_ref[i], 0, TEST_LEN); + } + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + switch (vector) { + case 6: + memset(gf[5], 0xe6, TEST_SOURCES); + case 5: + memset(gf[4], 4, TEST_SOURCES); + case 4: + memset(gf[3], 9, TEST_SOURCES); + case 3: + memset(gf[2], 7, TEST_SOURCES); + case 2: + memset(gf[1], 1, TEST_SOURCES); + case 1: + memset(gf[0], 2, TEST_SOURCES); + break; + default: + return -1; + } + + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES], + buffs, 
dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, TEST_LEN); + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs); +#else + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + +#if (VECT == 1) + REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref); +#else + REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref); +#endif + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, + &g_tbls[i * 32 * TEST_SOURCES], buffs, + dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, TEST_LEN); + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], + dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], 
dest_ptrs[i], TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n", + i, rtest); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < srcs; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * srcs) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs], + buffs, dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, TEST_LEN); + for (i = 0; i < srcs; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], + dest_ptrs); +#endif + + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d srcs=%d\n", i, srcs); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : ALIGN_SIZE; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < vector; i++) + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(size, TEST_SOURCES, + &g_tbls[i * 32 * TEST_SOURCES], efence_buffs, + dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, size); + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i], + dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d size=%d\n", i, size); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], TEST_MIN_SIZE + align); + printf("dprod_dut:"); + dump(dest_ptrs[i], TEST_MIN_SIZE + align); + return -1; + } + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + for (i = 0; i < vector; i++) { + udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over + } + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < srcs; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs, + dest_ref[i]); + + for (i = 0; i < srcs; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs); +#else + FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d ualign srcs=%d\n", i, srcs); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(udest_ptrs[i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < vector; i++) { + offset = udest_ptrs[i] - dest_ptrs[i]; + if (memcmp(dest_ptrs[i], dest_ref[0], offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp + (dest_ptrs[i] + offset + size, dest_ref[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : ALIGN_SIZE; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) { + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(size, TEST_SOURCES, + &g_tbls[i * 32 * TEST_SOURCES], buffs, + dest_ref[i]); + + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], + dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d ualign len=%d\n", i, size); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mul_avx.asm b/src/spdk/isa-l/erasure_code/gf_vect_mul_avx.asm new file mode 100644 index 000000000..c1a9b9799 --- /dev/null +++ b/src/spdk/isa-l/erasure_code/gf_vect_mul_avx.asm @@ -0,0 +1,164 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; GF(2^8) multiply of a byte vector by one constant, using two 16-entry nibble tables
+;;; gf_vect_mul_avx(len, mul_array, src, dest)
+;;; Handles 32 bytes per loop pass; rax returns (bytes processed - len)
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0  rdi
+ %define arg1  rsi
+ %define arg2  rdx
+ %define arg3  rcx
+ %define arg4  r8
+ %define arg5  r9
+ %define tmp   r11
+ %define return rax
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0   rcx
+ %define arg1   rdx
+ %define arg2   r8
+ %define arg3   r9
+ %define return rax
+ %define stack_size  5*16 + 8 	; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack	stack_size
+	save_xmm128	xmm6, 0*16
+	save_xmm128	xmm7, 1*16
+	save_xmm128	xmm13, 2*16
+	save_xmm128	xmm14, 3*16
+	save_xmm128	xmm15, 4*16
+	end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa	xmm6, [rsp + 0*16]
+	vmovdqa	xmm7, [rsp + 1*16]
+	vmovdqa	xmm13, [rsp + 2*16]
+	vmovdqa	xmm14, [rsp + 3*16]
+	vmovdqa	xmm15, [rsp + 4*16]
+	add	rsp, stack_size
+ %endmacro
+
+%endif
+
+
+%define len   arg0
+%define mul_array arg1
+%define	src   arg2
+%define dest  arg3
+%define pos   return
+
+
+;;; Use non-temporal load/store unless NO_NT_LDST is defined (both forms are aligned moves)
+%ifdef NO_NT_LDST
+ %define XLDR vmovdqa
+ %define XSTR vmovdqa
+%else
+ %define XLDR vmovntdqa
+ %define XSTR vmovntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f  xmm15
+%define xgft_lo  xmm14
+%define xgft_hi  xmm13
+
+%define x0     xmm0
+%define xtmp1a xmm1
+%define xtmp1b xmm2
+%define xtmp1c xmm3
+%define x1     xmm4
+%define xtmp2a xmm5
+%define xtmp2b xmm6
+%define xtmp2c xmm7
+
+align 16
+global gf_vect_mul_avx:ISAL_SYM_TYPE_FUNCTION
+func(gf_vect_mul_avx)
+	FUNC_SAVE
+	mov	pos, 0
+	vmovdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+	vmovdqu	xgft_lo, [mul_array]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+	vmovdqu	xgft_hi, [mul_array+16]	; "     Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+
+loop32:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	x1, [src+pos+16]	;Get next source vector + 16B ahead
+	add	pos, 32			;Loop on 32 bytes at a time
+	cmp	pos, len		;Flags are consumed by jl below; no instruction in between writes EFLAGS
+	vpand	xtmp1a, x0, xmask0f	;Mask low src nibble in bits 3-0
+	vpand	xtmp2a, x1, xmask0f
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 3-0
+	vpsraw	x1, x1, 4
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 3-0
+	vpand	x1, x1, xmask0f
+	vpshufb	xtmp1b, xgft_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmp1c, xgft_lo, xtmp1a	;Lookup mul table of low nibble
+	vpshufb	xtmp2b, xgft_hi, x1	;Lookup mul table of high nibble
+	vpshufb	xtmp2c, xgft_lo, xtmp2a	;Lookup mul table of low nibble
+	vpxor	xtmp1b, xtmp1b, xtmp1c	;GF add high and low partials
+	vpxor	xtmp2b, xtmp2b, xtmp2c
+	XSTR	[dest+pos-32], xtmp1b	;Store result (pos was already advanced by 32)
+	XSTR	[dest+pos-16], xtmp2b	;Store +16B result
+	jl	loop32
+
+
+return_pass:
+	FUNC_RESTORE
+	sub	pos, len		;Return pos - len: 0 when len is a positive multiple of 32
+	ret
+
+return_fail:				;Not targeted by any jump in this routine
+	FUNC_RESTORE
+	mov	return, 1
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;;       func            core, ver, snum
+slversion gf_vect_mul_avx, 01,   03,  0036
diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mul_base_test.c b/src/spdk/isa-l/erasure_code/gf_vect_mul_base_test.c
new file mode 100644
index 000000000..c47d2365d
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_vect_mul_base_test.c
@@ -0,0 +1,129 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		// for memset
+#include "erasure_code.h"
+
+#define TEST_SIZE 8192
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 100000
+#define TEST_TYPE_STR ""
+
+typedef unsigned char u8;
+// Self-test for gf_vect_mul_base(): every output byte is compared against gf_mul().
+// Returns 0 on pass, 1 on a data mismatch, -1 if a buffer cannot be allocated.
+int main(int argc, char *argv[])
+{
+	int i, c;
+	u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
+	int align, size;
+	unsigned char *efence_buff1;
+	unsigned char *efence_buff2;
+
+	printf("gf_vect_mul_base_test:\n");
+
+	gf_vect_mul_init(a, gf_const_tbl);
+
+	buff1 = (u8 *) malloc(TEST_SIZE);
+	buff2 = (u8 *) malloc(TEST_SIZE);
+	buff3 = (u8 *) malloc(TEST_SIZE);
+
+	if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
+		printf("buffer alloc error\n");
+		return -1;
+	}
+	// Fill with rand data
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+	for (i = 0; i < TEST_SIZE; i++)
+		if (gf_mul(a, buff1[i]) != buff2[i]) {
+			printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
+			       gf_mul(a, buff1[i]));
+			return 1;
+		}
+
+	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
+
+	// Check reference function: a second run must reproduce the first
+	for (i = 0; i < TEST_SIZE; i++)
+		if (buff2[i] != buff3[i]) {
+			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+			       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+			return 1;
+		}
+
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	// Check each possible constant, 0..255 (int counter: a u8 compared with != 255 stops at 254)
+	printf("Random tests ");
+	for (c = 0; c < 256; c++) {
+		gf_vect_mul_init(c, gf_const_tbl);
+		gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+		for (i = 0; i < TEST_SIZE; i++)
+			if (gf_mul(c, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, c, buff1[i], buff2[i], gf_mul(c, buff1[i]));
+				return 1;
+			}
+		putchar('.');
+	}
+
+	// Run tests at end of buffer for Electric Fence
+	align = 32;
+	a = 2;
+
+	gf_vect_mul_init(a, gf_const_tbl);
+	for (size = 0; size < TEST_SIZE; size += align) {
+		// Line up TEST_SIZE from end
+		efence_buff1 = buff1 + size;
+		efence_buff2 = buff2 + size;
+
+		gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
+
+		for (i = 0; i < TEST_SIZE - size; i++)
+			if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
+				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
+				       i, efence_buff1[i], efence_buff2[i],
+				       gf_mul(a, efence_buff1[i]));
+				return 1;
+			}
+
+		putchar('.');
+	}
+
+	printf(" done: Pass\n");
+	return 0;
+}
diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mul_perf.c b/src/spdk/isa-l/erasure_code/gf_vect_mul_perf.c
new file mode 100644
index 000000000..58194cceb
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_vect_mul_perf.c
@@ -0,0 +1,90 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		// for memset
+#include "erasure_code.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN     (8*1024)
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test.  Pull from large mem base.
+#  define TEST_SOURCES 10
+#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
+#  define TEST_LEN     (GT_L3_CACHE / 2)
+#  define TEST_TYPE_STR "_cold"
+# else
+#  define TEST_TYPE_STR "_cus"	/* TEST_LEN is not defined in this file for the TEST_CUSTOM case */
+# endif
+#endif
+
+#define TEST_MEM (2 * TEST_LEN)
+
+typedef unsigned char u8;
+// Timed body: rebuild the multiply table for constant a, then run gf_vect_mul over TEST_LEN bytes.
+void gf_vect_mul_perf(u8 a, u8 * gf_const_tbl, u8 * buff1, u8 * buff2)
+{
+	gf_vect_mul_init(a, gf_const_tbl);
+	gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
+}
+// Benchmarks gf_vect_mul_perf() on zero-filled buffers; returns 1 if allocation fails, else 0.
+int main(int argc, char *argv[])
+{
+	u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
+	struct perf start;
+
+	printf("gf_vect_mul_perf:\n");
+
+	// Allocate large mem region
+	buff1 = (u8 *) malloc(TEST_LEN);
+	buff2 = (u8 *) malloc(TEST_LEN);
+	if (NULL == buff1 || NULL == buff2) {
+		printf("Failed to allocate %dB\n", TEST_LEN);
+		return 1;
+	}
+
+	memset(buff1, 0, TEST_LEN);
+	memset(buff2, 0, TEST_LEN);
+
+	printf("Start timed tests\n");
+	fflush(0);
+
+	BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
+
+	printf("gf_vect_mul" TEST_TYPE_STR ": ");
+	perf_print(start, (long long)TEST_LEN);
+
+	return 0;
+}
diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mul_sse.asm b/src/spdk/isa-l/erasure_code/gf_vect_mul_sse.asm
new file mode 100644
index 000000000..36323d639
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_vect_mul_sse.asm
@@ -0,0 +1,170 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;    * Redistributions of source code must retain the above copyright
+;      notice, this list of conditions and the following disclaimer.
+;    * Redistributions in binary form must reproduce the above copyright
+;      notice, this list of conditions and the following disclaimer in
+;      the documentation and/or other materials provided with the
+;      distribution.
+;    * Neither the name of Intel Corporation nor the names of its
+;      contributors may be used to endorse or promote products derived
+;      from this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; GF(2^8) multiply of a byte vector by one constant, using two 16-entry nibble tables
+;;; gf_vect_mul_sse(len, mul_array, src, dest)
+;;; Handles 32 bytes per loop pass; rax returns (bytes processed - len)
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0  rdi
+ %define arg1  rsi
+ %define arg2  rdx
+ %define arg3  rcx
+ %define arg4  r8
+ %define arg5  r9
+ %define tmp   r11
+ %define return rax
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+
+%elifidn __OUTPUT_FORMAT__, win64
+ %define arg0   rcx
+ %define arg1   rdx
+ %define arg2   r8
+ %define arg3   r9
+ %define return rax
+ %define stack_size  5*16 + 8 	; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack	stack_size
+	save_xmm128	xmm6, 0*16
+	save_xmm128	xmm7, 1*16
+	save_xmm128	xmm13, 2*16
+	save_xmm128	xmm14, 3*16
+	save_xmm128	xmm15, 4*16
+	end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	movdqa	xmm6, [rsp + 0*16]
+	movdqa	xmm7, [rsp + 1*16]
+	movdqa	xmm13, [rsp + 2*16]
+	movdqa	xmm14, [rsp + 3*16]
+	movdqa	xmm15, [rsp + 4*16]
+	add	rsp, stack_size
+ %endmacro
+
+%endif
+
+
+%define len   arg0
+%define mul_array arg1
+%define	src   arg2
+%define dest  arg3
+%define pos   return
+
+
+;;; Use non-temporal load/store unless NO_NT_LDST is defined (both forms are aligned moves)
+%ifdef NO_NT_LDST
+ %define XLDR movdqa
+ %define XSTR movdqa
+%else
+ %define XLDR movntdqa
+ %define XSTR movntdq
+%endif
+
+default rel
+
+[bits 64]
+section .text
+
+%define xmask0f  xmm15
+%define xgft_lo  xmm14
+%define xgft_hi  xmm13
+
+%define x0     xmm0
+%define xtmp1a xmm1
+%define xtmp1b xmm2
+%define xtmp1c xmm3
+%define x1     xmm4
+%define xtmp2a xmm5
+%define xtmp2b xmm6
+%define xtmp2c xmm7
+
+
+align 16
+global gf_vect_mul_sse:ISAL_SYM_TYPE_FUNCTION
+func(gf_vect_mul_sse)
+	FUNC_SAVE
+	mov	pos, 0
+	movdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
+	movdqu	xgft_lo, [mul_array]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+	movdqu	xgft_hi, [mul_array+16]	; "     Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+
+loop32:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	x1, [src+pos+16]	;Get next source vector + 16B ahead
+	movdqa	xtmp1b, xgft_hi		;Reload const array registers (pshufb overwrites its destination)
+	movdqa	xtmp1c, xgft_lo
+	movdqa	xtmp2b, xgft_hi
+	movdqa	xtmp2c, xgft_lo
+	movdqa	xtmp1a, x0		;Keep unshifted copy of src
+	movdqa	xtmp2a, x1
+	psraw	x0, 4			;Shift to put high nibble into bits 3-0
+	psraw	x1, 4
+	pand	xtmp1a, xmask0f		;Mask low src nibble in bits 3-0
+	pand	xtmp2a, xmask0f
+	pand	x0, xmask0f		;Mask high src nibble in bits 3-0
+	pand	x1, xmask0f
+	pshufb	xtmp1b, x0		;Lookup mul table of high nibble
+	pshufb	xtmp1c, xtmp1a		;Lookup mul table of low nibble
+	pshufb	xtmp2b, x1
+	pshufb	xtmp2c, xtmp2a
+	pxor	xtmp1b, xtmp1c		;GF add high and low partials
+	pxor	xtmp2b, xtmp2c
+	XSTR	[dest+pos], xtmp1b	;Store result
+	XSTR	[dest+pos+16], xtmp2b	;Store +16B result
+	add	pos, 32			;Loop on 32 bytes at a time
+	cmp	pos, len
+	jl	loop32
+
+
+return_pass:
+	sub	pos, len		;Return pos - len: 0 when len is a positive multiple of 32
+	FUNC_RESTORE
+	ret
+
+return_fail:				;Not targeted by any jump in this routine
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;;       func            core, ver, snum
+slversion gf_vect_mul_sse, 00,   03,  0034
diff --git a/src/spdk/isa-l/erasure_code/gf_vect_mul_test.c b/src/spdk/isa-l/erasure_code/gf_vect_mul_test.c
new file mode 100644
index 000000000..b1a406624
--- /dev/null
+++ b/src/spdk/isa-l/erasure_code/gf_vect_mul_test.c
@@ -0,0 +1,158 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "erasure_code.h"
+
+#define TEST_SIZE (128*1024)
+
+typedef unsigned char u8;
+// Self-test for gf_vect_mul(): checked against gf_mul() and gf_vect_mul_base(). Returns 0 on pass,
+// -1 on allocation failure or a full-length mismatch, 1 on an end-of-buffer mismatch.
+int main(int argc, char *argv[])
+{
+	int i, c, tsize;
+	u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
+	int align, size;
+	unsigned char *efence_buff1;
+	unsigned char *efence_buff2;
+	unsigned char *efence_buff3;
+
+	printf("gf_vect_mul_test: ");
+
+	gf_vect_mul_init(a, gf_const_tbl);
+
+	buff1 = (u8 *) malloc(TEST_SIZE);
+	buff2 = (u8 *) malloc(TEST_SIZE);
+	buff3 = (u8 *) malloc(TEST_SIZE);
+
+	if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
+		printf("buffer alloc error\n");
+		return -1;
+	}
+	// Fill with rand data
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+	for (i = 0; i < TEST_SIZE; i++) {
+		if (gf_mul(a, buff1[i]) != buff2[i]) {
+			printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
+			       buff1[i], buff2[i], gf_mul(a, buff1[i]));
+			return -1;
+		}
+	}
+
+	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
+
+	// Check reference function
+	for (i = 0; i < TEST_SIZE; i++) {
+		if (buff2[i] != buff3[i]) {
+			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+			       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+			return -1;
+		}
+	}
+
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	// Check each possible constant, 0..255 (int counter: a u8 compared with != 255 stops at 254)
+	for (c = 0; c < 256; c++) {
+		gf_vect_mul_init(c, gf_const_tbl);
+		gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+		for (i = 0; i < TEST_SIZE; i++)
+			if (gf_mul(c, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, c, buff1[i], buff2[i], gf_mul(c, buff1[i]));
+				return -1;
+			}
+		putchar('.');
+	}
+
+	// Check buffer len: shrink by 32 bytes per pass, with a fresh random constant each time
+	for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
+		a = rand();
+		gf_vect_mul_init(a, gf_const_tbl);
+		gf_vect_mul(tsize, gf_const_tbl, buff1, buff2);
+
+		for (i = 0; i < tsize; i++)
+			if (gf_mul(a, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+				return -1;
+			}
+		if (0 == tsize % (32 * 8)) {
+			putchar('.');
+			fflush(0);
+		}
+	}
+
+	// Run tests at end of buffer for Electric Fence
+	align = 32;
+	a = 2;
+
+	gf_vect_mul_init(a, gf_const_tbl);
+	for (size = 0; size < TEST_SIZE; size += align) {
+		// Line up TEST_SIZE from end
+		efence_buff1 = buff1 + size;
+		efence_buff2 = buff2 + size;
+		efence_buff3 = buff3 + size;
+
+		gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
+
+		for (i = 0; i < TEST_SIZE - size; i++)
+			if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
+				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
+				       i, efence_buff1[i], efence_buff2[i],
+				       gf_mul(a, efence_buff1[i]));
+				return 1;
+			}
+
+		gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
+
+		// Check reference function: report source, optimized result, then base result
+		for (i = 0; i < TEST_SIZE - size; i++)
+			if (efence_buff2[i] != efence_buff3[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, a, efence_buff1[i], efence_buff2[i],
+				       efence_buff3[i]);
+				return 1;
+			}
+
+		putchar('.');
+	}
+
+	printf(" done: Pass\n");
+	fflush(0);
+	return 0;
+}
diff --git a/src/spdk/isa-l/examples/ec/Makefile.am b/src/spdk/isa-l/examples/ec/Makefile.am
new file mode 100644
index 000000000..e7121af83
--- /dev/null
+++ b/src/spdk/isa-l/examples/ec/Makefile.am
@@ -0,0 +1,33 @@
+########################################################################
+#  Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +src_include += -I $(srcdir)/examples/ec + +examples += examples/ec/ec_simple_example +examples += examples/ec/ec_piggyback_example diff --git a/src/spdk/isa-l/examples/ec/Makefile.unx b/src/spdk/isa-l/examples/ec/Makefile.unx new file mode 100644 index 000000000..b04cfdfe6 --- /dev/null +++ b/src/spdk/isa-l/examples/ec/Makefile.unx @@ -0,0 +1,8 @@ + +default: ex + +include ../../erasure_code/Makefile.am +include Makefile.am +include ../../make.inc + +VPATH = . 
../../erasure_code ../../include diff --git a/src/spdk/isa-l/examples/ec/ec_piggyback_example.c b/src/spdk/isa-l/examples/ec/ec_piggyback_example.c new file mode 100644 index 000000000..e19abc067 --- /dev/null +++ b/src/spdk/isa-l/examples/ec/ec_piggyback_example.c @@ -0,0 +1,506 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include "erasure_code.h" // use <isa-l.h> instead when linking against installed +#include "test.h" + +#define MMAX 255 +#define KMAX 255 + +typedef unsigned char u8; +int verbose = 0; + +int usage(void) +{ + fprintf(stderr, + "Usage: ec_piggyback_example [options]\n" + " -h Help\n" + " -k Number of source fragments\n" + " -p Number of parity fragments\n" + " -l Length of fragments\n" + " -e Simulate erasure on frag index val. Zero based. Can be repeated.\n" + " -v Verbose\n" + " -b Run timed benchmark\n" + " -s Toggle use of sparse matrix opt\n" + " -r Pick random (k, p) with seed\n"); + exit(0); +} + +// Cauchy-based matrix +void gf_gen_full_pb_cauchy_matrix(u8 * a, int m, int k) +{ + int i, j, p = m - k; + + // Identity matrix in top k x k to indicate a systematic code + memset(a, 0, k * m); + for (i = 0; i < k; i++) + a[k * i + i] = 1; + + for (i = k; i < (k + p / 2); i++) { + for (j = 0; j < k / 2; j++) + a[k * i + j] = gf_inv(i ^ j); + for (; j < k; j++) + a[k * i + j] = 0; + } + for (; i < m; i++) { + for (j = 0; j < k / 2; j++) + a[k * i + j] = 0; + for (; j < k; j++) + a[k * i + j] = gf_inv((i - p / 2) ^ (j - k / 2)); + } + + // Fill in mixture of B parity depending on a few localized A sources + int r = 0, c = 0; + int repeat_len = k / (p - 2); + int parity_rows = p / 2; + + for (i = 1 + k + parity_rows; i < m; i++, r++) { + if (r == (parity_rows - 1) - ((k / 2 % (parity_rows - 1)))) + repeat_len++; + + for (j = 0; j < repeat_len; j++, c++) + a[k * i + c] = gf_inv((k + 1) ^ c); + } +} + +// Vandermonde based matrix - not recommended due to limits when invertible +void gf_gen_full_pb_vand_matrix(u8 * a, int m, int k) +{ + int i, j, p = m - k; + unsigned char q, gen = 1; + + // Identity matrix in top k x k to indicate a systematic code + memset(a, 0, k * m); + for (i = 0; i < k; i++) + a[k * i + i] = 1; + + for (i = k; i < (k + (p / 2)); i++) { + q = 1; + 
for (j = 0; j < k / 2; j++) { + a[k * i + j] = q; + q = gf_mul(q, gen); + } + for (; j < k; j++) + a[k * i + j] = 0; + gen = gf_mul(gen, 2); + } + gen = 1; + for (; i < m; i++) { + q = 1; + for (j = 0; j < k / 2; j++) { + a[k * i + j] = 0; + } + for (; j < k; j++) { + a[k * i + j] = q; + q = gf_mul(q, gen); + } + gen = gf_mul(gen, 2); + } + + // Fill in mixture of B parity depending on a few localized A sources + int r = 0, c = 0; + int repeat_len = k / (p - 2); + int parity_rows = p / 2; + + for (i = 1 + k + parity_rows; i < m; i++, r++) { + if (r == (parity_rows - 1) - ((k / 2 % (parity_rows - 1)))) + repeat_len++; + + for (j = 0; j < repeat_len; j++) + a[k * i + c++] = 1; + } +} + +void print_matrix(int m, int k, unsigned char *s, const char *msg) +{ + int i, j; + + printf("%s:\n", msg); + for (i = 0; i < m; i++) { + printf("%3d- ", i); + for (j = 0; j < k; j++) { + printf(" %2x", 0xff & s[j + (i * k)]); + } + printf("\n"); + } + printf("\n"); +} + +void print_list(int n, unsigned char *s, const char *msg) +{ + int i; + if (!verbose) + return; + + printf("%s: ", msg); + for (i = 0; i < n; i++) + printf(" %d", s[i]); + printf("\n"); +} + +static int gf_gen_decode_matrix(u8 * encode_matrix, + u8 * decode_matrix, + u8 * invert_matrix, + u8 * temp_matrix, + u8 * decode_index, + u8 * frag_err_list, int nerrs, int k, int m); + +int main(int argc, char *argv[]) +{ + int i, j, m, c, e, ret; + int k = 10, p = 4, len = 8 * 1024; // Default params + int nerrs = 0; + int benchmark = 0; + int sparse_matrix_opt = 1; + + // Fragment buffer pointers + u8 *frag_ptrs[MMAX]; + u8 *parity_ptrs[KMAX]; + u8 *recover_srcs[KMAX]; + u8 *recover_outp[KMAX]; + u8 frag_err_list[MMAX]; + + // Coefficient matrices + u8 *encode_matrix, *decode_matrix; + u8 *invert_matrix, *temp_matrix; + u8 *g_tbls; + u8 decode_index[MMAX]; + + if (argc == 1) + for (i = 0; i < p; i++) + frag_err_list[nerrs++] = rand() % (k + p); + + while ((c = getopt(argc, argv, "k:p:l:e:r:hvbs")) != -1) { + switch (c) { + 
case 'k': + k = atoi(optarg); + break; + case 'p': + p = atoi(optarg); + break; + case 'l': + len = atoi(optarg); + if (len < 0) + usage(); + break; + case 'e': + e = atoi(optarg); + frag_err_list[nerrs++] = e; + break; + case 'r': + srand(atoi(optarg)); + k = (rand() % MMAX) / 4; + k = (k < 2) ? 2 : k; + p = (rand() % (MMAX - k)) / 4; + p = (p < 2) ? 2 : p; + for (i = 0; i < k && nerrs < p; i++) + if (rand() & 1) + frag_err_list[nerrs++] = i; + break; + case 'v': + verbose++; + break; + case 'b': + benchmark = 1; + break; + case 's': + sparse_matrix_opt = !sparse_matrix_opt; + break; + case 'h': + default: + usage(); + break; + } + } + m = k + p; + + // Check for valid parameters + if (m > (MMAX / 2) || k > (KMAX / 2) || m < 0 || p < 2 || k < 1) { + printf(" Input test parameter error m=%d, k=%d, p=%d, erasures=%d\n", + m, k, p, nerrs); + usage(); + } + if (nerrs > p) { + printf(" Number of erasures chosen exceeds power of code erasures=%d p=%d\n", + nerrs, p); + } + for (i = 0; i < nerrs; i++) { + if (frag_err_list[i] >= m) + printf(" fragment %d not in range\n", frag_err_list[i]); + } + + printf("ec_piggyback_example:\n"); + + /* + * One simple way to implement piggyback codes is to keep a 2x wide matrix + * that covers the how each parity is related to both A and B sources. This + * keeps it easy to generalize in parameters m,k and the resulting sparse + * matrix multiplication can be optimized by pre-removal of zero items. + */ + + int k2 = 2 * k; + int p2 = 2 * p; + int m2 = k2 + p2; + int nerrs2 = nerrs; + + encode_matrix = malloc(m2 * k2); + decode_matrix = malloc(m2 * k2); + invert_matrix = malloc(m2 * k2); + temp_matrix = malloc(m2 * k2); + g_tbls = malloc(k2 * p2 * 32); + + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || temp_matrix == NULL || g_tbls == NULL) { + printf("Test failure! 
Error with malloc\n"); + return -1; + } + // Allocate the src fragments + for (i = 0; i < k; i++) { + if (NULL == (frag_ptrs[i] = malloc(len))) { + printf("alloc error: Fail\n"); + return -1; + } + } + // Allocate the parity fragments + for (i = 0; i < p2; i++) { + if (NULL == (parity_ptrs[i] = malloc(len / 2))) { + printf("alloc error: Fail\n"); + return -1; + } + } + + // Allocate buffers for recovered data + for (i = 0; i < p2; i++) { + if (NULL == (recover_outp[i] = malloc(len / 2))) { + printf("alloc error: Fail\n"); + return -1; + } + } + + // Fill sources with random data + for (i = 0; i < k; i++) + for (j = 0; j < len; j++) + frag_ptrs[i][j] = rand(); + + printf(" encode (m,k,p)=(%d,%d,%d) len=%d\n", m, k, p, len); + + // Pick an encode matrix. + gf_gen_full_pb_cauchy_matrix(encode_matrix, m2, k2); + + if (verbose) + print_matrix(m2, k2, encode_matrix, "encode matrix"); + + // Initialize g_tbls from encode matrix + ec_init_tables(k2, p2, &encode_matrix[k2 * k2], g_tbls); + + // Fold A and B into single list of fragments + for (i = 0; i < k; i++) + frag_ptrs[i + k] = &frag_ptrs[i][len / 2]; + + if (!sparse_matrix_opt) { + // Standard encode using no assumptions on the encode matrix + + // Generate EC parity blocks from sources + ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs, parity_ptrs); + + if (benchmark) { + struct perf start; + BENCHMARK(&start, BENCHMARK_TIME, + ec_encode_data(len / 2, k2, p2, g_tbls, frag_ptrs, + parity_ptrs)); + printf("ec_piggyback_encode_std: "); + perf_print(start, m2 * len / 2); + } + } else { + // Sparse matrix optimization - use fact that input matrix is sparse + + // Keep an encode matrix with some zero elements removed + u8 *encode_matrix_faster, *g_tbls_faster; + encode_matrix_faster = malloc(m * k); + g_tbls_faster = malloc(k * p * 32); + if (encode_matrix_faster == NULL || g_tbls_faster == NULL) { + printf("Test failure! 
Error with malloc\n"); + return -1; + } + + /* + * Pack with only the part that we know are non-zero. Alternatively + * we could search and keep track of non-zero elements but for + * simplicity we just skip the lower quadrant. + */ + for (i = k, j = k2; i < m; i++, j++) + memcpy(&encode_matrix_faster[k * i], &encode_matrix[k2 * j], k); + + if (verbose) { + print_matrix(p, k, &encode_matrix_faster[k * k], + "encode via sparse-opt"); + print_matrix(p2 / 2, k2, &encode_matrix[(k2 + p2 / 2) * k2], + "encode via sparse-opt"); + } + // Initialize g_tbls from encode matrix + ec_init_tables(k, p, &encode_matrix_faster[k * k], g_tbls_faster); + + // Generate EC parity blocks from sources + ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs, parity_ptrs); + ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32], frag_ptrs, + &parity_ptrs[p]); + + if (benchmark) { + struct perf start; + BENCHMARK(&start, BENCHMARK_TIME, + ec_encode_data(len / 2, k, p, g_tbls_faster, frag_ptrs, + parity_ptrs); + ec_encode_data(len / 2, k2, p, &g_tbls[k2 * p * 32], + frag_ptrs, &parity_ptrs[p])); + printf("ec_piggyback_encode_sparse: "); + perf_print(start, m2 * len / 2); + } + } + + if (nerrs <= 0) + return 0; + + printf(" recover %d fragments\n", nerrs); + + // Set frag pointers to correspond to parity + for (i = k2; i < m2; i++) + frag_ptrs[i] = parity_ptrs[i - k2]; + + print_list(nerrs2, frag_err_list, " frag err list"); + + // Find a decode matrix to regenerate all erasures from remaining frags + ret = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, temp_matrix, decode_index, frag_err_list, + nerrs2, k2, m2); + + if (ret != 0) { + printf("Fail on generate decode matrix\n"); + return -1; + } + // Pack recovery array pointers as list of valid fragments + for (i = 0; i < k2; i++) + if (decode_index[i] < k2) + recover_srcs[i] = frag_ptrs[decode_index[i]]; + else + recover_srcs[i] = parity_ptrs[decode_index[i] - k2]; + + print_list(k2, decode_index, " decode index"); + + 
// Recover data + ec_init_tables(k2, nerrs2, decode_matrix, g_tbls); + ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs, recover_outp); + + if (benchmark) { + struct perf start; + BENCHMARK(&start, BENCHMARK_TIME, + ec_encode_data(len / 2, k2, nerrs2, g_tbls, recover_srcs, + recover_outp)); + printf("ec_piggyback_decode: "); + perf_print(start, (k2 + nerrs2) * len / 2); + } + // Check that recovered buffers are the same as original + printf(" check recovery of block {"); + for (i = 0; i < nerrs2; i++) { + printf(" %d", frag_err_list[i]); + if (memcmp(recover_outp[i], frag_ptrs[frag_err_list[i]], len / 2)) { + printf(" Fail erasure recovery %d, frag %d\n", i, frag_err_list[i]); + return -1; + } + } + printf(" } done all: Pass\n"); + + return 0; +} + +// Generate decode matrix from encode matrix and erasure list + +static int gf_gen_decode_matrix(u8 * encode_matrix, + u8 * decode_matrix, + u8 * invert_matrix, + u8 * temp_matrix, + u8 * decode_index, u8 * frag_err_list, int nerrs, int k, int m) +{ + int i, j, p, r; + int nsrcerrs = 0; + u8 s, *b = temp_matrix; + u8 frag_in_err[MMAX]; + + memset(frag_in_err, 0, sizeof(frag_in_err)); + + // Order the fragments in erasure for easier sorting + for (i = 0; i < nerrs; i++) { + if (frag_err_list[i] < k) + nsrcerrs++; + frag_in_err[frag_err_list[i]] = 1; + } + + // Construct b (matrix that encoded remaining frags) by removing erased rows + for (i = 0, r = 0; i < k; i++, r++) { + while (frag_in_err[r]) + r++; + for (j = 0; j < k; j++) + b[k * i + j] = encode_matrix[k * r + j]; + decode_index[i] = r; + } + if (verbose > 1) + print_matrix(k, k, b, "matrix to invert"); + + // Invert matrix to get recovery matrix + if (gf_invert_matrix(b, invert_matrix, k) < 0) + return -1; + + if (verbose > 2) + print_matrix(k, k, invert_matrix, "matrix inverted"); + + // Get decode matrix with only wanted recovery rows + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * 
frag_err_list[i] + j]; + } + } + + // For non-src (parity) erasures need to multiply encode matrix * invert + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * frag_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + if (verbose > 1) + print_matrix(nerrs, k, decode_matrix, "decode matrix"); + return 0; +} diff --git a/src/spdk/isa-l/examples/ec/ec_simple_example.c b/src/spdk/isa-l/examples/ec/ec_simple_example.c new file mode 100644 index 000000000..82efa6b48 --- /dev/null +++ b/src/spdk/isa-l/examples/ec/ec_simple_example.c @@ -0,0 +1,277 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include "erasure_code.h" // use <isa-l.h> instead when linking against installed + +#define MMAX 255 +#define KMAX 255 + +typedef unsigned char u8; + +int usage(void) +{ + fprintf(stderr, + "Usage: ec_simple_example [options]\n" + " -h Help\n" + " -k Number of source fragments\n" + " -p Number of parity fragments\n" + " -l Length of fragments\n" + " -e Simulate erasure on frag index val. Zero based. 
Can be repeated.\n" + " -r Pick random (k, p) with seed\n"); + exit(0); +} + +static int gf_gen_decode_matrix_simple(u8 * encode_matrix, + u8 * decode_matrix, + u8 * invert_matrix, + u8 * temp_matrix, + u8 * decode_index, + u8 * frag_err_list, int nerrs, int k, int m); + +int main(int argc, char *argv[]) +{ + int i, j, m, c, e, ret; + int k = 10, p = 4, len = 8 * 1024; // Default params + int nerrs = 0; + + // Fragment buffer pointers + u8 *frag_ptrs[MMAX]; + u8 *recover_srcs[KMAX]; + u8 *recover_outp[KMAX]; + u8 frag_err_list[MMAX]; + + // Coefficient matrices + u8 *encode_matrix, *decode_matrix; + u8 *invert_matrix, *temp_matrix; + u8 *g_tbls; + u8 decode_index[MMAX]; + + if (argc == 1) + for (i = 0; i < p; i++) + frag_err_list[nerrs++] = rand() % (k + p); + + while ((c = getopt(argc, argv, "k:p:l:e:r:h")) != -1) { + switch (c) { + case 'k': + k = atoi(optarg); + break; + case 'p': + p = atoi(optarg); + break; + case 'l': + len = atoi(optarg); + if (len < 0) + usage(); + break; + case 'e': + e = atoi(optarg); + frag_err_list[nerrs++] = e; + break; + case 'r': + srand(atoi(optarg)); + k = (rand() % (MMAX - 1)) + 1; // Pick k {1 to MMAX - 1} + p = (rand() % (MMAX - k)) + 1; // Pick p {1 to MMAX - k} + + for (i = 0; i < k + p && nerrs < p; i++) + if (rand() & 1) + frag_err_list[nerrs++] = i; + break; + case 'h': + default: + usage(); + break; + } + } + m = k + p; + + // Check for valid parameters + if (m > MMAX || k > KMAX || m < 0 || p < 1 || k < 1) { + printf(" Input test parameter error m=%d, k=%d, p=%d, erasures=%d\n", + m, k, p, nerrs); + usage(); + } + if (nerrs > p) { + printf(" Number of erasures chosen exceeds power of code erasures=%d p=%d\n", + nerrs, p); + usage(); + } + for (i = 0; i < nerrs; i++) { + if (frag_err_list[i] >= m) { + printf(" fragment %d not in range\n", frag_err_list[i]); + usage(); + } + } + + printf("ec_simple_example:\n"); + + // Allocate coding matrices + encode_matrix = malloc(m * k); + decode_matrix = malloc(m * k); + invert_matrix 
= malloc(m * k); + temp_matrix = malloc(m * k); + g_tbls = malloc(k * p * 32); + + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || temp_matrix == NULL || g_tbls == NULL) { + printf("Test failure! Error with malloc\n"); + return -1; + } + // Allocate the src & parity buffers + for (i = 0; i < m; i++) { + if (NULL == (frag_ptrs[i] = malloc(len))) { + printf("alloc error: Fail\n"); + return -1; + } + } + + // Allocate buffers for recovered data + for (i = 0; i < p; i++) { + if (NULL == (recover_outp[i] = malloc(len))) { + printf("alloc error: Fail\n"); + return -1; + } + } + + // Fill sources with random data + for (i = 0; i < k; i++) + for (j = 0; j < len; j++) + frag_ptrs[i][j] = rand(); + + printf(" encode (m,k,p)=(%d,%d,%d) len=%d\n", m, k, p, len); + + // Pick an encode matrix. A Cauchy matrix is a good choice as even + // large k are always invertable keeping the recovery rule simple. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Initialize g_tbls from encode matrix + ec_init_tables(k, p, &encode_matrix[k * k], g_tbls); + + // Generate EC parity blocks from sources + ec_encode_data(len, k, p, g_tbls, frag_ptrs, &frag_ptrs[k]); + + if (nerrs <= 0) + return 0; + + printf(" recover %d fragments\n", nerrs); + + // Find a decode matrix to regenerate all erasures from remaining frags + ret = gf_gen_decode_matrix_simple(encode_matrix, decode_matrix, + invert_matrix, temp_matrix, decode_index, + frag_err_list, nerrs, k, m); + if (ret != 0) { + printf("Fail on generate decode matrix\n"); + return -1; + } + // Pack recovery array pointers as list of valid fragments + for (i = 0; i < k; i++) + recover_srcs[i] = frag_ptrs[decode_index[i]]; + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(len, k, nerrs, g_tbls, recover_srcs, recover_outp); + + // Check that recovered buffers are the same as original + printf(" check recovery of block {"); + for (i = 0; i < nerrs; i++) { + printf(" %d", 
frag_err_list[i]); + if (memcmp(recover_outp[i], frag_ptrs[frag_err_list[i]], len)) { + printf(" Fail erasure recovery %d, frag %d\n", i, frag_err_list[i]); + return -1; + } + } + + printf(" } done all: Pass\n"); + return 0; +} + +/* + * Generate decode matrix from encode matrix and erasure list + * + */ + +static int gf_gen_decode_matrix_simple(u8 * encode_matrix, + u8 * decode_matrix, + u8 * invert_matrix, + u8 * temp_matrix, + u8 * decode_index, u8 * frag_err_list, int nerrs, int k, + int m) +{ + int i, j, p, r; + int nsrcerrs = 0; + u8 s, *b = temp_matrix; + u8 frag_in_err[MMAX]; + + memset(frag_in_err, 0, sizeof(frag_in_err)); + + // Order the fragments in erasure for easier sorting + for (i = 0; i < nerrs; i++) { + if (frag_err_list[i] < k) + nsrcerrs++; + frag_in_err[frag_err_list[i]] = 1; + } + + // Construct b (matrix that encoded remaining frags) by removing erased rows + for (i = 0, r = 0; i < k; i++, r++) { + while (frag_in_err[r]) + r++; + for (j = 0; j < k; j++) + b[k * i + j] = encode_matrix[k * r + j]; + decode_index[i] = r; + } + + // Invert matrix to get recovery matrix + if (gf_invert_matrix(b, invert_matrix, k) < 0) + return -1; + + // Get decode matrix with only wanted recovery rows + for (i = 0; i < nerrs; i++) { + if (frag_err_list[i] < k) // A src err + for (j = 0; j < k; j++) + decode_matrix[k * i + j] = + invert_matrix[k * frag_err_list[i] + j]; + } + + // For non-src (parity) erasures need to multiply encode matrix * invert + for (p = 0; p < nerrs; p++) { + if (frag_err_list[p] >= k) { // A parity err + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * frag_err_list[p] + j]); + decode_matrix[k * p + i] = s; + } + } + } + return 0; +} diff --git a/src/spdk/isa-l/igzip/Makefile.am b/src/spdk/isa-l/igzip/Makefile.am new file mode 100644 index 000000000..7ae55507b --- /dev/null +++ b/src/spdk/isa-l/igzip/Makefile.am @@ -0,0 +1,138 @@ 
+######################################################################## +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +lsrc += igzip/igzip.c \ + igzip/hufftables_c.c \ + igzip/igzip_base.c \ + igzip/igzip_icf_base.c \ + igzip/adler32_base.c \ + igzip/flatten_ll.c \ + igzip/encode_df.c \ + igzip/igzip_icf_body.c + +lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c +lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c + +lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \ + igzip/aarch64/igzip_multibinary_arm64.S \ + igzip/aarch64/igzip_isal_adler32_neon.S \ + igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c \ + igzip/aarch64/igzip_deflate_body_aarch64.S \ + igzip/aarch64/igzip_deflate_finish_aarch64.S \ + igzip/aarch64/isal_deflate_icf_body_hash_hist.S \ + igzip/aarch64/isal_deflate_icf_finish_hash_hist.S \ + igzip/aarch64/isal_update_histogram.S \ + igzip/proc_heap_base.c + +lsrc_x86_64 += igzip/igzip_body.asm \ + igzip/igzip_finish.asm \ + igzip/igzip_icf_body_h1_gr_bt.asm \ + igzip/igzip_icf_finish.asm \ + igzip/rfc1951_lookup.asm \ + igzip/adler32_sse.asm \ + igzip/adler32_avx2_4.asm \ + igzip/igzip_multibinary.asm \ + igzip/igzip_update_histogram_01.asm \ + igzip/igzip_update_histogram_04.asm \ + igzip/igzip_decode_block_stateless_01.asm \ + igzip/igzip_decode_block_stateless_04.asm \ + igzip/igzip_inflate_multibinary.asm \ + igzip/encode_df_04.asm \ + igzip/encode_df_06.asm \ + igzip/proc_heap.asm \ + igzip/igzip_deflate_hash.asm \ + igzip/igzip_gen_icf_map_lh1_06.asm \ + igzip/igzip_gen_icf_map_lh1_04.asm \ + igzip/igzip_set_long_icf_fg_04.asm \ + igzip/igzip_set_long_icf_fg_06.asm + +src_include += -I $(srcdir)/igzip +extern_hdrs += include/igzip_lib.h + +check_tests += igzip/igzip_rand_test +check_tests += igzip/igzip_wrapper_hdr_test +check_tests += igzip/checksum32_funcs_test + +other_tests += igzip/igzip_file_perf igzip/igzip_hist_perf +other_tests += igzip/igzip_perf +other_tests += igzip/igzip_semi_dyn_file_perf +other_tests += 
igzip/igzip_build_hash_table_perf + +other_src += igzip/bitbuf2.asm \ + igzip/data_struct2.asm \ + igzip/inflate_data_structs.asm \ + igzip/igzip_body.asm \ + igzip/igzip_finish.asm \ + igzip/lz0a_const.asm \ + igzip/options.asm \ + igzip/stdmac.asm \ + igzip/igzip_compare_types.asm \ + igzip/bitbuf2.h \ + igzip/repeated_char_result.h \ + igzip/igzip_update_histogram.asm \ + igzip/huffman.asm \ + include/reg_sizes.asm \ + include/multibinary.asm \ + include/test.h \ + include/unaligned.h \ + igzip/huffman.h \ + igzip/igzip_level_buf_structs.h \ + igzip/igzip_decode_block_stateless.asm \ + igzip/inflate_std_vects.h \ + igzip/flatten_ll.h \ + igzip/encode_df.h \ + igzip/heap_macros.asm \ + igzip/igzip_wrapper.h \ + igzip/static_inflate.h \ + igzip/igzip_checksums.h + +perf_tests += igzip/adler32_perf + +examples += igzip/igzip_example igzip/igzip_sync_flush_example + +igzip_igzip_rand_test_LDADD = libisal.la + +# Include tools to make custom Huffman tables based on sample data +other_tests += igzip/generate_custom_hufftables +other_tests += igzip/generate_static_inflate +other_src += igzip/huff_codes.h +lsrc += igzip/huff_codes.c + +# Include tools and tests using the reference inflate +other_tests += igzip/igzip_inflate_test +lsrc += igzip/igzip_inflate.c +other_src += igzip/checksum_test_ref.h + +igzip_perf: LDLIBS += -lz +igzip_igzip_perf_LDADD = libisal.la +igzip_igzip_perf_LDFLAGS = -lz +igzip_inflate_test: LDLIBS += -lz +igzip_igzip_inflate_test_LDADD = libisal.la +igzip_igzip_inflate_test_LDFLAGS = -lz +igzip_igzip_hist_perf_LDADD = libisal.la diff --git a/src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h b/src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h new file mode 100644 index 000000000..88eb18dfd --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h @@ -0,0 +1,57 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#ifndef __BITBUF2_AARCH64_H__ +#define __BITBUF2_AARCH64_H__ +#include "options_aarch64.h" + +#ifdef __ASSEMBLY__ +.macro update_bits stream:req,code:req,code_len:req,m_bits:req,m_bit_count:req \ + m_out_buf:req + + lsl x_\code,x_\code,x_\m_bit_count + orr x_\m_bits,x_\code,x_\m_bits + add x_\m_bit_count,x_\code_len,x_\m_bit_count + + str x_\m_bits,[x_\m_out_buf] + + and w_\code,w_\m_bit_count,-8 + lsr w_\code_len,w_\m_bit_count,3 + add x_\m_out_buf,x_\m_out_buf,w_\code_len,uxtw + sub w_\m_bit_count,w_\m_bit_count,w_\code + lsr x_\m_bits,x_\m_bits,x_\code + + str x_\m_bits,[stream,_internal_state_bitbuf_m_bits] + str w_\m_bit_count,[stream,_internal_state_bitbuf_m_bit_count] + str x_\m_out_buf,[stream,_internal_state_bitbuf_m_out_buf] + + +.endm +#endif +#endif diff --git a/src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h b/src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h new file mode 100644 index 000000000..71160fe1b --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h @@ -0,0 +1,226 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef __AARCH64_DATA_STRUCT_H__ +#define __AARCH64_DATA_STRUCT_H__ +#ifdef __ASSEMBLY__ + +#define FIELD(name,size,align) \ + .set _FIELD_OFFSET,(_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)); \ + .equ name,_FIELD_OFFSET ; \ + .set _FIELD_OFFSET,_FIELD_OFFSET + size; \ + .if align > _STRUCT_ALIGN; \ + .set _STRUCT_ALIGN, align; \ + .endif; + +#define START_STRUCT(name) .set _FIELD_OFFSET,0;.set _STRUCT_ALIGN,0; + +#define END_STRUCT(name) .set _##name##_size,_FIELD_OFFSET;\ + .set _##name##_align,_STRUCT_ALIGN + +#define CONST(name,value) .equ name,value + + +/// BitBuf2 +START_STRUCT(BitBuf2) + /// name size align + FIELD ( _m_bits, 8, 8 ) + FIELD ( _m_bit_count, 4, 4 ) + FIELD ( _m_out_buf, 8, 8 ) + FIELD ( _m_out_end, 8, 8 ) + FIELD ( _m_out_start, 8, 8 ) +END_STRUCT(BitBuf2) + + +/// isal_mod_hist +#define HIST_ELEM_SIZE 4 +START_STRUCT(isal_mod_hist) + /// name size align + FIELD ( _d_hist, 30*HIST_ELEM_SIZE, HIST_ELEM_SIZE ) + FIELD ( _ll_hist, 513*HIST_ELEM_SIZE, HIST_ELEM_SIZE ) +END_STRUCT(isal_mod_hist) + + +/// hufftables_icf +#define HUFF_CODE_SIZE 4 +START_STRUCT(hufftables_icf) + /// name size align + FIELD ( 
_dist_table, 31 * HUFF_CODE_SIZE, HUFF_CODE_SIZE ) + FIELD ( _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE ) +END_STRUCT(hufftables_icf) + + +/// hash8k_buf +START_STRUCT(hash8k_buf) + /// name size align + FIELD ( _hash8k_table, 2 * IGZIP_HASH8K_HASH_SIZE, 2 ) +END_STRUCT(hash8k_buf) + + +/// hash_map_buf +START_STRUCT(hash_map_buf) + /// name size align + FIELD ( _hash_table, 2 * IGZIP_HASH_MAP_HASH_SIZE, 2 ) + FIELD ( _matches_next, 8, 8 ) + FIELD ( _matches_end, 8, 8 ) + FIELD ( _matches, 4*4*1024, 4 ) + FIELD ( _overflow, 4*LA, 4 ) +END_STRUCT(hash_map_buf) + + +/// level_buf +#define DEF_MAX_HDR_SIZE 328 +START_STRUCT(level_buf) + /// name size align + FIELD ( _encode_tables, _hufftables_icf_size, _hufftables_icf_align ) + FIELD ( _hist, _isal_mod_hist_size, _isal_mod_hist_align ) + FIELD ( _deflate_hdr_count, 4, 4 ) + FIELD ( _deflate_hdr_extra_bits,4, 4 ) + FIELD ( _deflate_hdr, DEF_MAX_HDR_SIZE, 1 ) + FIELD ( _icf_buf_next, 8, 8 ) + FIELD ( _icf_buf_avail_out, 8, 8 ) + FIELD ( _icf_buf_start, 8, 8 ) + FIELD ( _lvl_extra, _hash_map_buf_size, _hash_map_buf_align ) +END_STRUCT(level_buf) + + +CONST( _hash8k_hash_table , _lvl_extra + _hash8k_table ) +CONST( _hash_map_hash_table , _lvl_extra + _hash_table ) +CONST( _hash_map_matches_next , _lvl_extra + _matches_next ) +CONST( _hash_map_matches_end , _lvl_extra + _matches_end ) +CONST( _hash_map_matches , _lvl_extra + _matches ) +CONST( _hist_lit_len , _hist+_ll_hist ) +CONST( _hist_dist , _hist+_d_hist ) + + +/// isal_zstate +START_STRUCT(isal_zstate) + /// name size align + FIELD ( _total_in_start,4, 4 ) + FIELD ( _block_next, 4, 4 ) + FIELD ( _block_end, 4, 4 ) + FIELD ( _dist_mask, 4, 4 ) + FIELD ( _hash_mask, 4, 4 ) + FIELD ( _state, 4, 4 ) + FIELD ( _bitbuf, _BitBuf2_size, _BitBuf2_align ) + FIELD ( _crc, 4, 4 ) + FIELD ( _has_wrap_hdr, 1, 1 ) + FIELD ( _has_eob_hdr, 1, 1 ) + FIELD ( _has_eob, 1, 1 ) + FIELD ( _has_hist, 1, 1 ) + FIELD ( _has_level_buf_init, 2, 2 ) + FIELD ( _count, 4, 4 ) + FIELD ( 
_tmp_out_buff, 16, 1 ) + FIELD ( _tmp_out_start, 4, 4 ) + FIELD ( _tmp_out_end, 4, 4 ) + FIELD ( _b_bytes_valid, 4, 4 ) + FIELD ( _b_bytes_processed, 4, 4 ) + FIELD ( _buffer, BSIZE, 1 ) + FIELD ( _head, IGZIP_LVL0_HASH_SIZE*2, 2 ) +END_STRUCT(isal_zstate) + + + +CONST( _bitbuf_m_bits , _bitbuf+_m_bits ) +CONST( _bitbuf_m_bit_count , _bitbuf+_m_bit_count ) +CONST( _bitbuf_m_out_buf , _bitbuf+_m_out_buf ) +CONST( _bitbuf_m_out_end , _bitbuf+_m_out_end ) +CONST( _bitbuf_m_out_start , _bitbuf+_m_out_start ) + + +/// isal_zstream +START_STRUCT(isal_zstream) + /// name size align + FIELD ( _next_in, 8, 8 ) + FIELD ( _avail_in, 4, 4 ) + FIELD ( _total_in, 4, 4 ) + FIELD ( _next_out, 8, 8 ) + FIELD ( _avail_out, 4, 4 ) + FIELD ( _total_out, 4, 4 ) + FIELD ( _hufftables, 8, 8 ) + FIELD ( _level, 4, 4 ) + FIELD ( _level_buf_size, 4, 4 ) + FIELD ( _level_buf, 8, 8 ) + FIELD ( _end_of_stream, 2, 2 ) + FIELD ( _flush, 2, 2 ) + FIELD ( _gzip_flag, 2, 2 ) + FIELD ( _hist_bits, 2, 2 ) + FIELD ( _internal_state, _isal_zstate_size, _isal_zstate_align ) +END_STRUCT(isal_zstream) + + + +CONST( _internal_state_total_in_start , _internal_state+_total_in_start ) +CONST( _internal_state_block_next , _internal_state+_block_next ) +CONST( _internal_state_block_end , _internal_state+_block_end ) +CONST( _internal_state_b_bytes_valid , _internal_state+_b_bytes_valid ) +CONST( _internal_state_b_bytes_processed , _internal_state+_b_bytes_processed ) +CONST( _internal_state_crc , _internal_state+_crc ) +CONST( _internal_state_dist_mask , _internal_state+_dist_mask ) +CONST( _internal_state_hash_mask , _internal_state+_hash_mask ) +CONST( _internal_state_bitbuf , _internal_state+_bitbuf ) +CONST( _internal_state_state , _internal_state+_state ) +CONST( _internal_state_count , _internal_state+_count ) +CONST( _internal_state_tmp_out_buff , _internal_state+_tmp_out_buff ) +CONST( _internal_state_tmp_out_start , _internal_state+_tmp_out_start ) +CONST( _internal_state_tmp_out_end , 
_internal_state+_tmp_out_end ) +CONST( _internal_state_has_wrap_hdr , _internal_state+_has_wrap_hdr ) +CONST( _internal_state_has_eob , _internal_state+_has_eob ) +CONST( _internal_state_has_eob_hdr , _internal_state+_has_eob_hdr ) +CONST( _internal_state_has_hist , _internal_state+_has_hist ) +CONST( _internal_state_has_level_buf_init , _internal_state+_has_level_buf_init ) +CONST( _internal_state_buffer , _internal_state+_buffer ) +CONST( _internal_state_head , _internal_state+_head ) +CONST( _internal_state_bitbuf_m_bits , _internal_state+_bitbuf_m_bits ) +CONST( _internal_state_bitbuf_m_bit_count , _internal_state+_bitbuf_m_bit_count ) +CONST( _internal_state_bitbuf_m_out_buf , _internal_state+_bitbuf_m_out_buf ) +CONST( _internal_state_bitbuf_m_out_end , _internal_state+_bitbuf_m_out_end ) +CONST( _internal_state_bitbuf_m_out_start , _internal_state+_bitbuf_m_out_start ) + +/// Internal States +CONST( ZSTATE_NEW_HDR , 0 ) +CONST( ZSTATE_HDR , (ZSTATE_NEW_HDR + 1) ) +CONST( ZSTATE_CREATE_HDR , (ZSTATE_HDR + 1) ) +CONST( ZSTATE_BODY , (ZSTATE_CREATE_HDR + 1) ) +CONST( ZSTATE_FLUSH_READ_BUFFER , (ZSTATE_BODY + 1) ) +CONST( ZSTATE_FLUSH_ICF_BUFFER , (ZSTATE_FLUSH_READ_BUFFER + 1) ) +CONST( ZSTATE_TYPE0_HDR , (ZSTATE_FLUSH_ICF_BUFFER + 1) ) +CONST( ZSTATE_TYPE0_BODY , (ZSTATE_TYPE0_HDR + 1) ) +CONST( ZSTATE_SYNC_FLUSH , (ZSTATE_TYPE0_BODY + 1) ) +CONST( ZSTATE_FLUSH_WRITE_BUFFER , (ZSTATE_SYNC_FLUSH + 1) ) +CONST( ZSTATE_TRL , (ZSTATE_FLUSH_WRITE_BUFFER + 1) ) + +CONST( _NO_FLUSH , 0 ) +CONST( _SYNC_FLUSH , 1 ) +CONST( _FULL_FLUSH , 2 ) +CONST( _STORED_BLK , 0 ) +CONST( IGZIP_NO_HIST , 0 ) +CONST( IGZIP_HIST , 1 ) +CONST( IGZIP_DICT_HIST , 2 ) +#endif +#endif diff --git a/src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h b/src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h new file mode 100644 index 000000000..c5ef2555c --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h @@ -0,0 +1,154 @@ 
+/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#ifndef __HUFFMAN_AARCH64_H__
+#define __HUFFMAN_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+#ifdef LONGER_HUFFTABLE
+	#if (D > 8192)
+	#error History D is larger than 8K
+	#else
+	#define DIST_TABLE_SIZE 8192
+	#define DECODE_OFFSET 26
+	#endif
+#else
+	#define DIST_TABLE_SIZE 2
+	#define DECODE_OFFSET 0
+#endif
+
+#define LEN_TABLE_SIZE 256
+#define LIT_TABLE_SIZE 257
+
+/*
+ * Byte offsets of the lookup tables, relative to the hufftables pointer.
+ * The arithmetic in the trailing comments is for the default build
+ * (LONGER_HUFFTABLE not defined, DIST_TABLE_SIZE == 2).
+ *
+ * DIST_TABLE_OFFSET is biased by one 4-byte entry and LEN_TABLE_OFFSET by
+ * three 4-byte entries, so get_dist_code can index with the raw distance
+ * and get_len_code with the raw match length.
+ */
+#define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) //328+8
+#define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) //336-4
+#define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) //336 + 2*4 - 4*3 = 332
+#define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
+#define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
+#define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
+#define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
+
+#define IGZIP_DECODE_OFFSET 0
+#define IGZIP_DIST_TABLE_SIZE 2
+
+/*
+ * Every macro below takes register NAMES (not raw registers) and reaches
+ * the 32/64-bit views through the w_<name> / x_<name> aliases, so each
+ * argument except hufftables must have been declared with both aliases.
+ */
+
+/*
+ * get_len_code: look up the packed entry for a match length.
+ *   hufftables  register holding the table base (used as written)
+ *   length      match length, used directly as the table index
+ *   code        out: entry >> 5
+ *   code_len    out: entry & 0x1f
+ *   tmp0        scratch, clobbered
+ */
+.macro get_len_code hufftables:req,length:req,code:req,code_len:req,tmp0:req
+	add x_\tmp0,\hufftables,LEN_TABLE_OFFSET
+	ldr w_\code_len,[x_\tmp0,x_\length,lsl 2]
+	lsr w_\code, w_\code_len , 5
+	and x_\code_len,x_\code_len,0x1f
+.endm
+
+/*
+ * get_lit_code: look up code and code length for a literal symbol.
+ *   lit         symbol index (callers in this patch pass 0..256)
+ *   code        out: 16-bit entry from the table at LIT_TABLE_OFFSET
+ *   code_len    out: 8-bit entry from the table at LIT_TABLE_SIZES_OFFSET
+ * code and code_len double as address scratch before receiving results.
+ */
+.macro get_lit_code hufftables:req,lit:req,code:req,code_len:req
+	add x_\code,\hufftables,LIT_TABLE_OFFSET
+	ldrh w_\code,[x_\code,x_\lit,lsl 1]
+	add x_\code_len,\hufftables,LIT_TABLE_SIZES_OFFSET
+	ldrb w_\code_len,[x_\code_len,x_\lit]
+.endm
+
+/*
+ * get_dist_code: produce code and code length for a match distance.
+ *
+ * Distances up to DIST_TABLE_SIZE come straight from the packed table
+ * (same entry layout as get_len_code).  Larger distances are split into a
+ * symbol plus extra bits; the symbol is looked up in the tables at
+ * DCODE_TABLE_OFFSET / DCODE_TABLE_SIZE_OFFSET and the extra bits are
+ * merged in above the symbol code.
+ *
+ *   dist            in: distance; CLOBBERED on the computed path
+ *   code, code_len  out
+ *   tmp0..tmp2      scratch, clobbered
+ *
+ * The comparison uses the dist parameter (not a fixed register alias) and
+ * the local labels carry the per-expansion counter, so the macro is safe
+ * with any argument name and can be expanded more than once per file.
+ */
+.macro get_dist_code hufftables:req,dist:req,code:req,code_len:req,tmp0:req,tmp1:req,tmp2:req
+	cmp w_\dist,DIST_TABLE_SIZE
+	bhi _compute_dist_code_\@
+	add x_\tmp0,\hufftables,DIST_TABLE_OFFSET
+	ldr w_\code_len,[x_\tmp0,x_\dist,lsl 2]
+	lsr w_\code, w_\code_len , 5
+	and x_\code_len,x_\code_len,0x1f
+	b _end_get_dist_code_\@
+_compute_dist_code_\@:
+	and w_\dist,w_\dist,0xffff
+	sub w_\dist,w_\dist,1
+	clz w_\tmp0,w_\dist
+	mov w_\tmp1,30
+	sub w_\tmp0,w_\tmp1,w_\tmp0 //tmp0 = num_extra_bits
+	mov w_\tmp1,1
+	lsl w_\tmp1,w_\tmp1,w_\tmp0
+	sub w_\tmp1,w_\tmp1,1
+	and w_\tmp1,w_\tmp1,w_\dist //tmp1 = extra_bits
+	asr w_\dist,w_\dist,w_\tmp0
+	lsl w_\tmp2,w_\tmp0,1
+	add w_\tmp2,w_\dist,w_\tmp2 //tmp2 = sym
+
+	add x_\code,\hufftables,DCODE_TABLE_OFFSET - IGZIP_DECODE_OFFSET*2
+	add x_\code_len,\hufftables,DCODE_TABLE_SIZE_OFFSET - IGZIP_DECODE_OFFSET
+	ldrh w_\code,[x_\code,x_\tmp2,lsl 1]
+	ldrb w_\code_len,[x_\code_len,x_\tmp2]
+	lsl w_\tmp1,w_\tmp1,w_\code_len //place extra bits above the symbol code
+	orr w_\code,w_\code,w_\tmp1
+	add w_\code_len,w_\code_len,w_\tmp0
+
+_end_get_dist_code_\@:
+.endm
+
+
+/*
+ * compare_258_bytes: count leading bytes that are equal in str0 and str1,
+ * capped at 258.
+ *
+ * Data is compared 8 bytes per iteration: eor exposes differing bits, and
+ * rbit + clz + lsr 3 convert the lowest differing bit into the number of
+ * equal leading bytes (8 when the two words are identical).  The loop
+ * continues while the running count is at most 257 and the last word
+ * matched completely, so 8-byte loads can reach beyond offset 258 of
+ * both strings; the caller must guarantee that much readable memory.
+ *
+ *   match_length  out: min(equal bytes, 258)
+ *   tmp0, tmp1    scratch, clobbered
+ */
+.macro compare_258_bytes str0:req,str1:req,match_length:req,tmp0:req,tmp1:req
+	mov x_\match_length,0
+_compare_258_loop_\@:
+	ldr x_\tmp0,[x_\str0,x_\match_length]
+	ldr x_\tmp1,[x_\str1,x_\match_length]
+	eor x_\tmp0,x_\tmp1,x_\tmp0
+	rbit x_\tmp0,x_\tmp0
+	clz x_\tmp0,x_\tmp0
+	lsr x_\tmp0,x_\tmp0,3
+	add x_\match_length,x_\match_length,x_\tmp0
+
+
+	cmp x_\match_length,257
+	ccmp x_\tmp0,8,0,ls
+	beq _compare_258_loop_\@
+
+	cmp x_\match_length,258
+	mov x_\tmp1,258
+	csel x_\match_length,x_\match_length,x_\tmp1,ls
+.endm
+
+/*
+ * compare_max_258_bytes: same as compare_258_bytes, with an additional
+ * caller supplied limit.
+ *
+ *   max_length    in: limit; CLOBBERED (clamped to at most 258)
+ *   match_length  out: min(equal bytes, max_length, 258)
+ *   tmp0, tmp1    scratch, clobbered
+ *
+ * NOTE(review): the loop still issues full 8-byte loads while fewer than
+ * 8 bytes remain below max_length, so up to 7 bytes past str1 + max_length
+ * are read (the result is clamped afterwards).  Confirm callers tolerate
+ * that over-read.
+ */
+.macro compare_max_258_bytes str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
+	mov x_\match_length,0
+	mov x_\tmp0,258
+	cmp x_\max_length,x_\tmp0
+	csel x_\max_length,x_\max_length,x_\tmp0,ls
+_compare_max_258_loop_\@:
+	ldr x_\tmp0,[x_\str0,x_\match_length]
+	ldr x_\tmp1,[x_\str1,x_\match_length]
+	eor x_\tmp0,x_\tmp1,x_\tmp0
+	rbit x_\tmp0,x_\tmp0
+	clz x_\tmp0,x_\tmp0
+	lsr x_\tmp0,x_\tmp0,3
+	add x_\match_length,x_\match_length,x_\tmp0
+
+
+	cmp x_\max_length,x_\match_length
+	ccmp x_\tmp0,8,0,hi
+	beq _compare_max_258_loop_\@
+
+	cmp x_\match_length,x_\max_length
+	csel x_\match_length,x_\match_length,x_\max_length,ls
+.endm
+
+#endif /* __ASSEMBLY__ */
+#endif /* __HUFFMAN_AARCH64_H__ */
diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S
new file mode 100644
index
000000000..9f0e8cd73 --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S @@ -0,0 +1,261 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+	.align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+/*
+declare Macros
+*/
+
+/*
+ * declare_generic_reg: bind a symbolic name to register number reg.
+ * Creates three aliases: <name> (width chosen by default, w or x),
+ * w_<name> and x_<name>.
+ */
+.macro declare_generic_reg name:req,reg:req,default:req
+	\name .req \default\reg
+	w_\name .req w\reg
+	x_\name .req x\reg
+.endm
+
+/*
+ * update_state: write the progress made by the compression loop back into
+ * the isal_zstream.
+ *
+ *   - has_hist is set to 1 when at least one input byte was consumed
+ *   - next_in / avail_in / total_in are advanced by the bytes consumed
+ *   - next_out / avail_out / total_out are advanced by
+ *     (m_out_buf - m_out_start), the bytes written
+ *
+ * stream is used as written; all other arguments are register names with
+ * w_/x_ aliases.  start_in, next_in, end_in, m_out_buf, m_out_start and
+ * tmp0 are clobbered.  tmp1 is accepted but not used.
+ */
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+	m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+
+	//m_out_buf=bytes_written
+	sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+	//compare the macro arguments, not fixed register aliases
+	cmp x_\next_in,x_\start_in
+	bls skip_has_hist_\@
+	mov w_\tmp0,1
+	strb w_\tmp0,[x_\stream,_internal_state_has_hist]
+skip_has_hist_\@:
+	ldr w_\tmp0,[\stream,_total_in]
+	ldr x_\m_out_start,[\stream,_next_out] //m_out_start = next_out
+
+	str x_\next_in,[\stream,_next_in]
+	sub x_\start_in,x_\next_in,x_\start_in //start_in = bytes consumed
+	sub x_\end_in,x_\end_in,x_\next_in //end_in = bytes left
+	add w_\tmp0,w_\tmp0,w_\start_in
+	stp w_\end_in,w_\tmp0,[\stream,_avail_in] //avail_in and total_in as a pair
+	//next_in=avail_out,start_in=total_out
+	ldp w_\next_in,w_\start_in,[\stream,_avail_out]
+	add x_\m_out_start,x_\m_out_start,x_\m_out_buf
+	str x_\m_out_start,[\stream,_next_out]
+	add w_\start_in,w_\start_in,w_\m_out_buf
+	sub w_\next_in,w_\next_in,w_\m_out_buf
+	stp w_\next_in,w_\start_in,[\stream,_avail_out] //avail_out and total_out as a pair
+.endm
+
+
+	.global isal_deflate_body_aarch64
+	.type isal_deflate_body_aarch64, %function
+/*
+	void isal_deflate_body_aarch64(struct isal_zstream *stream)
+
+	Bulk compression loop.  For each position a 4-byte window is hashed
+	with crc32cw, the previous position stored for that hash in the head
+	table gives a candidate distance, and either a length/distance pair or
+	a single literal is emitted through update_bits.
+
+	The main loop runs only while next_in <= end_in - 289, so the 4-byte
+	hash loads and the 8-byte loads of compare_258_bytes stay inside the
+	input.  The remaining tail is left unconsumed.  When the loop ends and
+	the 32-bit word at _end_of_stream (end_of_stream and flush loaded
+	together) is non zero, the state becomes ZSTATE_FLUSH_READ_BUFFER.
+
+	With avail_in == 0 only that state transition is performed.
+
+	NOTE(review): m_bit_count is bound to w18.  AAPCS64 designates x18 as
+	the platform register; confirm that every targeted OS leaves it free
+	and that push_stack/pop_stack (defined elsewhere) preserve x19-x28.
+
+	Register aliases are deliberately shared: code_len reuses the hash
+	register (24) and code2 reuses the end_of_stream register (10).
+*/
+	declare_generic_reg stream, 0,x //struct isal_zstream *stream
+	declare_generic_reg state, 8,x //&stream->state
+	declare_generic_reg avail_in, 9,w
+	declare_generic_reg end_of_stream, 10,w //can be used in loop
+
+	declare_generic_reg hash_mask, 11,w
+	declare_generic_reg match_length, 12,w
+	declare_generic_reg hufftables, 13,x
+
+	declare_generic_reg m_out_buf, 14,x
+	declare_generic_reg m_out_start, 15,x
+	declare_generic_reg m_out_end, 16,x
+	declare_generic_reg m_bits, 17,x
+	declare_generic_reg m_bit_count, 18,w
+
+	declare_generic_reg start_in, 19,x
+	declare_generic_reg end_in, 20,x
+	declare_generic_reg next_in, 21,x
+	declare_generic_reg loop_end_cnt, 22,x
+
+	declare_generic_reg literal, 23,w
+	declare_generic_reg hash, 24,w
+	declare_generic_reg dist, 25,w
+
+	declare_generic_reg last_seen, 26,x
+	declare_generic_reg file_start, 27,x
+	declare_generic_reg hist_size, 28,w
+
+	declare_generic_reg tmp0, 5 ,w
+	declare_generic_reg tmp1, 6 ,w
+	declare_generic_reg tmp2, 7 ,w
+
+	declare_generic_reg code, 3,x
+	declare_generic_reg code_len, 24,x
+	declare_generic_reg code2, 10,x
+	declare_generic_reg code_len2, 4,x
+
+
+isal_deflate_body_aarch64:
+	//save registers
+	push_stack
+	ldr avail_in, [stream, _avail_in]
+	cbz avail_in, exit_save_state
+
+	// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+	ldr w_m_out_end,[stream,_avail_out]
+	ldr m_out_buf,[stream,_next_out]
+	add m_out_end,m_out_buf,w_m_out_end,uxtw
+	sub m_out_end,m_out_end , 8 //m_out_end = next_out + avail_out - 8
+	mov m_out_start,m_out_buf
+	stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
+	str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
+	ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
+	ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
+
+
+	//init variables
+	//last_seen=&stream.internal_state.head = _internal_state+_head
+	//the offset is added in two steps (65536, then the remainder)
+	add last_seen,stream,65536
+	add last_seen,last_seen,_internal_state+_head -65536
+
+
+	//start_in=stream->next_in;next_in=start_in
+	ldr start_in,[stream,_next_in]
+	mov next_in,start_in
+	add end_in,start_in,avail_in,uxtw //avail_in reg is free now
+	sub loop_end_cnt,end_in,289 //loop end
+	//flags set here are consumed by "bhi main_loop_end" below; none of
+	//the instructions in between modify the condition flags
+	cmp next_in,loop_end_cnt
+
+
+	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+	ldr w_file_start,[stream,_total_in]
+	sub file_start,next_in,file_start,uxtw
+
+	//uint32_t hist_size = state->dist_mask;
+	ldr hist_size,[stream,_internal_state + _dist_mask]
+
+	//uint32_t hash_mask = state->hash_mask;
+	ldr hash_mask,[stream,_internal_state + _hash_mask]
+
+	ldr hufftables,[stream,_hufftables]
+
+	bhi main_loop_end
+main_loop_start:
+	//is_full(&state->bitbuf)
+	cmp m_out_buf,m_out_end
+	bhi update_state_exit
+
+	//hash the 4 bytes at next_in
+	ldr literal,[next_in]
+	crc32cw hash,wzr,literal
+	and hash,hash,hash_mask
+
+	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+	ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
+	sub x_dist,next_in,file_start
+	//last_seen[hash] = (uint64_t) (next_in - file_start);
+	strh dist,[last_seen,x_hash,lsl 1]
+	sub dist,dist,w_tmp0
+	and dist,dist,0xffff
+
+	//emit a literal unless dist - 1 < hist_size (unsigned, so dist == 0
+	//also falls through to the literal path)
+	sub w_tmp0,dist,1
+	cmp hist_size,w_tmp0
+	bls get_lit_code
+
+	///match_length = compare258(next_in - dist, next_in, 258);
+	sub x_tmp2,next_in,x_dist
+	compare_258_bytes tmp2,next_in,match_length,tmp0,tmp1
+	//matches of 3 bytes or fewer are emitted as a literal instead
+	cmp match_length,3
+	bls get_lit_code
+
+	//record the positions next_in + 1 and next_in + 2 in the head table
+	sub x_tmp0,next_in,file_start
+	ldr literal,[next_in,1]
+	crc32cw hash,wzr,literal
+	and hash,hash,hash_mask
+	add tmp0,tmp0,1
+	strh tmp0,[last_seen,x_hash,lsl 1]
+	//call_print_b hash,dist,last_seen
+
+	ldr literal,[next_in,2]
+	crc32cw hash,wzr,literal
+	and hash,hash,hash_mask
+	add tmp0,tmp0,1
+	strh tmp0,[last_seen,x_hash,lsl 1]
+
+	//get_len_code(stream->hufftables, match_length, &code,
+	//	&code_len);
+	get_len_code hufftables,match_length,code,code_len,tmp0
+
+	//get_dist_code(stream->hufftables, dist, &code2, &code_len2);
+	get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
+
+	//code |= code2 << code_len;
+	//code_len += code_len2;
+	lsl code2,code2,code_len
+	orr code,code,code2
+	add code_len,code_len,code_len2
+
+	//next_in += match_length;
+	add next_in,next_in,match_length,uxtw
+
+	//write_bits(&state->bitbuf, code, code_len);
+	update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+
+
+
+	cmp next_in,loop_end_cnt
+	bls main_loop_start
+	b main_loop_end
+get_lit_code:
+	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+	and literal,literal,0xff
+	get_lit_code hufftables,literal,code,code_len
+
+	//next_in++;
+	add next_in,next_in,1
+
+	//write_bits(&state->bitbuf, code, code_len);
+	update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+	cmp next_in,loop_end_cnt
+	bls main_loop_start
+
+main_loop_end:
+	//update state here
+
+	//load end_of_stream and flush together
+	ldr w_end_of_stream, [stream, _end_of_stream]
+	//(stream->end_of_stream || stream->flush != 0)
+	cbz w_end_of_stream, update_state_exit
+	mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
+	str w_tmp0, [stream, _internal_state+_state]
+update_state_exit:
+	update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
+exit_ret:
+	pop_stack
+	ret
+exit_save_state:
+	//no input: only the state transition, stream counters stay untouched
+	ldr w_end_of_stream, [stream, _end_of_stream]
+	cbz w_end_of_stream, exit_ret //(stream->end_of_stream || stream->flush != 0)
+	mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
+	str w_tmp0, [stream, _internal_state+_state]
+	b exit_ret
+	.size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S
new file mode 100644
index 000000000..671091346
--- /dev/null
+++ b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S
@@ -0,0 +1,264 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+	.align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+
+/*
+declare Macros
+*/
+
+/*
+ * declare_generic_reg: bind a symbolic name to register number reg.
+ * Creates three aliases: <name> (width chosen by default, w or x),
+ * w_<name> and x_<name>.
+ */
+.macro declare_generic_reg name:req,reg:req,default:req
+	\name .req \default\reg
+	w_\name .req w\reg
+	x_\name .req x\reg
+.endm
+
+/*
+ * update_state: write the progress made by the compression loops back
+ * into the isal_zstream.
+ *
+ *   - has_hist is set to 1 when at least one input byte was consumed
+ *   - next_in / avail_in / total_in are advanced by the bytes consumed
+ *   - next_out / avail_out / total_out are advanced by
+ *     (m_out_buf - m_out_start), the bytes written
+ *
+ * stream is used as written; all other arguments are register names with
+ * w_/x_ aliases.  start_in, next_in, end_in, m_out_buf, m_out_start and
+ * tmp0 are clobbered.  tmp1 is accepted but not used.
+ */
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+	m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+
+	//m_out_buf=bytes_written
+	sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+	//compare the macro arguments, not fixed register aliases
+	cmp x_\next_in,x_\start_in
+	bls skip_has_hist_\@
+	mov w_\tmp0,1
+	strb w_\tmp0,[x_\stream,_internal_state_has_hist]
+skip_has_hist_\@:
+	ldr w_\tmp0,[\stream,_total_in]
+	ldr x_\m_out_start,[\stream,_next_out] //m_out_start = next_out
+
+	str x_\next_in,[\stream,_next_in]
+	sub x_\start_in,x_\next_in,x_\start_in //start_in = bytes consumed
+	sub x_\end_in,x_\end_in,x_\next_in //end_in = bytes left
+	add w_\tmp0,w_\tmp0,w_\start_in
+	stp w_\end_in,w_\tmp0,[\stream,_avail_in] //avail_in and total_in as a pair
+	//next_in=avail_out,start_in=total_out
+	ldp w_\next_in,w_\start_in,[\stream,_avail_out]
+	add x_\m_out_start,x_\m_out_start,x_\m_out_buf
+	str x_\m_out_start,[\stream,_next_out]
+	add w_\start_in,w_\start_in,w_\m_out_buf
+	sub w_\next_in,w_\next_in,w_\m_out_buf
+	stp w_\next_in,w_\start_in,[\stream,_avail_out] //avail_out and total_out as a pair
+.endm
+	.global isal_deflate_finish_aarch64
+	.arch armv8-a+crc
+	.type isal_deflate_finish_aarch64, %function
+/*
+	void isal_deflate_finish_aarch64(struct isal_zstream *stream)
+
+	Compresses all remaining input and terminates the block.
+
+	  1. main loop: while next_in <= end_in - 4, hash 4 bytes, look for a
+	     match limited to the bytes left (compare_max_258_bytes) and emit
+	     either a length/distance pair or a literal
+	  2. second loop: the last (up to 3) bytes are emitted as literals
+	  3. symbol 256 is emitted, has_eob is set, and the state becomes
+	     ZSTATE_TRL when end_of_stream == 1, otherwise ZSTATE_SYNC_FLUSH
+
+	Each stage first checks the output buffer; when m_out_buf is beyond
+	m_out_end the function stores its progress and returns without
+	finishing the later stages.
+
+	NOTE(review): m_bit_count is bound to w18.  AAPCS64 designates x18 as
+	the platform register; confirm that every targeted OS leaves it free
+	and that push_stack/pop_stack (defined elsewhere) preserve x19-x28.
+
+	Register aliases are deliberately shared: code_len reuses the hash
+	register (24) and code2 reuses the end_of_stream register (10).
+*/
+	declare_generic_reg stream, 0,x //struct isal_zstream *stream
+	declare_generic_reg state, 8,x //&stream->state
+	declare_generic_reg avail_in, 9,w
+	declare_generic_reg end_of_stream, 10,w //can be used in loop
+
+	declare_generic_reg hash_mask, 11,w
+	declare_generic_reg match_length, 12,w
+	declare_generic_reg hufftables, 13,x
+
+	declare_generic_reg m_out_buf, 14,x
+	declare_generic_reg m_out_start, 15,x
+	declare_generic_reg m_out_end, 16,x
+	declare_generic_reg m_bits, 17,x
+	declare_generic_reg m_bit_count, 18,w
+
+	declare_generic_reg start_in, 19,x
+	declare_generic_reg end_in, 20,x
+	declare_generic_reg next_in, 21,x
+	declare_generic_reg loop_end_cnt, 22,x
+
+	declare_generic_reg literal, 23,w
+	declare_generic_reg hash, 24,w
+	declare_generic_reg dist, 25,w
+
+	declare_generic_reg last_seen, 26,x
+	declare_generic_reg file_start, 27,x
+	declare_generic_reg hist_size, 28,w
+
+	declare_generic_reg tmp0, 5 ,w
+	declare_generic_reg tmp1, 6 ,w
+	declare_generic_reg tmp2, 7 ,w
+
+	declare_generic_reg code, 3,x
+	declare_generic_reg code_len, 24,x
+	declare_generic_reg code2, 10,x
+	declare_generic_reg code_len2, 4,x
+
+
+isal_deflate_finish_aarch64:
+	//save registers
+	push_stack
+
+	// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+	ldr w_m_out_end,[stream,_avail_out]
+	ldr m_out_buf,[stream,_next_out]
+	add m_out_end,m_out_buf,w_m_out_end,uxtw
+	sub m_out_end,m_out_end , 8 //m_out_end = next_out + avail_out - 8
+	mov m_out_start,m_out_buf
+	stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
+	str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
+	ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
+	ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
+
+	//init variables
+	//last_seen=&stream.internal_state.head = _internal_state+_head
+	//the offset is added in two steps (65536, then the remainder)
+	add last_seen,stream,65536
+	add last_seen,last_seen,_internal_state+_head -65536
+
+
+	//start_in=stream->next_in;next_in=start_in
+	ldr avail_in, [stream, _avail_in]
+	ldr start_in,[stream,_next_in]
+	mov next_in,start_in
+	add end_in,start_in,avail_in,uxtw //avail_in reg is free now
+	ldr hufftables,[stream,_hufftables]
+	cbz avail_in, update_not_full //no input: only the end of block is written
+
+
+	sub loop_end_cnt,end_in,4 //loop end
+	//flags set here are consumed by "bhi main_loop_end" below; none of
+	//the instructions in between modify the condition flags
+	cmp next_in,loop_end_cnt
+
+
+	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+	ldr w_file_start,[stream,_total_in]
+	sub file_start,next_in,file_start,uxtw
+
+	//uint32_t hist_size = state->dist_mask;
+	ldr hist_size,[stream,_internal_state + _dist_mask]
+
+	//uint32_t hash_mask = state->hash_mask;
+	ldr hash_mask,[stream,_internal_state + _hash_mask]
+
+	bhi main_loop_end
+main_loop_start:
+	//is_full(&state->bitbuf)
+	cmp m_out_buf,m_out_end
+	bhi update_state_exit
+
+	//hash the 4 bytes at next_in (at least 4 bytes remain in this loop)
+	ldr literal,[next_in]
+	crc32cw hash,wzr,literal
+	and hash,hash,hash_mask
+
+	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+	ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
+	sub x_dist,next_in,file_start
+	//last_seen[hash] = (uint64_t) (next_in - file_start);
+	strh dist,[last_seen,x_hash,lsl 1]
+	sub dist,dist,w_tmp0
+	and dist,dist,0xffff
+
+	//emit a literal unless dist - 1 < hist_size (unsigned, so dist == 0
+	//also falls through to the literal path)
+	sub w_tmp0,dist,1
+	cmp hist_size,w_tmp0
+	bls get_lit_code
+
+	/// match_length = compare258(next_in - dist, next_in, 258);
+	sub x_tmp2,next_in,x_dist
+	sub x_hash,end_in,next_in //hash register reused as max_length = bytes left
+	compare_max_258_bytes tmp2,next_in,hash,match_length,tmp0,tmp1
+	//matches of 3 bytes or fewer are emitted as a literal instead
+	cmp match_length,3
+	bls get_lit_code
+
+	get_len_code hufftables,match_length,code,code_len,tmp0
+	get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
+
+	//code |= code2 << code_len;
+	//code_len += code_len2;
+	lsl code2,code2,code_len
+	orr code,code,code2
+	add code_len,code_len,code_len2
+
+	//next_in += match_length;
+	add next_in,next_in,match_length,uxtw
+
+	//write_bits(&state->bitbuf, code, code_len);
+	update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+
+	cmp next_in,loop_end_cnt
+	bls main_loop_start
+	b main_loop_end
+get_lit_code:
+	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+	and literal,literal,0xff
+	get_lit_code hufftables,literal,code,code_len
+
+	//next_in++;
+	add next_in,next_in,1
+
+	//write_bits(&state->bitbuf, code, code_len);
+	update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+	cmp next_in,loop_end_cnt
+	bls main_loop_start
+main_loop_end:
+	//tail: bytes up to and including end_in - 1 are emitted as literals
+	sub loop_end_cnt,end_in,1
+	cmp next_in,loop_end_cnt
+	bhi update_not_full
+second_loop_start:
+	cmp m_out_buf,m_out_end
+	bhi update_state_exit
+	//byte load: fewer than 4 bytes may remain here, so a 32-bit load
+	//would read past end_in; ldrb also zero-extends, no mask needed
+	ldrb literal,[next_in]
+	get_lit_code hufftables,literal,code,code_len
+	//next_in++;
+	add next_in,next_in,1
+
+	//write_bits(&state->bitbuf, code, code_len);
+	update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+	cmp next_in,loop_end_cnt
+	bls second_loop_start
+
+update_not_full:
+	cmp m_out_buf,m_out_end
+	bhi update_state_exit
+
+	//emit symbol 256
+	mov literal,256
+	get_lit_code hufftables,literal,code,code_len
+
+	//write_bits(&state->bitbuf, code, code_len);
+	update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+	ldrh w_end_of_stream, [stream, _end_of_stream]
+	mov w_tmp0,1
+	strb w_tmp0,[stream,_internal_state_has_eob]
+	//state = (end_of_stream == 1) ? ZSTATE_TRL : ZSTATE_SYNC_FLUSH
+	cmp w_end_of_stream,w_tmp0
+	mov w_tmp0, ZSTATE_TRL
+	mov w_tmp1, ZSTATE_SYNC_FLUSH
+	csel w_tmp0,w_tmp0,w_tmp1,eq
+	str w_tmp0, [stream, _internal_state+_state]
+
+update_state_exit:
+	update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
+	pop_stack
+	ret
+
+	.size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
diff
--git a/src/spdk/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S b/src/spdk/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S new file mode 100644 index 000000000..441fd4d5e --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S @@ -0,0 +1,32 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include "aarch64_multibinary.h" + +mbin_interface_base decode_huffman_code_block_stateless,decode_huffman_code_block_stateless_base diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S b/src/spdk/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S new file mode 100644 index 000000000..78d23940d --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S @@ -0,0 +1,178 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+	.arch armv8-a+crypto
+	.text
+	.align 3
+
+/*
+Macros
+*/
+
+/*
+ * declare_var_vector_reg: create the q/v/s/d views <name>_q, <name>_v,
+ * <name>_s and <name>_d of SIMD register number reg.
+ */
+.macro declare_var_vector_reg name:req,reg:req
+	\name\()_q .req q\reg
+	\name\()_v .req v\reg
+	\name\()_s .req s\reg
+	\name\()_d .req d\reg
+.endm
+
+/*
+ * mod_adler: dest = dest mod 65521, without a divide instruction.
+ * The quotient is (dest * const_div1) >> 47 with const_div1 = 0x80078071,
+ * then dest -= quotient * const_div2 (65521).
+ * dest is a 32-bit register alias; tmp must have a <tmp>_x 64-bit alias.
+ */
+.macro mod_adler dest:req,tmp:req
+	umull \tmp\()_x,\dest,const_div1
+	lsr \tmp\()_x,\tmp\()_x,47
+	msub \dest,\tmp,const_div2,\dest
+.endm
+
+/*
+	uint32_t adler32_neon(uint32_t adler32, uint8_t * start, uint32_t length);
+
+	Updates an Adler-32 checksum: the low 16 bits of adler32 are the byte
+	sum, the high 16 bits the sum of sums; the result has the same layout.
+
+	Complete 32-byte blocks are processed with NEON in chunks of at most
+	173 blocks, reducing both sums modulo 65521 after every chunk.  173
+	blocks are 5536 bytes, presumably chosen so the 32-bit sums cannot
+	overflow before the reduction (zlib uses 5552 for the same purpose).
+	The remaining length & 31 bytes are added one at a time.
+
+	NOTE(review): length is consumed as the full 64-bit register x2.  If
+	the C prototype really declares a 32-bit length, the upper half of x2
+	is unspecified under AAPCS64 - confirm the declared parameter type.
+*/
+/*
+Arguments list
+*/
+	adler32 .req w0
+	start .req x1
+	length .req x2
+	.global adler32_neon
+	.type adler32_neon, %function
+adler32_neon:
+/*
+local variables
+*/
+	declare_var_vector_reg factor0 , 6
+	declare_var_vector_reg factor1 , 7
+	declare_var_vector_reg d0 , 4
+	declare_var_vector_reg d1 , 5
+	declare_var_vector_reg adacc , 2
+	declare_var_vector_reg s2acc , 3
+	declare_var_vector_reg zero , 16
+	declare_var_vector_reg adler , 17
+	declare_var_vector_reg sum2 , 20
+	declare_var_vector_reg tmp2 , 20
+
+	adler0 .req w4
+	adler1 .req w5
+	adler0_x .req x4
+	adler1_x .req x5
+	end .req x0
+	tmp .req w8
+	tmp_x .req x8
+	tmp1_x .req x9
+	loop_cnt .req x10
+	loop_const .req x11
+	const_div1 .req w6
+	const_div2 .req w7
+	//const_div1 = 0x80078071, const_div2 = 65521 (see mod_adler)
+	mov const_div1, 32881
+	movk const_div1, 0x8007, lsl 16
+	mov const_div2, 65521
+	//split the incoming checksum before x0 is reused as the end pointer
+	and adler0, adler32, 0xffff
+	lsr adler1, adler32, 16
+
+	lsr loop_cnt,length,5 //number of complete 32-byte blocks
+	adrp x3,factors
+	add x3,x3,:lo12:factors
+	ld1 {factor0_v.16b-factor1_v.16b},[x3]
+
+	add end,start,length
+	cbz loop_cnt,final_accum32
+	mov loop_const,173
+
+	movi zero_v.4s,0
+
+great_than_32:
+	//loop_const = min(loop_cnt, 173) blocks for this chunk
+	cmp loop_cnt,173
+	csel loop_const,loop_cnt,loop_const,le
+	//restart the vector accumulators from the reduced scalar sums
+	mov adacc_v.16b,zero_v.16b
+	mov s2acc_v.16b,zero_v.16b
+	ins adacc_v.s[0],adler0
+	ins s2acc_v.s[0],adler1
+	add tmp_x,start,loop_const,lsl 5 //tmp_x = end of this chunk
+
+accum32_neon:
+	//The block is loaded here with post-increment instead of preloading
+	//the following block, so nothing beyond the last complete 32-byte
+	//block is ever read.
+	ld1 {d0_v.16b-d1_v.16b},[start],32
+
+	//the byte sum so far contributes 32 times to the sum of sums
+	shl tmp2_v.4s,adacc_v.4s,5
+	add s2acc_v.4s,s2acc_v.4s,tmp2_v.4s
+
+	//byte sum of the 32 bytes
+	uaddlp adler_v.8h,d0_v.16b
+	uadalp adler_v.8h,d1_v.16b
+	uadalp adacc_v.4s,adler_v.8h
+
+	//weighted sum: byte i of the block times (32 - i)
+	umull sum2_v.8h,factor0_v.8b ,d0_v.8b
+	umlal2 sum2_v.8h,factor0_v.16b,d0_v.16b
+	umlal sum2_v.8h,factor1_v.8b ,d1_v.8b
+	umlal2 sum2_v.8h,factor1_v.16b,d1_v.16b
+	uadalp s2acc_v.4s,sum2_v.8h
+
+	cmp start,tmp_x
+	bne accum32_neon
+
+	//fold the vector lanes back into the scalar sums and reduce
+	uaddlv adacc_d,adacc_v.4s
+	uaddlv s2acc_d,s2acc_v.4s
+	fmov adler0_x,adacc_d
+	fmov adler1_x,s2acc_d
+
+	mod_adler adler0,tmp
+	mod_adler adler1,tmp
+	sub loop_cnt,loop_cnt,loop_const
+	cbnz loop_cnt,great_than_32
+
+final_accum32:
+	and length,length,31
+	cbz length,end_func
+
+accum32_body:
+	//remaining bytes, one at a time
+	cmp start,end
+	beq end_func
+	ldrb tmp,[start],1
+	add adler0,adler0,tmp
+	add adler1,adler1,adler0
+	b accum32_body
+
+end_func:
+	mod_adler adler0,tmp
+	mod_adler adler1,tmp
+	orr w0,adler0,adler1,lsl 16
+	ret
+
+	.size adler32_neon, .-adler32_neon
+	.section .rodata.cst16,"aM",@progbits,16
+	.align 4
+/* per-byte weights 32, 31, ... 1 in memory order */
+factors:
+	.quad 0x191a1b1c1d1e1f20
+	.quad 0x1112131415161718
+	.quad 0x090a0b0c0d0e0f10
+	.quad 0x0102030405060708
+
diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c
new file mode 100644
index 000000000..968b013a8
--- /dev/null
+++
b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c @@ -0,0 +1,123 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include "aarch64_multibinary.h" /* header name restored: the directive had lost its operand; presumably supplies DEFINE_INTERFACE_DISPATCHER, PROVIDER_INFO and PROVIDER_BASIC - confirm */ + +DEFINE_INTERFACE_DISPATCHER(isal_adler32) /* each dispatcher reads AT_HWCAP and returns the provider to use; here adler32_neon when HWCAP_ASIMD is set */ +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(adler32_neon); + + return PROVIDER_BASIC(adler32); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_body) /* aarch64 body when HWCAP_CRC32 is set */ +{ + unsigned long auxval = getauxval(AT_HWCAP); + + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_body_aarch64); + + return PROVIDER_BASIC(isal_deflate_body); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish) /* aarch64 finish when HWCAP_CRC32 is set */ +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_finish_aarch64); + + return PROVIDER_BASIC(isal_deflate_finish); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1) /* levels 1 and 2 select the same hash_hist providers */ +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3) +{
+ /* + * Both the HWCAP_CRC32 path and the fallback selected this same provider, + * so the capability probe was dead code; the provider is returned unconditionally. + */ + return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3) /* NOTE(review): the CRC32 path names a _base provider directly - confirm it differs from the PROVIDER_BASIC fallback */ +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map); +} + +DEFINE_INTERFACE_DISPATCHER(isal_update_histogram) /* aarch64 histogram when HWCAP_CRC32 is set */ +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_update_histogram_aarch64); + + return PROVIDER_BASIC(isal_update_histogram); +} diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_arm64.S b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_arm64.S new file mode 100644 index 000000000..3d96c731c --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_arm64.S @@ -0,0 +1,50 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "aarch64_multibinary.h" /* presumably defines mbin_interface and mbin_interface_base - confirm */ + + +mbin_interface isal_deflate_icf_body_lvl1 /* every name given to mbin_interface in this list has a DEFINE_INTERFACE_DISPATCHER in igzip_multibinary_aarch64_dispatcher.c */ +mbin_interface isal_deflate_icf_body_lvl2 +mbin_interface isal_deflate_icf_body_lvl3 +mbin_interface isal_deflate_icf_finish_lvl1 +mbin_interface isal_deflate_icf_finish_lvl2 +mbin_interface isal_deflate_icf_finish_lvl3 +mbin_interface isal_update_histogram +mbin_interface_base encode_deflate_icf , encode_deflate_icf_base /* mbin_interface_base takes the interface name and a fixed target symbol */ +mbin_interface_base set_long_icf_fg , set_long_icf_fg_base +mbin_interface_base gen_icf_map_lh1 , gen_icf_map_h1_base /* NOTE(review): the interface is spelled lh1 but the target h1 - confirm this is intended */ +mbin_interface_base isal_deflate_hash_lvl0 , isal_deflate_hash_base /* all four hash levels share isal_deflate_hash_base */ +mbin_interface_base isal_deflate_hash_lvl1 , isal_deflate_hash_base +mbin_interface_base isal_deflate_hash_lvl2 , isal_deflate_hash_base +mbin_interface_base isal_deflate_hash_lvl3 , isal_deflate_hash_base + +mbin_interface isal_deflate_body +mbin_interface isal_deflate_finish +mbin_interface isal_adler32 diff --git a/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S new file mode 100644 index 000000000..217cc5b73 --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S @@ -0,0 +1,364 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + .arch armv8-a+crc + .text + .align 2 + +#include "lz0a_const_aarch64.h" +#include "data_struct_aarch64.h" +#include "huffman_aarch64.h" +#include "bitbuf2_aarch64.h" +#include "stdmac_aarch64.h" + +/* +declare Macros: declare_generic_reg binds name to register number reg with the width prefix given by default (x or w), and also defines w_name and x_name for the 32-bit and 64-bit views +*/ +.macro declare_generic_reg name:req,reg:req,default:req + \name .req \default\reg + w_\name .req w\reg + x_\name .req x\reg +.endm + + .global isal_deflate_icf_body_hash_hist_aarch64 + .type isal_deflate_icf_body_hash_hist_aarch64, %function +/* +void isal_deflate_icf_body_hash_hist_aarch64(struct isal_zstream *stream); +*/ + +/* constant */ + +/* offset of struct isal_zstream */ +.equ offset_next_in, 0 +.equ offset_avail_in, 8 +.equ offset_total_in, 12 +.equ offset_next_out, 16 +.equ offset_avail_out, 24 +.equ offset_total_out, 28 +.equ offset_hufftables, 32 +.equ offset_level, 40 +.equ offset_level_buf_size, 44 +.equ offset_level_buf, 48 +.equ offset_end_of_stream, 56 +.equ offset_flush, 58 +.equ offset_gzip_flag, 60 +.equ offset_hist_bits, 62 +.equ offset_state, 64 +.equ offset_state_block_end, 72 +.equ offset_state_has_hist, 135 + +/* offset of struct level_buf */ +.equ offset_encode_tables, 0 +.equ offset_hist, 2176 +.equ offset_hist_d_hist, 2176 +.equ offset_hist_ll_hist, 2296 +.equ offset_deflate_hdr_count, 4348 +.equ offset_deflate_hdr_extra_bits, 4352 +.equ offset_deflate_hdr, 4356 +.equ offset_icf_buf_next, 4688 +.equ offset_icf_buf_avail_out, 4696 +.equ offset_icf_buf_start, 4704 +.equ offset_hash8k, 4712 +.equ offset_hash_hist, 4712 + +/* offset of struct isal_zstate (applied to the state pointer, which is stream + offset_state) */ +.equ offset_dist_mask, 12 +.equ offset_hash_mask, 16 + +/* macros*/ +.equ ISAL_LOOK_AHEAD, 288 + + /* arguments */ + declare_generic_reg stream, 0,x + declare_generic_reg stream_saved, 11,x + + declare_generic_reg param0, 0,x + declare_generic_reg param1, 1,x + declare_generic_reg param2, 2,x + + /* local variable */ + declare_generic_reg level_buf, 18,x /* NOTE(review): x18 is the platform register in AAPCS64 - confirm the target OS leaves it free for general use */ + declare_generic_reg avail_in, 13,w +
declare_generic_reg end_in, 13,x /* same register number as avail_in */ + declare_generic_reg start_in, 19,x + declare_generic_reg next_in, 9,x + declare_generic_reg next_in_iter, 14,x + declare_generic_reg state, 24,x + declare_generic_reg hist_size, 22,w + declare_generic_reg hash_mask, 21,w + declare_generic_reg start_out, 12,x + declare_generic_reg end_out, 12,x /* same register as start_out */ + declare_generic_reg next_out, 8,x + declare_generic_reg file_start, 20,x + declare_generic_reg last_seen, 15,x + declare_generic_reg total_in, 25,x + declare_generic_reg NULL_DIST_SYM, 23,w + declare_generic_reg match_length, 3,x + declare_generic_reg dist, 7,x + declare_generic_reg dist_inc, 26,w // dist - 1 + declare_generic_reg literal, 10,x + + declare_generic_reg tmp0, 4,x + declare_generic_reg tmp1, 5,x + +isal_deflate_icf_body_hash_hist_aarch64: + stp x29, x30, [sp, -80]! /* 80-byte frame for x29, x30 and the callee-saved registers stored below */ + add x29, sp, 0 + str x24, [sp, 56] /* x24 (state) is written on every path, so it is saved before the early exits */ + + ldr avail_in, [stream, offset_avail_in] + cbnz avail_in, .stream_available + + ldr w1, [stream, offset_end_of_stream] // w1 keeps two values of end_of_stream and flush + cbz w1, .done + + add state, stream, offset_state + b .state_flush_read_buffer + + .align 2 +.stream_available: + stp x19, x20, [x29, 16] /* x19-x23, x25 and x26 are saved only on this path */ + stp x21, x22, [x29, 32] + str x23, [x29, 48] + stp x25, x26, [x29, 64] + + ldr level_buf, [stream, offset_level_buf] + add state, stream, offset_state // 64 + mov stream_saved, stream + ldr start_in, [stream, offset_next_in] // 0 + ldr w_total_in, [stream, offset_total_in] + + mov x0, offset_hash_hist + add last_seen, level_buf, x0 + + ldr x0, [level_buf, offset_icf_buf_avail_out] // 4696 + ldr start_out, [level_buf, offset_icf_buf_next] // 4688 + + mov next_in, start_in + and x0, x0, -4 /* icf_buf_avail_out rounded down to a multiple of 4 */ + ldp hist_size, hash_mask, [state, offset_dist_mask] // 12 + add end_in, start_in, x_avail_in, uxtw + mov next_out, start_out + add end_out, start_out, x0 + + add x0, next_in, ISAL_LOOK_AHEAD // 288 + sub file_start, start_in, total_in, uxtw /* file_start = start_in - total_in */ + mov NULL_DIST_SYM, 30 + add next_in_iter, next_in, 1 + cmp end_in, x0 + bls .while_loop_end /* the loop runs only while more than ISAL_LOOK_AHEAD bytes of input remain */ +
+ .align 3 +.while_loop: + cmp next_out, end_out + bcs .state_create_hdr /* next_out has reached end_out: output buffer is full */ + + ldr w_literal, [next_in] + mov w0, w_literal + crc32cw w0, wzr, w0 /* hash of the 4 bytes loaded from next_in */ + + and w0, w0, hash_mask + sub x1, next_in, file_start + lsl x0, x0, 1 + + ldrh w_dist, [last_seen, x0] /* position previously recorded for this hash */ + strh w1, [last_seen, x0] /* replaced by the low 16 bits of next_in - file_start */ + sub w1, w1, w_dist + and w_dist, w1, 65535 + + sub dist_inc, w_dist, #1 + cmp dist_inc, hist_size + bcc .dist_vs_hist_size /* dist - 1 is below hist_size: try to match */ + +.while_latter_part: + and w_literal, w_literal, 255 + mov next_in, next_in_iter + add next_out, next_out, 4 + add x1, level_buf, literal, uxtb 2 + ldr w0, [x1, 2296] /* 2296 is offset_hist_ll_hist */ + add w0, w0, 1 + str w0, [x1, 2296] + ldrh w0, [next_out, -4] + bfi w0, w_literal, 0, 10 /* literal goes into bits 0-9 of the new entry */ + strh w0, [next_out, -4] + ldr w0, [next_out, -4] + bfi w0, NULL_DIST_SYM, 10, 9 /* NULL_DIST_SYM (30) goes into bits 10-18 */ + str w0, [next_out, -4] + ubfx x0, x0, 16, 3 + strh w0, [next_out, -2] + +.while_loop_check: + add x0, next_in, ISAL_LOOK_AHEAD // 288 + add next_in_iter, next_in, 1 + cmp end_in, x0 + bhi .while_loop + b .while_loop_end + + .align 2 +.dist_vs_hist_size: + mov x1, next_in + mov w2, 258 + sub x0, next_in, dist, uxth + compare_258_bytes param0,param1,match_length,tmp0,tmp1 /* macro is not defined in this file, presumably from an included header; its result is read from match_length */ + + and w1, w_match_length, 65535 // 0xffff + cmp w1, 3 + bls .while_latter_part /* a match of 3 bytes or fewer is emitted as a literal */ + + ldr w0, [next_in, 1] + mov x4, next_in + add next_in, next_in, x1, uxth + crc32cw w0, wzr, w0 + + and w0, hash_mask, w0 + sub next_in_iter, next_in_iter, file_start + strh w_next_in_iter, [last_seen, x0, lsl 1] + ldr w0, [x4, 2]!
+ crc32cw w0, wzr, w0 + + and w0, hash_mask, w0 + and w_match_length, w_match_length, 65535 // 0xffff + sub x4, x4, file_start + + // get_len_icf_code + add w_match_length, w_match_length, 254 + // get_dist_icf_code, first part + mov w1, 0 // w1 => dist_extra + strh w4, [last_seen, x0, lsl 1] + cmp w_dist, 2 + ubfiz x0, match_length, 2, 17 + add x0, level_buf, x0 + bhi .compute_dist_icf_code /* dist above 2 needs the extra-bits computation */ + +.match_length_end: + // handle level_buf->hist + ldr w2, [x0, offset_hist_ll_hist] // 2296, ll_hist + add x4, level_buf, dist_inc, uxtw 2 // d_hist + add next_out, next_out, 4 + add w2, w2, 1 // ll_hist + str w2, [x0, offset_hist_ll_hist] // 2296, ll_hist + ldr w0, [x4, offset_hist_d_hist] // 2176, d_hist + add w0, w0, 1 // d_hist + str w0, [x4, offset_hist_d_hist] // 2176, d_hist + + // write_deflate_icf + ldrh w0, [next_out, -4] + bfi w0, w3, 0, 10 + strh w0, [next_out, -4] + ldr w0, [next_out, -4] + bfi w0, dist_inc, 10, 9 + str w0, [next_out, -4] + lsr w0, w0, 16 + bfi w0, w1, 3, 13 // w1 => dist_extra + strh w0, [next_out, -2] + b .while_loop_check + + .align 2 +// get_dist_icf_code, 2nd part +.compute_dist_icf_code: + clz w1, dist_inc + mov w2, 30 + sub w2, w2, w1 /* w2 = 30 - clz(dist_inc) */ + mov w1, 1 + lsl w1, w1, w2 + sub w1, w1, #1 + and w1, w1, dist_inc /* w1 = low w2 bits of dist_inc */ + lsr dist_inc, dist_inc, w2 + add dist_inc, dist_inc, w2, lsl 1 /* dist_inc = (dist_inc >> w2) + 2 * w2 */ + and w1, w1, 8191 + b .match_length_end + +.while_loop_end: + sub x19, next_in, x19 /* x19 is start_in; it now holds next_in - start_in */ + cmp x19, 0 + ble .skip_igzip_hist2 + + mov w0, 1 + strb w0, [stream_saved, offset_state_has_hist] // 135 + +.skip_igzip_hist2: + add w19, w_total_in, w19 + ldr w0, [stream_saved, offset_end_of_stream] // 56 + sub x12, end_out, next_out + asr x12, x12, 2 // x12 => end_out - next_out + str next_in, [stream_saved] + str w19, [stream_saved, offset_total_in] // 12 + sub next_in, end_in, next_in /* x9 now holds end_in - next_in; it is stored to avail_in as w9 below */ + str w19, [stream_saved, offset_state_block_end] // 72 + + ldp x25, x26, [x29, 64] + ldr x23, [x29, 48] + ldp x21, x22, [x29, 32] + ldp x19, x20, [x29, 16] + + str w9, [stream_saved, offset_avail_in] // 8
+ str next_out, [level_buf, offset_icf_buf_next] // 4688 + str x12, [level_buf, offset_icf_buf_avail_out] // 4696, x12 => end_out - next_out + cbnz w0, .state_flush_read_buffer + b .done + + .align 2 +.state_create_hdr: + mov w0, 2 + str w0, [x24, 20] /* x24 is state: the word at state + 20 is set to 2 */ + sub start_in, next_in, start_in + cmp start_in, 0 + ble .skip_igzip_hist + + mov w0, 1 + strb w0, [stream_saved, offset_state_has_hist] // 135 + +.skip_igzip_hist: + add w_total_in, w_total_in, w19 + sub x12, end_out, next_out + asr x12, x12, 2 // x12 => end_out - next_out + str next_in, [stream_saved] + sub next_in, end_in, next_in + str w_total_in, [stream_saved, offset_total_in] // 12 + str w_total_in, [stream_saved, offset_state_block_end] // 72 + + ldp x25, x26, [x29, 64] + ldr x23, [x29, 48] + ldp x21, x22, [x29, 32] + ldp x19, x20, [x29, 16] + + str w9, [stream_saved, offset_avail_in] // 8 + str next_out, [level_buf, offset_icf_buf_next] // 4688 + str x12, [level_buf, offset_icf_buf_avail_out] // 4696, x12 => end_out - next_out + b .done + +.state_flush_read_buffer: + mov w0, 4 + str w0, [x24, 20] /* the word at state + 20 is set to 4 */ + +.done: + ldr x24, [sp, 56] /* restore x24, then release the frame */ + ldp x29, x30, [sp], 80 + ret + + .size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64 diff --git a/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S new file mode 100644 index 000000000..3e72c8c78 --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S @@ -0,0 +1,397 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + .arch armv8-a+crc + .text + +#include "lz0a_const_aarch64.h" +#include "data_struct_aarch64.h" +#include "huffman_aarch64.h" +#include "bitbuf2_aarch64.h" +#include "stdmac_aarch64.h" + +/* +declare Macros: declare_generic_reg binds name to register number reg with the width prefix given by default (x or w), and also defines w_name and x_name for the 32-bit and 64-bit views +*/ +.macro declare_generic_reg name:req,reg:req,default:req + \name .req \default\reg + w_\name .req w\reg + x_\name .req x\reg +.endm + +/* +void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream); +*/ + +/* constant */ + +/* offset of struct isal_zstream */ +.equ offset_next_in, 0 +.equ offset_avail_in, 8 +.equ offset_total_in, 12 +.equ offset_next_out, 16 +.equ offset_avail_out, 24 +.equ offset_total_out, 28 +.equ offset_hufftables, 32 +.equ offset_level, 40 +.equ offset_level_buf_size, 44 +.equ offset_level_buf, 48 +.equ offset_end_of_stream, 56 +.equ offset_flush, 58 +.equ offset_gzip_flag, 60 +.equ offset_hist_bits, 62 +.equ offset_state, 64 +.equ offset_state_block_end, 72 +.equ offset_state_state, 84 /* equals offset_state + offset_state_of_zstate (64 + 20) */ +.equ offset_state_has_hist, 135 + +/* offset of struct level_buf */ +.equ offset_encode_tables, 0 +.equ offset_hist, 2176 +.equ offset_hist_d_hist, 2176 +.equ offset_hist_ll_hist, 2296 +.equ offset_deflate_hdr_count, 4348 +.equ offset_deflate_hdr_extra_bits, 4352 +.equ offset_deflate_hdr, 4356 +.equ offset_icf_buf_next, 4688 +.equ offset_icf_buf_avail_out, 4696 +.equ offset_icf_buf_start, 4704 +.equ offset_hash8k, 4712 +.equ offset_hash_hist, 4712 + +/* offset of struct isal_zstate (applied to the state pointer, which is stream + offset_state) */ +.equ offset_dist_mask, 12 +.equ offset_hash_mask, 16 +.equ offset_state_of_zstate, 20 + +/* macros*/ +.equ ISAL_LOOK_AHEAD, 288 + + /* arguments */ + declare_generic_reg stream, 0,x + + declare_generic_reg param0, 0,x /* param0-param6 name x0-x6, the registers loaded before the branch to update_state */ + declare_generic_reg param1, 1,x + declare_generic_reg param2, 2,x + declare_generic_reg param3, 3,x + declare_generic_reg param4, 4,x + declare_generic_reg param5, 5,x + declare_generic_reg param6, 6,x + + /* local variable */ + declare_generic_reg stream_saved, 15,x
+ declare_generic_reg level_buf, 13,x + declare_generic_reg start_in, 21,x + declare_generic_reg start_out, 22,x + declare_generic_reg state, 23,x + declare_generic_reg end_out, 12,x + declare_generic_reg end_in, 11,x + declare_generic_reg next_in, 8,x + declare_generic_reg next_out, 10,x + declare_generic_reg next_out_iter, 5,x /* same register as param5 */ + declare_generic_reg file_start, 18,x /* NOTE(review): x18 is the platform register in AAPCS64 - confirm the target OS leaves it free for general use */ + declare_generic_reg last_seen, 14,x + + declare_generic_reg literal_code, 9,w + declare_generic_reg hash_mask, 19,w + declare_generic_reg hist_size, 20,w + declare_generic_reg dist, 7,w + declare_generic_reg dist_inc, 24,w + + declare_generic_reg tmp0, 25,x + declare_generic_reg tmp1, 26,x + declare_generic_reg tmp2, 27,x + declare_generic_reg tmp3, 28,x + + .align 2 + .type write_deflate_icf_constprop, %function +write_deflate_icf_constprop: /* x0 = address of a 4-byte entry, w1 = value for bits 0-9; bits 10-18 are set to 30 and bits 19-31 end up zero; overwrites w1-w3 */ + ldrh w2, [x0] + mov w3, 30 + bfi w2, w1, 0, 10 + strh w2, [x0] + ldr w1, [x0] + bfi w1, w3, 10, 9 + str w1, [x0] + ubfx x1, x1, 16, 3 + strh w1, [x0, 2] + ret + .size write_deflate_icf_constprop, .-write_deflate_icf_constprop + + .align 2 + .type write_deflate_icf, %function +write_deflate_icf: /* x0 = address of a 4-byte entry; w1 goes to bits 0-9, w2 to bits 10-18, w3 to bits 19-31; overwrites w1 and w4 */ + ldrh w4, [x0] + bfi w4, w1, 0, 10 + strh w4, [x0] + ldr w1, [x0] + bfi w1, w2, 10, 9 + str w1, [x0] + lsr w1, w1, 16 + bfi w1, w3, 3, 13 + strh w1, [x0, 2] + ret + .size write_deflate_icf, .-write_deflate_icf + + .align 2 + .type update_state, %function +update_state: /* reads x0 (stream), x1 (start_in), x2 (next_in), x3 (end_in), x5 (next_out), x6 (end_out); the incoming x4 is not read */ + sub x7, x2, x1 + ldr x4, [x0, 48] /* x4 = pointer stored at stream + 48, which is offset_level_buf */ + cmp x7, 0 + ble .L48 + mov w1, 1 + strb w1, [x0, 135] /* 135 is offset_state_has_hist: set when next_in is past start_in */ +.L48: + ldr w1, [x0, 12] + sub x6, x6, x5 + str x2, [x0] + sub x3, x3, x2 + add w1, w1, w7 + stp w3, w1, [x0, 8] /* stream + 8 = end_in - next_in, stream + 12 = previous value plus bytes consumed */ + str w1, [x0, 72] + asr x6, x6, 2 + str x5, [x4, 4688] + str x6, [x4, 4696] /* (end_out - next_out) >> 2 is stored at offset_icf_buf_avail_out */ + ret + .size update_state, .-update_state + + .align 2 + .global isal_deflate_icf_finish_hash_hist_aarch64 + .type isal_deflate_icf_finish_hash_hist_aarch64, %function +isal_deflate_icf_finish_hash_hist_aarch64: + ldr w_end_in, [stream, 8] // stream->avail_in + cbz w_end_in, .stream_not_available + + stp x29, x30, [sp, -96]!
+ add x29, sp, 0 + stp x19, x20, [sp, 16] + stp x21, x22, [sp, 32] + stp x23, x24, [sp, 48] + stp x25, x26, [sp, 64] + stp x27, x28, [sp, 80] /* x19-x28 are all saved in the 96-byte frame */ + + mov stream_saved, stream + ldr level_buf, [stream, offset_level_buf] // 48 + ldr start_in, [stream, offset_next_in] // 0 + ldr start_out, [level_buf, offset_icf_buf_next] // 4688 + add state, stream, offset_state // 64 + ldr end_out, [level_buf, offset_icf_buf_avail_out] // 4696 + mov next_in, start_in + ldr w_file_start, [stream, offset_total_in] // 12 + mov tmp0, offset_hash_hist // 4712 + add last_seen, level_buf, tmp0 + add end_in, start_in, end_in, uxtw /* end_in held avail_in until here */ + and end_out, end_out, -4 /* icf_buf_avail_out rounded down to a multiple of 4 */ + mov next_out, start_out + ldp hist_size, hash_mask, [state, offset_dist_mask] // 12 + sub file_start, start_in, file_start /* file_start = start_in - total_in */ + add end_out, start_out, end_out + mov next_out_iter, next_out + + add x0, next_in, 3 + cmp end_in, x0 // x0 <= next_in + 3 + bls .while_first_end /* the first loop runs only while more than 3 bytes of input remain */ + + .p2align 3 +.while_first: + cmp next_out, end_out + bcs .save_and_update_state /* next_out has reached end_out: output buffer is full */ + ldr literal_code, [next_in] + mov w0, literal_code + crc32cw w0, wzr, w0 /* hash of the 4 bytes loaded from next_in */ + and w0, w0, hash_mask + sub x2, next_in, file_start + lsl x0, x0, 1 + ldrh dist, [last_seen, x0] + strh w2, [last_seen, x0] + sub w2, w2, dist + and dist, w2, 65535 + sub dist_inc, dist, #1 + cmp dist_inc, hist_size + bcs .skip_compare258 + + mov x2, 0 + sub w2, w_end_in, w8 /* w8 is next_in: w2 = bytes of input remaining */ + mov x1, next_in + sub x0, next_in, x7, uxth + + compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1 /* macro is not defined in this file, presumably from an included header; its result is read from tmp2 */ + mov w0, w_tmp2 + and w2, w0, 65535 + + cmp w2, 3 + bhi .while_first_match_length /* only matches longer than 3 bytes take the length and distance path */ + +.skip_compare258: + and literal_code, literal_code, 255 // get_lit_icf_code + add next_in, next_in, 1 + mov w1, literal_code + mov x0, next_out + add x_literal_code, level_buf, x_literal_code, uxtb 2 // level_buf->hist.ll_hist + + ldr w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296 + + bl write_deflate_icf_constprop // write_deflate_icf + + add next_out, next_out, 4
+.while_first_check: + add x0, next_in, 3 + mov next_out_iter, next_out + cmp end_in, x0 + bhi .while_first + +.while_first_end: + cmp next_in, end_in + bcs .while_2nd_end /* the flags of this compare are tested again at .while_2nd_end */ + + cmp next_out, end_out + bcc .while_2nd_handle + b .save_and_update_state_2nd + + .p2align 2 +.while_2nd: + cmp end_out, next_out_iter + bls .save_and_update_state_2nd + +.while_2nd_handle: + ldrb w2, [next_in], 1 /* the remaining bytes are emitted one literal at a time through next_out_iter */ + mov x0, next_out_iter + add next_out_iter, next_out_iter, 4 + mov w1, w2 + add x2, level_buf, x2, uxtb 2 + + ldr w_tmp0, [x2, offset_hist_ll_hist] // 2296 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x2, offset_hist_ll_hist] // 2296 + + bl write_deflate_icf_constprop + cmp end_in, next_in + bne .while_2nd + + mov next_in, end_in + b .end_of_stream_check_and_exit + + .p2align 2 +.while_first_match_length: + and w0, w0, 65535 + mov w3, 0 + add w1, w0, 254 // get_len_icf_code + cmp dist, 2 + bhi .compute_dist_icf_code + +.while_first_match_length_end: + ubfiz x_tmp2, x1, 2, 17 + add x_tmp1, level_buf, x24, uxtw 2 /* x24 is dist_inc */ + add x_tmp2, level_buf, x_tmp2 + + add next_in, next_in, x2, uxth /* advance by the match length kept in w2 */ + mov w2, dist_inc + + ldr w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296 + + mov x0, next_out + ldr w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176 + + bl write_deflate_icf + add next_out, next_out, 4 + b .while_first_check + +// compute_dist_icf_code + .p2align 2 +.compute_dist_icf_code: + clz w3, dist_inc + mov w0, 30 + sub w0, w0, w3 /* w0 = 30 - clz(dist_inc) */ + + mov w3, 1 + lsl w3, w3, w0 + sub w3, w3, #1 + and w3, w3, dist_inc /* w3 = low w0 bits of dist_inc */ + lsl w4, w0, 1 + lsr dist_inc, dist_inc, w0 + add dist_inc, dist_inc, w4 /* dist_inc = (dist_inc >> w0) + 2 * w0 */ + b .while_first_match_length_end + +.while_2nd_end: + beq .end_of_stream_check_and_exit /* taken when next_in equals end_in */ + mov param6, end_out + b .update_state + +.end_of_stream_check_and_exit: + ldr w_tmp0, [stream_saved, offset_end_of_stream] // 56 + cbz w_tmp0, .update_state_2nd + b .save_and_update_state_2nd + + .p2align 3
+.save_and_update_state_2nd: + mov w_tmp0, 2 + str w_tmp0, [state, offset_state_of_zstate] // 20 +.update_state_2nd: + mov param6, end_out /* x5 already holds next_out_iter, so param5 needs no move here */ + b .update_state + + .p2align 2 +.save_and_update_state: + mov param6, end_out + mov param5, next_out + mov w_tmp0, 2 + str w_tmp0, [state, offset_state_of_zstate] // 20 +.update_state: + mov param4, start_out /* update_state overwrites x4 before reading it */ + mov param1, start_in + mov param3, end_in + mov param2, next_in + mov param0, stream_saved + + ldp x19, x20, [sp, 16] + ldp x21, x22, [sp, 32] + ldp x23, x24, [sp, 48] + ldp x25, x26, [sp, 64] + ldp x27, x28, [sp, 80] + ldp x29, x30, [sp], 96 + + b update_state /* branch without link after the frame is released: update_state returns to this function's caller */ + + .p2align 2 +.stream_not_available: + ldr w1, [stream, offset_end_of_stream] // 56 + cbz w1, .done + + mov w1, 2 /* same value the other exit paths store at state + offset_state_of_zstate */ + str w1, [stream, offset_state_state] // 84 +.done: + ret + + .size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64 diff --git a/src/spdk/isa-l/igzip/aarch64/isal_update_histogram.S b/src/spdk/isa-l/igzip/aarch64/isal_update_histogram.S new file mode 100644 index 000000000..43b916f8e --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/isal_update_histogram.S @@ -0,0 +1,311 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crc + .text + .align 2 + +#include "lz0a_const_aarch64.h" +#include "data_struct_aarch64.h" +#include "huffman_aarch64.h" +#include "bitbuf2_aarch64.h" +#include "stdmac_aarch64.h" + +/* +declare Macros: declare_generic_reg binds name, w_name and x_name to register number reg (name takes the width prefix given by default). convert_dist_to_dist_sym rewrites w_dist in place: -1 when dist is above 32768, dist - 1 when dist is 4 or less, otherwise (d >> n) plus 2 * n with d = dist - 1 and n = 30 - clz(d); tmp0 and tmp1 are scratch. convert_length_to_len_sym loads the 32-bit table entry at index length from .len_to_code_tab_lanchor and adds 256. NOTE(review): convert_dist_to_dist_sym defines the fixed label .dist2code_done, so a second expansion in the same file would redefine it - confirm it is expanded only once +*/ + +.macro declare_generic_reg name:req,reg:req,default:req + \name .req \default\reg + w_\name .req w\reg + x_\name .req x\reg +.endm + +.macro convert_dist_to_dist_sym dist:req,tmp0:req,tmp1:req + mov w_\tmp0, w_\dist + mov w_\dist, -1 + cmp w_\tmp0, 32768 + bhi .dist2code_done + sub w_\dist, w_\tmp0, #1 + cmp w_\tmp0, 4 + bls .dist2code_done + clz w_\tmp1, w_\dist + mov w_\tmp0, 30 + sub w_\tmp0, w_\tmp0, w_\tmp1 + lsr w_\dist, w_\dist, w_\tmp0 + add w_\dist, w_\dist, w_\tmp0, lsl 1 +.dist2code_done: +.endm + +.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req + adrp x_\tmp0, .len_to_code_tab_lanchor + add x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor + ldr w_\length_out, [x_\tmp0, w_\length, uxtw 2] + add w_\length_out, w_\length_out, 256 +.endm + + .section .rodata + .align 4 +.len_to_code_tab_lanchor = .
+ 0 + .type len_to_code_tab, %object + .size len_to_code_tab, 1056 +len_to_code_tab: /* 264 words (1056 bytes): three leading zeros, 256 code values, five trailing zeros */ + .word 0x00, 0x00, 0x00 + .word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 + .word 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c + .word 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e + .word 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10 + .word 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 + .word 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12 + .word 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13 + .word 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14 + .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15 + .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15 + .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16 + .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16 + .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17 + .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17 + .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 + .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 + .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 + .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 + .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 + .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 + .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a + .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a + .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a + .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a + .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b + .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b + .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b + .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b + .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c + .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c + .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c + .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d + .word 0x00, 0x00, 0x00, 0x00, 0x00 + + .text + .global isal_update_histogram_aarch64 + .arch armv8-a+crc + .type
isal_update_histogram_aarch64, %function + +/* +void isal_update_histogram_aarch64(uint8_t * start_stream, int length, + struct isal_huff_histogram *histogram); +*/ + + /* arguments */ + declare_generic_reg start_stream, 0,x + declare_generic_reg length, 1,x + declare_generic_reg histogram, 2,x + + declare_generic_reg param0, 0,x + declare_generic_reg param1, 1,x + declare_generic_reg param2, 2,x + + /* local variable */ + declare_generic_reg start_stream_saved, 10,x + declare_generic_reg histogram_saved, 23,x + declare_generic_reg current, 19,x + declare_generic_reg last_seen, 20,x + declare_generic_reg end_stream, 21,x + declare_generic_reg loop_end_iter, 22,x + declare_generic_reg dist_histogram, 12,x + declare_generic_reg lit_len_histogram, 23,x + declare_generic_reg literal, 8,x + declare_generic_reg next_hash, 9,x + declare_generic_reg end, 4,x + declare_generic_reg dist, 7,x + declare_generic_reg D, 11,w + declare_generic_reg match_length, 3,w + + declare_generic_reg tmp0, 5,w + declare_generic_reg tmp1, 6,w + +/* constant */ +.equ LIT_LEN, 286 +.equ DIST_LEN, 30 + +.equ lit_len_offset, 0 +.equ dist_offset, (8*LIT_LEN) // 2288 +.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528 +.equ hash_table_size, (8*1024*2) // 16384 + +isal_update_histogram_aarch64: + cmp w_length, 0 + ble .done + + stp x29, x30, [sp, -64]! 
+ add x29, sp, 0 + stp x19, x20, [sp, 16] + stp x21, x22, [sp, 32] + str x23, [sp, 48] + + add last_seen, histogram, hash_offset + add end_stream, start_stream, length, sxtw + mov current, start_stream + sub loop_end_iter, end_stream, #3 + mov histogram_saved, histogram + + mov x0, last_seen + mov w1, 0 + mov x2, hash_table_size + bl memset + + cmp current, loop_end_iter + bcs .loop_end + + mov start_stream_saved, current + add dist_histogram, histogram_saved, dist_offset + mov D, 32766 + b .loop + + .align 2 +.loop_2nd_stream: + and literal, literal, 0xff + mov current, next_hash + cmp loop_end_iter, current + + ldr x0, [lit_len_histogram, literal, lsl 3] + add x0, x0, 1 + str x0, [lit_len_histogram, literal, lsl 3] + bls .loop_end + +.loop: + ldr w_literal, [current] + add next_hash, current, 1 + + mov w0, w_literal + crc32cw w0, wzr, w0 + + ubfiz x0, x0, 1, 13 + sub x2, current, start_stream_saved + ldrh w_dist, [last_seen, x0] + strh w2, [last_seen, x0] + sub w2, w2, w_dist + and w_dist, w2, 65535 + + sub w0, w_dist, #1 + cmp w0, D + bhi .loop_2nd_stream + + sub w2, w_end_stream, w_current + mov x1, current + sub x0, current, dist, uxth + compare_max_258_bytes param0,param1,param2,match_length,tmp0,tmp1 + + cmp match_length, 3 + bls .loop_2nd_stream + + add end, current, 3 + cmp end, loop_end_iter + csel end, end, loop_end_iter, ls + cmp end, next_hash + bls .skip_inner_loop + + .align 3 +.inner_loop: + ldr w0, [next_hash] + crc32cw w0, wzr, w0 + + ubfiz x0, x0, 1, 13 + sub x1, next_hash, start_stream_saved + add next_hash, next_hash, 1 + cmp next_hash, end + strh w1, [last_seen, x0] + bne .inner_loop + +.skip_inner_loop: + convert_dist_to_dist_sym dist, tmp0, tmp1 + uxtw x2, w_dist + ldr x1, [dist_histogram, x2, lsl 3] + add x1, x1, 1 + str x1, [dist_histogram, x2, lsl 3] + + convert_length_to_len_sym match_length,tmp1,tmp0 + uxtw x0, w_tmp1 + ldr x1, [lit_len_histogram, x0, lsl 3] + add x1, x1, 1 + str x1, [lit_len_histogram, x0, lsl 3] + + sub match_length, 
match_length, #1 + add x3, x3, 1 + add current, current, x3 + cmp loop_end_iter, current + bhi .loop + + .align 3 +// fold the last for loop +.loop_end: + cmp end_stream, current + bls .loop_fold_end + + mov x0, current + ldrb w1, [x0], 1 + cmp end_stream, x0 + ldr x0, [lit_len_histogram, x1, lsl 3] + add x0, x0, 1 + str x0, [lit_len_histogram, x1, lsl 3] + bls .loop_fold_end + + ldrb w1, [current, 1] + add x0, current, 2 + cmp end_stream, x0 + ldr x0, [lit_len_histogram, x1, lsl 3] + add x0, x0, 1 + str x0, [lit_len_histogram, x1, lsl 3] + bls .loop_fold_end + + ldrb w1, [current, 2] + add x0, current, 3 + cmp end_stream, x0 + ldr x0, [lit_len_histogram, x1, lsl 3] + add x0, x0, 1 + str x0, [lit_len_histogram, x1, lsl 3] + bls .loop_fold_end + + ldrb w1, [current, 3] + ldr x0, [lit_len_histogram, x1, lsl 3] + add x0, x0, 1 + str x0, [lit_len_histogram, x1, lsl 3] + +.loop_fold_end: + ldr x0, [lit_len_histogram, (256*8)] + add x0, x0, 1 + str x0, [lit_len_histogram, (256*8)] + + ldr x23, [sp, 48] + ldp x19, x20, [sp, 16] + ldp x21, x22, [sp, 32] + ldp x29, x30, [sp], 64 + ret + .align 2 +.done: + ret + .size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64 diff --git a/src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h b/src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h new file mode 100644 index 000000000..d55ec09dc --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h @@ -0,0 +1,72 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#ifndef __LZ0A_CONST_AARCH64_H__
+#define __LZ0A_CONST_AARCH64_H__
+#include "options_aarch64.h"
+
+#ifdef __ASSEMBLY__	// assembler-only: .set directives are not valid C
+.set K     ,    1024
+.set D     ,    IGZIP_HIST_SIZE        // Amount of history
+.set LA    ,    18 * 16      // Max look-ahead, rounded up to 32 byte boundary
+.set BSIZE ,    2*IGZIP_HIST_SIZE + LA     // Nominal buffer size
+
+/// Constants for stateless compression
+#define LAST_BYTES_COUNT   3 // Bytes to prevent reading out of array bounds
+#define LA_STATELESS   258   // No round up since no data is copied to a buffer
+
+.set IGZIP_LVL0_HASH_SIZE  ,     (8 * K)
+.set IGZIP_HASH8K_HASH_SIZE  ,     (8 * K)
+.set IGZIP_HASH_HIST_HASH_SIZE ,   IGZIP_HIST_SIZE
+.set IGZIP_HASH_MAP_HASH_SIZE  ,   IGZIP_HIST_SIZE
+
+#define LVL0_HASH_MASK    (IGZIP_LVL0_HASH_SIZE - 1)	// (size - 1) masks assume power-of-two sizes
+#define HASH8K_HASH_MASK    (IGZIP_HASH8K_HASH_SIZE - 1)
+#define HASH_HIST_HASH_MASK   (IGZIP_HASH_HIST_HASH_SIZE - 1)
+#define HASH_MAP_HASH_MASK    (IGZIP_HASH_MAP_HASH_SIZE - 1)
+
+.set MIN_DEF_MATCH   ,     3   // Minimum length of a match in deflate
+.set SHORTEST_MATCH  ,     4
+
+.set SLOP    ,     8
+
+#define ICF_CODE_BYTES     4
+#define LIT_LEN_BIT_COUNT  10
+#define DIST_LIT_BIT_COUNT   9
+
+#define LIT_LEN_MASK     ((1 << LIT_LEN_BIT_COUNT) - 1)	// low 10 bits
+#define LIT_DIST_MASK    ((1 << DIST_LIT_BIT_COUNT) - 1)	// low 9 bits
+
+#define DIST_OFFSET    LIT_LEN_BIT_COUNT	// = 10
+#define EXTRA_BITS_OFFSET  (DIST_OFFSET + DIST_LIT_BIT_COUNT)	// = 19
+#define LIT (0x1E << DIST_OFFSET)
+
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __LZ0A_CONST_AARCH64_H__ */
diff --git a/src/spdk/isa-l/igzip/aarch64/options_aarch64.h b/src/spdk/isa-l/igzip/aarch64/options_aarch64.h
new file mode 100644
index 000000000..32db918f3
--- /dev/null
+++ b/src/spdk/isa-l/igzip/aarch64/options_aarch64.h
@@ -0,0 +1,71 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#ifndef __OPTIONS_AARCH64_H__
+#define __OPTIONS_AARCH64_H__
+
+
+#ifdef __ASSEMBLY__
+
+/// Options:dir
+/// m - reschedule mem reads
+/// e b - bitbuff style
+/// t s x - compare style
+/// h - limit hash updates
+/// l - use longer huffman table
+/// f - fix cache read
+
+#ifndef IGZIP_HIST_SIZE	/* default history size when the build does not set one */
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+#if (IGZIP_HIST_SIZE > (32 * 1024))	/* clamp to at most 32 KiB */
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+#ifdef LONGER_HUFFTABLE	/* with LONGER_HUFFTABLE the history is clamped to 8 KiB */
+#if (IGZIP_HIST_SIZE > 8 * 1024)
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (8 * 1024)
+#endif
+#endif
+
+/// (h) limit hash update
+#define LIMIT_HASH_UPDATE
+
+/// (f) fix cache read problem
+#define FIX_CACHE_READ
+
+#define ISAL_DEF_MAX_HDR_SIZE 328
+
+
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __OPTIONS_AARCH64_H__ */
diff --git a/src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h b/src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h
new file mode 100644
index 000000000..39afbc640
--- /dev/null
+++ b/src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h
@@ -0,0 +1,57 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __STDMAC_AARCH64_H__
+#define __STDMAC_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+
+#define DEBUG_STACK 144	/* frame size in bytes; the macros below use only offsets 0..95 */
+
+/* push_stack: allocate a DEBUG_STACK-byte frame, save x29/x30 at its base,
+   point x29 at the frame and save x19-x28 at offsets 16..95. */
+.macro push_stack
+	stp	x29, x30,[sp,0-DEBUG_STACK]!
+	mov	x29, sp
+	stp	x19, x20, [sp, 16]
+	stp	x21, x22, [sp, 32]
+	stp	x23, x24, [sp, 48]
+	stp	x25, x26, [sp, 64]
+	stp	x27, x28, [sp, 80]
+.endm
+/* pop_stack: exact inverse of push_stack; reloads x19-x28, then x29/x30
+   while releasing the frame. */
+.macro pop_stack
+	ldp	x19, x20, [sp, 16]
+	ldp	x21, x22, [sp, 32]
+	ldp	x23, x24, [sp, 48]
+	ldp	x25, x26, [sp, 64]
+	ldp	x27, x28, [sp, 80]
+
+	ldp	x29, x30, [sp], DEBUG_STACK
+.endm
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __STDMAC_AARCH64_H__ */
diff --git a/src/spdk/isa-l/igzip/adler32_avx2_4.asm b/src/spdk/isa-l/igzip/adler32_avx2_4.asm
new file mode 100644
index 000000000..8f9d6d507
--- /dev/null
+++ b/src/spdk/isa-l/igzip/adler32_avx2_4.asm
@@ -0,0 +1,292 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; uint32_t adler32_avx2_4(uint32_t init, const unsigned char *buf, uint64_t len)
+; init: low 16 bits seed a, high 16 bits seed b. Returns (b << 16) | a, both mod BASE.
+%define LIMIT 5552		; bytes processed between modular reductions (multiple of CHUNKSIZE)
+%define BASE  0xFFF1 ; 65521
+
+%define CHUNKSIZE 16
+%define CHUNKSIZE_M1 (CHUNKSIZE-1)
+
+%include "reg_sizes.asm"
+
+default rel
+[bits 64]
+
+; need to keep free: eax, ecx, edx
+; (the div instructions below use edx:eax as dividend and ecx as divisor)
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg1   rdi
+ %define arg2   rsi
+ %define arg3   rdx
+
+ %define init_d edi
+ %define data   r9
+ %define size   r10
+ %define s      r11
+ %define a_d    r12d
+ %define b_d    r8d
+ %define end    r13
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push	r12
+	push	r13
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop	r13
+	pop	r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg1   rcx
+ %define arg2   rdx
+ %define arg3   r8
+
+ %define init_d r12d
+ %define data   r9
+ %define size   r10
+ %define s      r11
+ %define a_d    esi
+ %define b_d    edi
+ %define end    r13
+
+ %define stack_size  2*16 + 5*8		; must be an odd multiple of 8
+ %define arg(x)      [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack	stack_size
+	vmovdqa	[rsp + 0*16], xmm6
+	vmovdqa	[rsp + 1*16], xmm7
+	save_reg	rdi,  2*16 + 0*8
+	save_reg	rsi,  2*16 + 1*8
+	save_reg	r12,  2*16 + 2*8
+	save_reg	r13,  2*16 + 3*8
+	end_prolog
+	mov	init_d, ecx	; initialize init_d from arg1 to keep ecx free
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa	xmm6, [rsp + 0*16]
+	vmovdqa	xmm7, [rsp + 1*16]
+	mov	rdi,  [rsp + 2*16 + 0*8]
+	mov	rsi,  [rsp + 2*16 + 1*8]
+	mov	r12,  [rsp + 2*16 + 2*8]
+	mov	r13,  [rsp + 2*16 + 3*8]
+	add	rsp, stack_size
+ %endmacro
+%endif
+
+%define ya	ymm0
+%define yb	ymm1
+%define ydata0	ymm2
+%define ydata1	ymm3
+%define ysa	ymm4
+%define ydata   ysa		; ydata shares ymm4 with ysa
+%define ytmp0   ydata0
+%define ytmp1   ydata1
+%define ytmp2   ymm5
+%define xa	xmm0
+%define xb	xmm1
+%define xtmp0   xmm2
+%define xtmp1   xmm3
+%define xsa     xmm4
+%define xtmp2   xmm5
+%define yshuf0	ymm6
+%define yshuf1	ymm7
+
+
+global adler32_avx2_4:ISAL_SYM_TYPE_FUNCTION
+func(adler32_avx2_4)
+	FUNC_SAVE
+
+	vmovdqa	yshuf0, [SHUF0]
+	vmovdqa	yshuf1, [SHUF1]
+
+	mov	data, arg2
+	mov	size, arg3
+
+	mov	b_d, init_d
+	shr	b_d, 16			; b = init >> 16
+	and	init_d, 0xFFFF		; a = init & 0xFFFF
+	cmp	size, 32
+	jb	.lt64			; despite the label name, taken when size < 32
+	vmovd	xa, init_d
+	vpxor	yb, yb, yb
+.sloop1:
+	mov	s, LIMIT
+	cmp	s, size
+	cmova	s, size		; s = min(size, LIMIT)
+	lea	end, [data + s - CHUNKSIZE_M1]
+	cmp	data, end
+	jae	.skip_loop_1a
+align 32
+.sloop1a:
+	; do CHUNKSIZE adds
+	vbroadcastf128	ydata, [data]		; same 16 bytes in both 128-bit lanes
+	add	data, CHUNKSIZE
+	vpshufb	ydata0, ydata, yshuf0		; bytes 0..7 zero-extended to 8 dwords
+	vpaddd	ya, ya, ydata0
+	vpaddd	yb, yb, ya
+	vpshufb	ydata1, ydata, yshuf1		; bytes 8..15 zero-extended to 8 dwords
+	vpaddd	ya, ya, ydata1
+	vpaddd	yb, yb, ya
+	cmp	data, end
+	jb	.sloop1a
+
+.skip_loop_1a:
+	add	end, CHUNKSIZE_M1		; end = first byte past this run of s bytes
+
+	test	s, CHUNKSIZE_M1
+	jnz	.do_final			; s not a multiple of CHUNKSIZE: scalar tail
+
+	; either we're done, or we just did LIMIT
+	sub	size, s
+
+	; reduce
+	vpslld	yb, 3   ; b is scaled by 8
+	vpmulld	ysa, ya, [A_SCALE] ; scaled a
+
+	; compute horizontal sums of ya, yb, ysa
+	vextracti128 xtmp0, ya, 1
+	vextracti128 xtmp1, yb, 1
+	vextracti128 xtmp2, ysa, 1
+	vpaddd	xa, xa, xtmp0
+	vpaddd	xb, xb, xtmp1
+	vpaddd	xsa, xsa, xtmp2
+	vphaddd	xa, xa, xa
+	vphaddd	xb, xb, xb
+	vphaddd	xsa, xsa, xsa
+	vphaddd	xa, xa, xa
+	vphaddd	xb, xb, xb
+	vphaddd	xsa, xsa, xsa
+
+	vmovd	eax, xa
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	mov	a_d, edx
+
+	vpsubd	xb, xb, xsa
+	vmovd	eax, xb
+	add	eax, b_d
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	mov	b_d, edx
+
+	test	size, size
+	jz	.finish
+
+	; continue loop
+	vmovd	xa, a_d
+	vpxor	yb, yb
+	jmp	.sloop1
+
+.finish:
+	mov	eax, b_d
+	shl	eax, 16
+	or	eax, a_d
+	jmp	.end
+
+.lt64:
+	mov	a_d, init_d
+	lea	end, [data + size]
+	test	size, size
+	jnz	.final_loop
+	jmp	.zero_size
+
+	; handle remaining 1...15 bytes
+.do_final:
+	; reduce
+	vpslld	yb, 3   ; b is scaled by 8
+	vpmulld	ysa, ya, [A_SCALE] ; scaled a
+
+	vextracti128 xtmp0, ya, 1
+	vextracti128 xtmp1, yb, 1
+	vextracti128 xtmp2, ysa, 1
+	vpaddd	xa, xa, xtmp0
+	vpaddd	xb, xb, xtmp1
+	vpaddd	xsa, xsa, xtmp2
+	vphaddd	xa, xa, xa
+	vphaddd	xb, xb, xb
+	vphaddd	xsa, xsa, xsa
+	vphaddd	xa, xa, xa
+	vphaddd	xb, xb, xb
+	vphaddd	xsa, xsa, xsa
+	vpsubd	xb, xb, xsa
+
+	vmovd	a_d, xa		; unreduced sums; the modulo is applied at .zero_size
+	vmovd	eax, xb
+	add	b_d, eax
+
+align 32
+.final_loop:			; scalar: one byte per iteration until data == end
+	movzx	eax, byte[data]
+	add	a_d, eax
+	inc	data
+	add	b_d, a_d
+	cmp	data, end
+	jb	.final_loop
+
+.zero_size:
+	mov	eax, a_d
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	mov	a_d, edx
+
+	mov	eax, b_d
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	shl	edx, 16
+	or	edx, a_d
+	mov	eax, edx
+
+.end:
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+align 32
+A_SCALE:			; dwords 0,1,...,7: per-lane weights used to correct b
+	dq	0x0000000100000000, 0x0000000300000002
+	dq	0x0000000500000004, 0x0000000700000006
+SHUF0:				; vpshufb control: byte i -> dword i, i = 0..7 (0xFF zeroes a byte)
+	dq	0xFFFFFF01FFFFFF00, 0xFFFFFF03FFFFFF02
+	dq	0xFFFFFF05FFFFFF04, 0xFFFFFF07FFFFFF06
+SHUF1:				; vpshufb control: byte 8+i -> dword i, i = 0..7
+	dq	0xFFFFFF09FFFFFF08, 0xFFFFFF0BFFFFFF0A
+	dq	0xFFFFFF0DFFFFFF0C, 0xFFFFFF0FFFFFFF0E
+
diff --git a/src/spdk/isa-l/igzip/adler32_base.c b/src/spdk/isa-l/igzip/adler32_base.c
new file mode 100644
index 000000000..034b71a41
--- /dev/null
+++ b/src/spdk/isa-l/igzip/adler32_base.c
@@ -0,0 +1,63 @@
+/**********************************************************************
+  Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include "igzip_checksums.h" + +uint32_t adler32_base(uint32_t adler32, uint8_t * start, uint32_t length) +{ + uint8_t *end, *next = start; + uint64_t A, B; + + A = adler32 & 0xffff; + B = adler32 >> 16; + + while (length > MAX_ADLER_BUF) { + end = next + MAX_ADLER_BUF; + for (; next < end; next++) { + A += *next; + B += A; + } + + A = A % ADLER_MOD; + B = B % ADLER_MOD; + length -= MAX_ADLER_BUF; + } + + end = next + length; + for (; next < end; next++) { + A += *next; + B += A; + } + + A = A % ADLER_MOD; + B = B % ADLER_MOD; + + return B << 16 | A; +} diff --git a/src/spdk/isa-l/igzip/adler32_perf.c b/src/spdk/isa-l/igzip/adler32_perf.c new file mode 100644 index 000000000..055e0725f --- /dev/null +++ b/src/spdk/isa-l/igzip/adler32_perf.c @@ -0,0 +1,72 @@ +/********************************************************************** + Copyright(c) 2011-2019 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +#define TEST_LEN 8*1024 +#define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. 
+#define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +#define TEST_LEN (2 * GT_L3_CACHE) +#define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +#define TEST_SEED 0x1234 +#endif + +int main(int argc, char *argv[]) +{ + void *buf; + uint32_t checksum = 0; + struct perf start; + + printf("adler32_perf:\n"); + + if (posix_memalign(&buf, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + memset(buf, 0, TEST_LEN); + + BENCHMARK(&start, BENCHMARK_TIME, checksum |= isal_adler32(TEST_SEED, buf, TEST_LEN)); + printf("adler32" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + return 0; +} diff --git a/src/spdk/isa-l/igzip/adler32_sse.asm b/src/spdk/isa-l/igzip/adler32_sse.asm new file mode 100644 index 000000000..83f577d24 --- /dev/null +++ b/src/spdk/isa-l/igzip/adler32_sse.asm @@ -0,0 +1,249 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; uint32_t adler32_sse(uint32_t init, const unsigned char *buf, uint64_t len)
+; init: low 16 bits seed a, high 16 bits seed b. Returns (b << 16) | a, both mod BASE.
+%define LIMIT 5552		; bytes processed between modular reductions (multiple of 8)
+%define BASE  0xFFF1 ; 65521
+
+%include "reg_sizes.asm"
+
+default rel
+[bits 64]
+
+; need to keep free: eax, ecx, edx
+; (the div instructions below use edx:eax as dividend and ecx as divisor)
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg1   rdi
+ %define arg2   rsi
+ %define arg3   rdx
+
+ %define init_d edi
+ %define data   r9
+ %define size   r10
+ %define s      r11
+ %define a_d    r12d
+ %define b_d    r8d
+ %define end    r13
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push	r12
+	push	r13
+ %endmacro
+%macro FUNC_RESTORE 0
+	pop	r13
+	pop	r12
+ %endmacro
+%endif
+
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg1   rcx
+ %define arg2   rdx
+ %define arg3   r8
+
+ %define init_d r12d
+ %define data   r9
+ %define size   r10
+ %define s      r11
+ %define a_d    esi
+ %define b_d    edi
+ %define end    r13
+
+ %define stack_size  5*8		; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack	stack_size
+	save_reg	rdi,  0*8
+	save_reg	rsi,  1*8
+	save_reg	r12,  2*8
+	save_reg	r13,  3*8
+	end_prolog
+	mov	init_d, ecx	; initialize init_d from arg1 to keep ecx free
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	mov	rdi,  [rsp + 0*8]
+	mov	rsi,  [rsp + 1*8]
+	mov	r12,  [rsp + 2*8]
+	mov	r13,  [rsp + 3*8]
+	add	rsp, stack_size
+ %endmacro
+%endif
+
+%define xa	xmm0
+%define xb	xmm1
+%define xdata0	xmm2
+%define xdata1	xmm3
+%define xsa	xmm4
+
+global adler32_sse:ISAL_SYM_TYPE_FUNCTION
+func(adler32_sse)
+	FUNC_SAVE
+
+	mov	data, arg2
+	mov	size, arg3
+
+	mov	b_d, init_d
+	shr	b_d, 16			; b = init >> 16
+	and	init_d, 0xFFFF		; a = init & 0xFFFF
+	cmp	size, 32
+	jb	.lt64			; despite the label name, taken when size < 32
+	movd	xa, init_d
+	pxor	xb, xb
+.sloop1:
+	mov	s, LIMIT
+	cmp	s, size
+	cmova	s, size		; s = min(size, LIMIT)
+	lea	end, [data + s - 7]
+	cmp	data, end
+	jae	.skip_loop_1a
+align 32
+.sloop1a:
+	; do 8 adds
+	pmovzxbd xdata0, [data]		; bytes 0..3 zero-extended to 4 dwords
+	pmovzxbd xdata1, [data + 4]	; bytes 4..7 zero-extended to 4 dwords
+	add	data, 8
+	paddd	xa, xdata0
+	paddd	xb, xa
+	paddd	xa, xdata1
+	paddd	xb, xa
+	cmp	data, end
+	jb	.sloop1a
+
+.skip_loop_1a:
+	add	end, 7			; end = first byte past this run of s bytes
+
+	test	s, 7
+	jnz	.do_final		; s not a multiple of 8: scalar tail
+
+	; either we're done, or we just did LIMIT
+	sub	size, s
+
+	; reduce
+	pslld	xb, 2   ; b is scaled by 4
+	movdqa	xsa, xa ; scaled a
+	pmulld	xsa, [A_SCALE]
+
+	phaddd	xa, xa
+	phaddd	xb, xb
+	phaddd	xsa, xsa
+	phaddd	xa, xa
+	phaddd	xb, xb
+	phaddd	xsa, xsa
+
+	movd	eax, xa
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	mov	a_d, edx
+
+	psubd	xb, xsa
+	movd	eax, xb
+	add	eax, b_d
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	mov	b_d, edx
+
+	test	size, size
+	jz	.finish
+
+	; continue loop
+	movd	xa, a_d
+	pxor	xb, xb
+	jmp	.sloop1
+
+.finish:
+	mov	eax, b_d
+	shl	eax, 16
+	or	eax, a_d
+	jmp	.end
+
+.lt64:
+	mov	a_d, init_d
+	lea	end, [data + size]
+	test	size, size
+	jnz	.final_loop
+	jmp	.zero_size
+
+	; handle remaining 1...7 bytes (the vector loop consumes 8 per pass)
+.do_final:
+	; reduce
+	pslld	xb, 2   ; b is scaled by 4
+	movdqa	xsa, xa ; scaled a
+	pmulld	xsa, [A_SCALE]
+
+	phaddd	xa, xa
+	phaddd	xb, xb
+	phaddd	xsa, xsa
+	phaddd	xa, xa
+	phaddd	xb, xb
+	phaddd	xsa, xsa
+	psubd	xb, xsa
+
+	movd	a_d, xa		; unreduced sums; the modulo is applied at .zero_size
+	movd	eax, xb
+	add	b_d, eax
+
+align 32
+.final_loop:			; scalar: one byte per iteration until data == end
+	movzx	eax, byte[data]
+	add	a_d, eax
+	inc	data
+	add	b_d, a_d
+	cmp	data, end
+	jb	.final_loop
+
+.zero_size:
+	mov	eax, a_d
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	mov	a_d, edx
+
+	mov	eax, b_d
+	xor	edx, edx
+	mov	ecx, BASE
+	div	ecx		; divide edx:eax by ecx, quot->eax, rem->edx
+	shl	edx, 16
+	or	edx, a_d
+	mov	eax, edx
+
+.end:
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+align 32
+A_SCALE:			; dwords 0,1,2,3: per-lane weights used to correct b
+	dq	0x0000000100000000, 0x0000000300000002
diff --git a/src/spdk/isa-l/igzip/bitbuf2.asm b/src/spdk/isa-l/igzip/bitbuf2.asm
new file mode 100644
index 000000000..71493825e
--- /dev/null
+++ b/src/spdk/isa-l/igzip/bitbuf2.asm
@@ -0,0 +1,64 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;    * Redistributions of source code must retain the above copyright
+;      notice, this list of conditions and the following disclaimer.
+;    * Redistributions in binary form must reproduce the above copyright
+;      notice, this list of conditions and the following disclaimer in
+;      the documentation and/or other materials provided with the
+;      distribution.
+;    * Neither the name of Intel Corporation nor the names of its
+;      contributors may be used to endorse or promote products derived
+;      from this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED.
%include "options.asm"
%include "stdmac.asm"

; write_bits m_bits, m_bit_count, code, count, m_out_buf
;
; Appends `count` bits of `code` to the bit accumulator `m_bits`, stores the
; accumulator at [m_out_buf], advances m_out_buf by the number of whole bytes
; now held and keeps the leftover (< 8) bits in m_bits / m_bit_count.
;
; Assumes m_out_buf is a register
; Clobbers RCX
; code is clobbered
%macro write_bits 5
%define %%m_bits %1
%define %%m_bit_count %2
%define %%code %3
%define %%count %4
%define %%m_out_buf %5

	SHLX	%%code, %%code, %%m_bit_count	; align code above the pending bits

	or	%%m_bits, %%code
	add	%%m_bit_count, %%count

	mov	[%%m_out_buf], %%m_bits		; store the whole accumulator
	mov	rcx, %%m_bit_count
	shr	rcx, 3 ; rcx = bytes
	add	%%m_out_buf, rcx
	shl	rcx, 3 ; rcx = bits
	and	%%m_bit_count, 0x7		; bits that did not fill a byte

	SHRX	%%m_bits, %%m_bits, rcx		; drop the bits just committed
%endm

; write_dword data, addr
; Stores the 32-bit form of register `data` (the macro appends "d" to the
; register name) at [addr] and advances addr by 4.
%macro write_dword 2
%define %%data %1d
%define %%addr %2
	mov	[%%addr], %%data
	add	%%addr, 4
%endm
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef BITBUF2_H +#define BITBUF2_H + +#include "igzip_lib.h" +#include "unaligned.h" + +#ifdef _MSC_VER +#define inline __inline +#endif + + +/* MAX_BITBUF_BIT WRITE is the maximum number of bits than can be safely written + * by consecutive calls of write_bits. 
Note this assumes the bitbuf is in a + * state that is possible at the exit of write_bits */ +#define MAX_BITBUF_BIT_WRITE 56 + +static inline void init(struct BitBuf2 *me) +{ + me->m_bits = 0; + me->m_bit_count = 0; +} + +static inline void set_buf(struct BitBuf2 *me, unsigned char *buf, unsigned int len) +{ + unsigned int slop = 8; + me->m_out_buf = me->m_out_start = buf; + me->m_out_end = buf + len - slop; +} + +static inline int is_full(struct BitBuf2 *me) +{ + return (me->m_out_buf > me->m_out_end); +} + +static inline uint8_t * buffer_ptr(struct BitBuf2 *me) +{ + return me->m_out_buf; +} + +static inline uint32_t buffer_used(struct BitBuf2 *me) +{ + return (uint32_t)(me->m_out_buf - me->m_out_start); +} + +static inline uint32_t buffer_bits_used(struct BitBuf2 *me) +{ + return (8 * (uint32_t)(me->m_out_buf - me->m_out_start) + me->m_bit_count); +} + +static inline void flush_bits(struct BitBuf2 *me) +{ + uint32_t bits; + store_u64(me->m_out_buf, me->m_bits); + bits = me->m_bit_count & ~7; + me->m_bit_count -= bits; + me->m_out_buf += bits/8; + me->m_bits >>= bits; + +} + +/* Can write up to 8 bytes to output buffer */ +static inline void flush(struct BitBuf2 *me) +{ + uint32_t bytes; + if (me->m_bit_count) { + store_u64(me->m_out_buf, me->m_bits); + bytes = (me->m_bit_count + 7) / 8; + me->m_out_buf += bytes; + } + me->m_bits = 0; + me->m_bit_count = 0; +} + +static inline void check_space(struct BitBuf2 *me, uint32_t num_bits) +{ + /* Checks if bitbuf has num_bits extra space and flushes the bytes in + * the bitbuf if it doesn't. */ + if (63 - me->m_bit_count < num_bits) + flush_bits(me); +} + +static inline void write_bits_unsafe(struct BitBuf2 *me, uint64_t code, uint32_t count) +{ + me->m_bits |= code << me->m_bit_count; + me->m_bit_count += count; +} + +static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count) +{ /* Assumes there is space to fit code into m_bits. 
*/ + me->m_bits |= code << me->m_bit_count; + me->m_bit_count += count; + flush_bits(me); +} + +static inline void write_bits_flush(struct BitBuf2 *me, uint64_t code, uint32_t count) +{ /* Assumes there is space to fit code into m_bits. */ + me->m_bits |= code << me->m_bit_count; + me->m_bit_count += count; + flush(me); +} + +#endif //BITBUF2_H diff --git a/src/spdk/isa-l/igzip/checksum32_funcs_test.c b/src/spdk/isa-l/igzip/checksum32_funcs_test.c new file mode 100644 index 000000000..cbb5d1bf5 --- /dev/null +++ b/src/spdk/isa-l/igzip/checksum32_funcs_test.c @@ -0,0 +1,308 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include "igzip_checksums.h" +#include "checksum_test_ref.h" +#include "types.h" + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define MAX_BUF 512 +#define TEST_SIZE 20 + +typedef uint32_t(*checksum32_func_t) (uint32_t, const unsigned char *, uint64_t); + +typedef struct func_case { + char *note; + checksum32_func_t checksum32_func_call; + checksum32_func_t checksum32_ref_call; +} func_case_t; + +func_case_t test_funcs[] = { + {"checksum32_adler", isal_adler32, adler_ref}, +}; + +// Generates pseudo-random data + +void rand_buffer(unsigned char *buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +// Test cases +int zeros_test(func_case_t * test_func); +int simple_pattern_test(func_case_t * test_func); +int seeds_sizes_test(func_case_t * test_func); +int eob_test(func_case_t * test_func); +int update_test(func_case_t * test_func); +int update_over_mod_test(func_case_t * test_func); + +int verbose = 0; +void *buf_alloc = NULL; + +int main(int argc, char *argv[]) +{ + int fail = 0, fail_case; + int i, ret; + func_case_t *test_func; + + verbose = argc - 1; + + // Align to MAX_BUF boundary + ret = posix_memalign(&buf_alloc, MAX_BUF, MAX_BUF * TEST_SIZE); + if (ret) { + printf("alloc error: Fail"); + return -1; + } + srand(TEST_SEED); + printf("CHECKSUM32 Tests seed=0x%x\n", 
TEST_SEED); + + for (i = 0; i < sizeof(test_funcs) / sizeof(test_funcs[0]); i++) { + fail_case = 0; + test_func = &test_funcs[i]; + + printf("Test %s ", test_func->note); + fail_case += zeros_test(test_func); + fail_case += simple_pattern_test(test_func); + fail_case += seeds_sizes_test(test_func); + fail_case += eob_test(test_func); + fail_case += update_test(test_func); + fail_case += update_over_mod_test(test_func); + printf("Test %s done: %s\n", test_func->note, fail_case ? "Fail" : "Pass"); + + if (fail_case) { + printf("\n%s Failed %d tests\n", test_func->note, fail_case); + fail++; + } + } + + printf("CHECKSUM32 Tests all done: %s\n", fail ? "Fail" : "Pass"); + + return fail; +} + +// Test of all zeros +int zeros_test(func_case_t * test_func) +{ + uint32_t c_dut, c_ref; + int fail = 0; + unsigned char *buf = NULL; + + buf = (unsigned char *)buf_alloc; + memset(buf, 0, MAX_BUF * 10); + c_dut = test_func->checksum32_func_call(TEST_SEED, buf, MAX_BUF * 10); + c_ref = test_func->checksum32_ref_call(TEST_SEED, buf, MAX_BUF * 10); + + if (c_dut != c_ref) { + fail++; + printf("\n opt ref\n"); + printf(" ------ ------\n"); + printf("checksum zero = 0x%8x 0x%8x \n", c_dut, c_ref); + } else + printf("."); + + return fail; +} + +// Another simple test pattern +int simple_pattern_test(func_case_t * test_func) +{ + uint32_t c_dut, c_ref; + int fail = 0; + unsigned char *buf = NULL; + + buf = (unsigned char *)buf_alloc; + memset(buf, 0x8a, MAX_BUF); + c_dut = test_func->checksum32_func_call(TEST_SEED, buf, MAX_BUF); + c_ref = test_func->checksum32_ref_call(TEST_SEED, buf, MAX_BUF); + if (c_dut != c_ref) + fail++; + if (verbose) + printf("checksum all 8a = 0x%8x 0x%8x\n", c_dut, c_ref); + else + printf("."); + + return fail; +} + +int seeds_sizes_test(func_case_t * test_func) +{ + uint32_t c_dut, c_ref; + int fail = 0; + int i; + uint32_t r, s; + unsigned char *buf = NULL; + + // Do a few random tests + buf = (unsigned char *)buf_alloc; //reset buf + r = rand(); + 
rand_buffer(buf, MAX_BUF * TEST_SIZE); + + for (i = 0; i < TEST_SIZE; i++) { + c_dut = test_func->checksum32_func_call(r, buf, MAX_BUF); + c_ref = test_func->checksum32_ref_call(r, buf, MAX_BUF); + if (c_dut != c_ref) + fail++; + if (verbose) + printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref); + else + printf("."); + buf += MAX_BUF; + } + + // Do a few random sizes + buf = (unsigned char *)buf_alloc; //reset buf + r = rand(); + + for (i = MAX_BUF; i >= 0; i--) { + c_dut = test_func->checksum32_func_call(r, buf, i); + c_ref = test_func->checksum32_ref_call(r, buf, i); + if (c_dut != c_ref) { + fail++; + printf("fail random size%i 0x%8x 0x%8x\n", i, c_dut, c_ref); + } else + printf("."); + } + + // Try different seeds + for (s = 0; s < 20; s++) { + buf = (unsigned char *)buf_alloc; //reset buf + + r = rand(); // just to get a new seed + rand_buffer(buf, MAX_BUF * TEST_SIZE); // new pseudo-rand data + + if (verbose) + printf("seed = 0x%x\n", r); + + for (i = 0; i < TEST_SIZE; i++) { + c_dut = test_func->checksum32_func_call(r, buf, MAX_BUF); + c_ref = test_func->checksum32_ref_call(r, buf, MAX_BUF); + if (c_dut != c_ref) + fail++; + if (verbose) + printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref); + else + printf("."); + buf += MAX_BUF; + } + } + + return fail; +} + +// Run tests at end of buffer +int eob_test(func_case_t * test_func) +{ + uint32_t c_dut, c_ref; + int fail = 0; + int i; + unsigned char *buf = NULL; + + buf = (unsigned char *)buf_alloc; //reset buf + buf = buf + ((MAX_BUF - 1) * TEST_SIZE); //Line up TEST_SIZE from end + for (i = 0; i < TEST_SIZE; i++) { + c_dut = test_func->checksum32_func_call(TEST_SEED, buf + i, TEST_SIZE - i); + c_ref = test_func->checksum32_ref_call(TEST_SEED, buf + i, TEST_SIZE - i); + if (c_dut != c_ref) + fail++; + if (verbose) + printf("checksum eob rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref); + else + printf("."); + } + + return fail; +} + +int update_test(func_case_t * test_func) +{ + uint32_t c_dut, 
c_ref; + int fail = 0; + int i; + uint32_t r; + unsigned char *buf = NULL; + + buf = (unsigned char *)buf_alloc; //reset buf + r = rand(); + // Process the whole buf with reference func single call. + c_ref = test_func->checksum32_ref_call(r, buf, MAX_BUF * TEST_SIZE); + // Process buf with update method. + for (i = 0; i < TEST_SIZE; i++) { + c_dut = test_func->checksum32_func_call(r, buf, MAX_BUF); + // Update checksum seeds and buf pointer. + r = c_dut; + buf += MAX_BUF; + } + + if (c_dut != c_ref) + fail++; + if (verbose) + printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref); + else + printf("."); + + return fail; +} + +int update_over_mod_test(func_case_t * test_func) +{ + uint32_t c_dut, c_ref; + int fail = 0; + int i; + unsigned char *buf = NULL; + + buf = malloc(ADLER_MOD); + memset(buf, 0xff, ADLER_MOD); + + c_ref = c_dut = rand(); + + // Process buf with update method. + for (i = 0; i < 20; i++) { + c_ref = test_func->checksum32_ref_call(c_ref, buf, ADLER_MOD - 64); + c_dut = test_func->checksum32_func_call(c_dut, buf, ADLER_MOD - 64); + } + + if (c_dut != c_ref) + fail++; + if (verbose) + printf("checksum rand%3d = 0x%8x 0x%8x\n", i, c_dut, c_ref); + else + printf("."); + + free(buf); + return fail; +} diff --git a/src/spdk/isa-l/igzip/checksum_test_ref.h b/src/spdk/isa-l/igzip/checksum_test_ref.h new file mode 100644 index 000000000..b561be975 --- /dev/null +++ b/src/spdk/isa-l/igzip/checksum_test_ref.h @@ -0,0 +1,102 @@ +/* + * Reference checksums used in compression tests + */ + +#ifndef CHECKSUM_TEST_REF_H +#define CHECKSUM_TEST_REF_H + +#include + +uint32_t inflate_crc_table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 
/*
 * Reference checksums used in compression tests
 */

#ifndef CHECKSUM_TEST_REF_H
#define CHECKSUM_TEST_REF_H

#include <stdint.h>

/* Byte-indexed lookup table consumed by crc32_gzip_refl_ref() below. */
uint32_t inflate_crc_table[256] = {
	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
	0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
	0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
	0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
	0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
	0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
	0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
	0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
	0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
	0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
	0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
	0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
	0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
	0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
	0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
	0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
	0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
	0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
	0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
	0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
	0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
	0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
	0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
	0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
	0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
	0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
	0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
	0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
	0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
	0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
	0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
	0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
	0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
	0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
	0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
	0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
	0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
	0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
	0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

/* Table-driven CRC-32 over buf[0..len).  The running value is bit-inverted
 * on entry and again on exit, so a previous result can be passed back in as
 * crc to continue a stream. */
uint32_t crc32_gzip_refl_ref(uint32_t crc, const unsigned char *buf, uint64_t len)
{
	uint32_t state = ~crc;

	for (; len != 0; len--, buf++) {
		const uint32_t slot = (state ^ *buf) & 0xff;

		state = inflate_crc_table[slot] ^ (state >> 8);
	}
	return ~state;
}

#define ADLER_MOD 65521

/* Byte-at-a-time Adler-32.  init carries the previous checksum: low 16 bits
 * are sum a, high 16 bits are sum b.  With len == 0 init is returned
 * unchanged. */
uint32_t adler_ref(uint32_t init, const unsigned char *buf, uint64_t len)
{
	uint32_t sum_a = init & 0xffff;
	uint32_t sum_b = init >> 16;

	while (len != 0) {
		sum_a = (sum_a + *buf) % ADLER_MOD;
		sum_b = (sum_b + sum_a) % ADLER_MOD;
		buf++;
		len--;
	}
	return sum_a | (sum_b << 16);
}

#endif /* CHECKSUM_TEST_REF_H */
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
;; Structure layout helpers.  Each START_FIELDS / FIELD sequence below
;; assigns byte offsets to the members of one structure; the trailing
;; ";; name" comment on START_FIELDS names the structure being described.
;; NOTE(review): offsets presumably have to match the C definitions in
;; igzip_lib.h and friends - confirm when changing either side.

;; START_FIELDS
;; Resets the running offset and the running maximum alignment.
%macro START_FIELDS 0
%assign _FIELD_OFFSET 0
%assign _STRUCT_ALIGN 0
%endm

;; FIELD name size align
;; Rounds the running offset up to `align`, defines `name` as that offset,
;; advances the offset by `size` and tracks the largest alignment seen.
%macro FIELD 3
%define %%name  %1
%define %%size  %2
%define %%align %3

%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1))
%%name	equ	_FIELD_OFFSET
%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size)
%if (%%align > _STRUCT_ALIGN)
%assign _STRUCT_ALIGN %%align
%endif
%endm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

START_FIELDS	;; BitBuf2

;;	name		size	align
FIELD	_m_bits,	8,	8
FIELD	_m_bit_count,	4,	4
FIELD	_m_out_buf,	8,	8
FIELD	_m_out_end,	8,	8
FIELD	_m_out_start,	8,	8

%assign _BitBuf2_size	_FIELD_OFFSET
%assign _BitBuf2_align	_STRUCT_ALIGN

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define HIST_ELEM_SIZE 4

START_FIELDS	;; isal_mod_hist

;;	name		size	align
FIELD	_d_hist,	30*HIST_ELEM_SIZE,	HIST_ELEM_SIZE
FIELD	_ll_hist,	513*HIST_ELEM_SIZE,	HIST_ELEM_SIZE

%assign _isal_mod_hist_size	_FIELD_OFFSET
%assign _isal_mod_hist_align	_STRUCT_ALIGN

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%define HUFF_CODE_SIZE 4

START_FIELDS	;; hufftables_icf

;;	name		size	align
FIELD	_dist_table,	31 * HUFF_CODE_SIZE,	HUFF_CODE_SIZE
FIELD	_lit_len_table,	513 * HUFF_CODE_SIZE,	HUFF_CODE_SIZE

%assign _hufftables_icf_size	_FIELD_OFFSET
%assign _hufftables_icf_align	_STRUCT_ALIGN

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

START_FIELDS	;; hash8k_buf

;;	name		size	align
FIELD	_hash8k_table,	2 * IGZIP_HASH8K_HASH_SIZE,	2

;; NOTE(review): these two symbols are named _hash_buf1_*, not _hash8k_buf_*
%assign _hash_buf1_size	_FIELD_OFFSET
%assign _hash_buf1_align	_STRUCT_ALIGN

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

START_FIELDS	;; hash_map_buf

;;	name		size	align
FIELD	_hash_table,	2 * IGZIP_HASH_MAP_HASH_SIZE,	2
FIELD	_matches_next,	8,	8
FIELD	_matches_end,	8,	8
FIELD	_matches,	4*4*1024,	4
FIELD	_overflow,	4*LA,	4

%assign _hash_map_buf_size	_FIELD_OFFSET
%assign _hash_map_buf_align	_STRUCT_ALIGN

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%define DEF_MAX_HDR_SIZE 328
START_FIELDS	;; level_buf

;;	name		size	align
FIELD	_encode_tables,		_hufftables_icf_size,	_hufftables_icf_align
FIELD	_hist,		_isal_mod_hist_size,	_isal_mod_hist_align
FIELD	_deflate_hdr_count,	4,	4
FIELD	_deflate_hdr_extra_bits,4,	4
FIELD	_deflate_hdr,		DEF_MAX_HDR_SIZE,	1
FIELD	_icf_buf_next,		8,	8
FIELD	_icf_buf_avail_out,	8,	8
FIELD	_icf_buf_start,		8,	8
FIELD	_lvl_extra,		_hash_map_buf_size,	_hash_map_buf_align

%assign _level_buf_base_size	_FIELD_OFFSET
%assign _level_buf_base_align	_STRUCT_ALIGN

;; Offsets of nested members, measured from the start of level_buf
_hash8k_hash_table	equ	_lvl_extra + _hash8k_table
_hash_map_hash_table	equ	_lvl_extra + _hash_table
_hash_map_matches_next	equ	_lvl_extra + _matches_next
_hash_map_matches_end	equ	_lvl_extra + _matches_end
_hash_map_matches	equ	_lvl_extra + _matches
_hist_lit_len		equ	_hist+_ll_hist
_hist_dist		equ	_hist+_d_hist

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

START_FIELDS	;; isal_zstate

;;	name		size	align
FIELD	_total_in_start,4,	4
FIELD	_block_next,	4,	4
FIELD	_block_end,	4,	4
FIELD	_dist_mask,	4,	4
FIELD	_hash_mask,	4,	4
FIELD	_state,		4,	4
FIELD	_bitbuf,	_BitBuf2_size,	_BitBuf2_align
FIELD	_crc,		4,	4
FIELD	_has_wrap_hdr,	1,	1
FIELD	_has_eob_hdr,	1,	1
FIELD	_has_eob,	1,	1
FIELD	_has_hist,	1,	1
FIELD	_has_level_buf_init,	2,	2
FIELD	_count,		4,	4
FIELD	_tmp_out_buff,	16,	1
FIELD	_tmp_out_start,	4,	4
FIELD	_tmp_out_end,	4,	4
FIELD	_b_bytes_valid,	4,	4
FIELD	_b_bytes_processed,	4,	4
FIELD	_buffer,	BSIZE,	1
FIELD	_head,		IGZIP_LVL0_HASH_SIZE*2,	2
%assign _isal_zstate_size	_FIELD_OFFSET
%assign _isal_zstate_align	_STRUCT_ALIGN

;; Offsets of the embedded BitBuf2 members, measured from isal_zstate
_bitbuf_m_bits		equ	_bitbuf+_m_bits
_bitbuf_m_bit_count	equ	_bitbuf+_m_bit_count
_bitbuf_m_out_buf	equ	_bitbuf+_m_out_buf
_bitbuf_m_out_end	equ	_bitbuf+_m_out_end
_bitbuf_m_out_start	equ	_bitbuf+_m_out_start

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

START_FIELDS	;; isal_zstream

;;	name		size	align
FIELD	_next_in,	8,	8
FIELD	_avail_in,	4,	4
FIELD	_total_in,	4,	4
FIELD	_next_out,	8,	8
FIELD	_avail_out,	4,	4
FIELD	_total_out,	4,	4
FIELD	_hufftables,	8,	8
FIELD	_level,		4,	4
FIELD	_level_buf_size,	4,	4
FIELD	_level_buf,	8,	8
FIELD	_end_of_stream,	2,	2
FIELD	_flush,		2,	2
FIELD	_gzip_flag,	2,	2
FIELD	_hist_bits,	2,	2
FIELD	_internal_state,	_isal_zstate_size,	_isal_zstate_align

%assign _isal_zstream_size	_FIELD_OFFSET
%assign _isal_zstream_align	_STRUCT_ALIGN

;; Offsets of isal_zstate members, measured from the start of isal_zstream
_internal_state_total_in_start		equ	_internal_state+_total_in_start
_internal_state_block_next		equ	_internal_state+_block_next
_internal_state_block_end		equ	_internal_state+_block_end
_internal_state_b_bytes_valid		equ	_internal_state+_b_bytes_valid
_internal_state_b_bytes_processed	equ	_internal_state+_b_bytes_processed
_internal_state_crc			equ	_internal_state+_crc
_internal_state_dist_mask		equ	_internal_state+_dist_mask
_internal_state_hash_mask		equ	_internal_state+_hash_mask
_internal_state_bitbuf			equ	_internal_state+_bitbuf
_internal_state_state			equ	_internal_state+_state
_internal_state_count			equ	_internal_state+_count
_internal_state_tmp_out_buff		equ	_internal_state+_tmp_out_buff
_internal_state_tmp_out_start		equ	_internal_state+_tmp_out_start
_internal_state_tmp_out_end		equ	_internal_state+_tmp_out_end
_internal_state_has_wrap_hdr		equ	_internal_state+_has_wrap_hdr
_internal_state_has_eob			equ	_internal_state+_has_eob
_internal_state_has_eob_hdr		equ	_internal_state+_has_eob_hdr
_internal_state_has_hist		equ	_internal_state+_has_hist
_internal_state_has_level_buf_init	equ	_internal_state+_has_level_buf_init
_internal_state_buffer			equ	_internal_state+_buffer
_internal_state_head			equ	_internal_state+_head
_internal_state_bitbuf_m_bits		equ	_internal_state+_bitbuf_m_bits
_internal_state_bitbuf_m_bit_count	equ	_internal_state+_bitbuf_m_bit_count
_internal_state_bitbuf_m_out_buf	equ	_internal_state+_bitbuf_m_out_buf
_internal_state_bitbuf_m_out_end	equ	_internal_state+_bitbuf_m_out_end
_internal_state_bitbuf_m_out_start	equ	_internal_state+_bitbuf_m_out_start

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Internal States
;; Consecutive values starting at 0; each one is the previous plus one.
ZSTATE_NEW_HDR			equ	0
ZSTATE_HDR			equ	(ZSTATE_NEW_HDR + 1)
ZSTATE_CREATE_HDR		equ	(ZSTATE_HDR + 1)
ZSTATE_BODY			equ	(ZSTATE_CREATE_HDR + 1)
ZSTATE_FLUSH_READ_BUFFER	equ	(ZSTATE_BODY + 1)
ZSTATE_FLUSH_ICF_BUFFER		equ	(ZSTATE_FLUSH_READ_BUFFER + 1)
ZSTATE_TYPE0_HDR		equ	(ZSTATE_FLUSH_ICF_BUFFER + 1)
ZSTATE_TYPE0_BODY		equ	(ZSTATE_TYPE0_HDR + 1)
ZSTATE_SYNC_FLUSH		equ	(ZSTATE_TYPE0_BODY + 1)
ZSTATE_FLUSH_WRITE_BUFFER	equ	(ZSTATE_SYNC_FLUSH + 1)
ZSTATE_TRL			equ	(ZSTATE_FLUSH_WRITE_BUFFER + 1)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_NO_FLUSH		equ 0
_SYNC_FLUSH		equ 1
_FULL_FLUSH		equ 2
_STORED_BLK		equ 0
%assign _STORED_BLK_END 65535
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
IGZIP_NO_HIST		equ 0
IGZIP_HIST		equ 1
IGZIP_DICT_HIST		equ 2

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* ---- encode_df.c ---- */
/* NOTE(review): the names of the angle-bracket headers were lost when this
 * patch was extracted; the list below is a reconstruction - TODO confirm
 * against upstream ISA-L. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <memory.h>

#if __x86_64__ || __i386__ || _M_X64 || _M_IX86
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86

#include "encode_df.h"
#include "bitbuf2.h"

/* Encodes intermediate-format tokens from [next_in, end_in) into bb using
 * hufftables, stopping early once bb reports full.
 * Returns the first token that was not encoded; this equals end_in when
 * every token was written. */
struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
					    struct deflate_icf *end_in, struct BitBuf2 *bb,
					    struct hufftables_icf *hufftables)
{
	struct huff_code lsym, dsym;

	while (next_in < end_in && !is_full(bb)) {
		lsym = hufftables->lit_len_table[next_in->lit_len];
		dsym = hufftables->dist_lit_table[next_in->lit_dist];

		// insert ll code, dist_code, and extra_bits
		// (three unchecked writes, then a single flush per token)
		write_bits_unsafe(bb, lsym.code_and_extra, lsym.length);
		write_bits_unsafe(bb, dsym.code, dsym.length);
		write_bits_unsafe(bb, next_in->dist_extra, dsym.extra_bit_count);
		flush_bits(bb);

		next_in++;
	}

	return next_in;
}

/* ---- encode_df.h ---- */
#ifndef ENCODE_DF_H
#define ENCODE_DF_H

#include <stdint.h>
#include "igzip_lib.h"
#include "huff_codes.h"

/* Deflate Intermediate Compression Format */
#define LIT_LEN_BIT_COUNT 10
#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
#define DIST_LIT_BIT_COUNT 9
#define DIST_LIT_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
#define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT
#define NULL_DIST_SYM 30

#define LEN_START ISAL_DEF_LIT_SYMBOLS
#define LEN_OFFSET (LEN_START - ISAL_DEF_MIN_MATCH)
#define LEN_MAX (LEN_OFFSET + ISAL_DEF_MAX_MATCH)
#define LIT_START (NULL_DIST_SYM + 1)
#define ICF_CODE_LEN 32

/* One 32-bit token: 10 bits lit_len, 9 bits lit_dist, and the remaining
 * ICF_CODE_LEN - 9 - 10 = 13 bits of dist_extra. */
struct deflate_icf {
	uint32_t lit_len:LIT_LEN_BIT_COUNT;
	uint32_t lit_dist:DIST_LIT_BIT_COUNT;
	uint32_t dist_extra:ICF_CODE_LEN - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET;
};

/* Same parameter list as encode_deflate_icf_base() in encode_df.c. */
struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in,
				       struct BitBuf2 *bb, struct hufftables_icf * hufftables);
#endif
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" +%include "lz0a_const.asm" +%include "data_struct2.asm" +%include "stdmac.asm" + +%define ARCH 04 +%define USE_HSWNI + +; tree entry is 4 bytes: +; lit/len tree (513 entries) +; | 3 | 2 | 1 | 0 | +; | len | code | +; +; dist tree +; | 3 | 2 | 1 | 0 | +; |eblen:codlen| code | + +; token format: +; DIST_OFFSET:0 : lit/len +; 31:(DIST_OFFSET + 5) : dist Extra Bits +; (DIST_OFFSET + 5):DIST_OFFSET : dist code +; lit/len: 0-256 (literal) +; 257-512 (dist + 254) + +; returns final token pointer +; equal to token_end if successful +; uint32_t* encode_df(uint32_t *token_start, uint32_t *token_end, +; BitBuf *out_buf, uint32_t *trees); + +%ifidn __OUTPUT_FORMAT__, win64 +%define arg1 rcx +%define arg2 rdx +%define arg3 r8 +%define arg4 r9 +%define sym rsi +%define dsym rdi +%define hufftables r9 +%define ptr r11 +%else +; Linux +%define arg1 rdi +%define arg2 rsi +%define arg3 rdx +%define arg4 rcx +%define sym r9 +%define dsym r8 +%define hufftables r11 +%define ptr rdi +%endif + +%define in_buf_end arg2 +%define bitbuf arg3 +%define out_buf bitbuf +; bit_count is rcx +%define bits rax +%define data r12 +%define tmp rbx +%define len dsym +%define tmp2 r10 +%define end_ptr rbp + +%define LIT_MASK ((0x1 << LIT_LEN_BIT_COUNT) - 1) +%define DIST_MASK ((0x1 << DIST_LIT_BIT_COUNT) - 1) + +%define codes1 ymm1 +%define code_lens1 ymm2 +%define codes2 ymm3 +%define code_lens2 ymm4 +%define codes3 ymm5 +%define code_lens3 ymm6 +%define codes4 ymm7 +%define syms ymm7 + +%define code_lens4 ymm8 +%define dsyms ymm8 + +%define ytmp ymm9 +%define codes_lookup1 ymm10 +%define codes_lookup2 ymm11 +%define datas ymm12 +%define ybits ymm13 +%define ybits_count ymm14 +%define yoffset_mask ymm15 + +%define VECTOR_SIZE 0x20 +%define VECTOR_LOOP_PROCESSED (2 * VECTOR_SIZE) +%define VECTOR_SLOP 0x20 - 8 + +gpr_save_mem_offset equ 0 +gpr_save_mem_size equ 8 * 6 +xmm_save_mem_offset equ 
gpr_save_mem_offset + gpr_save_mem_size +xmm_save_mem_size equ 10 * 16 +bitbuf_mem_offset equ xmm_save_mem_offset + xmm_save_mem_size +bitbuf_mem_size equ 8 +stack_size equ gpr_save_mem_size + xmm_save_mem_size + bitbuf_mem_size + + +%macro FUNC_SAVE 0 + sub rsp, stack_size + mov [rsp + gpr_save_mem_offset + 0*8], rbx + mov [rsp + gpr_save_mem_offset + 1*8], rbp + mov [rsp + gpr_save_mem_offset + 2*8], r12 + +%ifidn __OUTPUT_FORMAT__, win64 + mov [rsp + gpr_save_mem_offset + 3*8], rsi + mov [rsp + gpr_save_mem_offset + 4*8], rdi + + MOVDQU [rsp + xmm_save_mem_offset + 0*8], xmm6 + MOVDQU [rsp + xmm_save_mem_offset + 1*8], xmm7 + MOVDQU [rsp + xmm_save_mem_offset + 2*8], xmm8 + MOVDQU [rsp + xmm_save_mem_offset + 3*8], xmm9 + MOVDQU [rsp + xmm_save_mem_offset + 4*8], xmm10 + MOVDQU [rsp + xmm_save_mem_offset + 5*8], xmm11 + MOVDQU [rsp + xmm_save_mem_offset + 6*8], xmm12 + MOVDQU [rsp + xmm_save_mem_offset + 7*8], xmm13 + MOVDQU [rsp + xmm_save_mem_offset + 8*8], xmm14 + MOVDQU [rsp + xmm_save_mem_offset + 9*8], xmm15 +%endif + +%endm + +%macro FUNC_RESTORE 0 + mov rbx, [rsp + gpr_save_mem_offset + 0*8] + mov rbp, [rsp + gpr_save_mem_offset + 1*8] + mov r12, [rsp + gpr_save_mem_offset + 2*8] + +%ifidn __OUTPUT_FORMAT__, win64 + mov rsi, [rsp + gpr_save_mem_offset + 3*8] + mov rdi, [rsp + gpr_save_mem_offset + 4*8] + + MOVDQU xmm6, [rsp + xmm_save_mem_offset + 0*8] + MOVDQU xmm7, [rsp + xmm_save_mem_offset + 1*8] + MOVDQU xmm8, [rsp + xmm_save_mem_offset + 2*8] + MOVDQU xmm9, [rsp + xmm_save_mem_offset + 3*8] + MOVDQU xmm10, [rsp + xmm_save_mem_offset + 4*8] + MOVDQU xmm11, [rsp + xmm_save_mem_offset + 5*8] + MOVDQU xmm12, [rsp + xmm_save_mem_offset + 6*8] + MOVDQU xmm13, [rsp + xmm_save_mem_offset + 7*8] + MOVDQU xmm14, [rsp + xmm_save_mem_offset + 8*8] + MOVDQU xmm15, [rsp + xmm_save_mem_offset + 9*8] +%endif + add rsp, stack_size + +%endmacro + +global encode_deflate_icf_ %+ ARCH +encode_deflate_icf_ %+ ARCH: + FUNC_SAVE + +%ifnidn ptr, arg1 + mov ptr, arg1 
+%endif +%ifnidn hufftables, arg4 + mov hufftables, arg4 +%endif + + mov [rsp + bitbuf_mem_offset], bitbuf + mov bits, [bitbuf + _m_bits] + mov ecx, [bitbuf + _m_bit_count] + mov end_ptr, [bitbuf + _m_out_end] + mov out_buf, [bitbuf + _m_out_buf] ; clobbers bitbuf + + sub end_ptr, VECTOR_SLOP + sub in_buf_end, VECTOR_LOOP_PROCESSED + cmp ptr, in_buf_end + jge .finish + + vpcmpeqq ytmp, ytmp, ytmp + vmovdqu datas, [ptr] + vpand syms, datas, [lit_mask] + vpgatherdd codes_lookup1, [hufftables + _lit_len_table + 4 * syms], ytmp + + vpcmpeqq ytmp, ytmp, ytmp + vpsrld dsyms, datas, DIST_OFFSET + vpand dsyms, dsyms, [dist_mask] + vpgatherdd codes_lookup2, [hufftables + _dist_table + 4 * dsyms], ytmp + + vmovq ybits %+ x, bits + vmovq ybits_count %+ x, rcx + vmovdqa yoffset_mask, [offset_mask] + +.main_loop: + ;; Sets codes1 to contain lit/len codes andcode_lens1 the corresponding lengths + vpsrld code_lens1, codes_lookup1, 24 + vpand codes1, codes_lookup1, [lit_icr_mask] + + ;; Sets codes2 to contain dist codes, code_lens2 the corresponding lengths, + ;; and code_lens3 the extra bit counts + vpblendw codes2, ybits, codes_lookup2, 0x55 ;Bits 8 and above of ybits are 0 + vpsrld code_lens2, codes_lookup2, 24 + vpsrld code_lens3, codes_lookup2, 16 + vpand code_lens3, [eb_icr_mask] + + ;; Set codes3 to contain the extra bits + vpsrld codes3, datas, EXTRA_BITS_OFFSET + + cmp out_buf, end_ptr + ja .main_loop_exit + + ;; Start code lookups for next iteration + add ptr, VECTOR_SIZE + vpcmpeqq ytmp, ytmp, ytmp + vmovdqu datas, [ptr] + vpand syms, datas, [lit_mask] + vpgatherdd codes_lookup1, [hufftables + _lit_len_table + 4 * syms], ytmp + + vpcmpeqq ytmp, ytmp, ytmp + vpsrld dsyms, datas, DIST_OFFSET + vpand dsyms, dsyms, [dist_mask] + vpgatherdd codes_lookup2, [hufftables + _dist_table + 4 * dsyms], ytmp + + ;; Merge dist code with extra bits + vpsllvd codes3, codes3, code_lens2 + vpxor codes2, codes2, codes3 + vpaddd code_lens2, code_lens2, code_lens3 + + ;; Check for long codes 
+ vpaddd code_lens3, code_lens1, code_lens2 + vpcmpgtd ytmp, code_lens3, [max_write_d] + vptest ytmp, ytmp + jnz .long_codes + + ;; Merge dist and len codes + vpsllvd codes2, codes2, code_lens1 + vpxor codes1, codes1, codes2 + + ;; Split buffer data into qwords, ytmp is 0 after last branch + vpblendd codes3, ytmp, codes1, 0x55 + vpsrlq codes1, codes1, 32 + vpsrlq code_lens1, code_lens3, 32 + vpblendd code_lens3, ytmp, code_lens3, 0x55 + + ;; Merge bitbuf bits + vpsllvq codes3, codes3, ybits_count + vpxor codes3, codes3, ybits + vpaddq code_lens3, code_lens3, ybits_count + + ;; Merge two symbols into qwords + vpsllvq codes1, codes1, code_lens3 + vpxor codes1, codes1, codes3 + vpaddq code_lens1, code_lens1, code_lens3 + + ;; Split buffer data into dqwords, ytmp is 0 after last branch + vpblendd codes2, ytmp, codes1, 0x33 + vpblendd code_lens2, ytmp, code_lens1, 0x33 + vpsrldq codes1, 8 + vpsrldq code_lens1, 8 + + ;; Bit align dqwords + vpaddq code_lens1, code_lens1, code_lens2 + vpand ybits_count, code_lens1, yoffset_mask ;Extra bits + vpermq ybits_count, ybits_count, 0xcf + vpaddq code_lens2, ybits_count + vpsllvq codes2, codes2, ybits_count + + ;; Merge two qwords into dqwords + vmovdqa ytmp, [q_64] + vpsubq code_lens3, ytmp, code_lens2 + vpsrlvq codes3, codes1, code_lens3 + vpslldq codes3, codes3, 8 + + vpsllvq codes1, codes1, code_lens2 + + vpxor codes1, codes1, codes3 + vpxor codes1, codes1, codes2 + + vmovq tmp, code_lens1 %+ x ;Number of bytes + shr tmp, 3 + + ;; Extract last bytes + vpaddq code_lens2, code_lens1, ybits_count + vpsrlq code_lens2, code_lens2, 3 + vpshufb codes2, codes1, code_lens2 + vpand codes2, codes2, [bytes_mask] + vextracti128 ybits %+ x, codes2, 1 + + ;; Check for short codes + vptest code_lens2, [min_write_mask] + jz .short_codes +.short_codes_next: + + vpermq codes2, codes2, 0x45 + vpor codes1, codes1, codes2 + + ;; bit shift upper dqword combined bits to line up with lower dqword + vextracti128 code_lens2 %+ x, code_lens1, 1 + + ; 
Write out lower dqword of combined bits + vmovdqu [out_buf], codes1 + vpaddq code_lens1, code_lens1, code_lens2 + + vmovq tmp2, code_lens1 %+ x ;Number of bytes + shr tmp2, 3 + vpand ybits_count, code_lens1, yoffset_mask ;Extra bits + + ; Write out upper dqword of combined bits + vextracti128 [out_buf + tmp], codes1, 1 + add out_buf, tmp2 + + cmp ptr, in_buf_end + jbe .main_loop + +.main_loop_exit: + vmovq rcx, ybits_count %+ x + vmovq bits, ybits %+ x + jmp .finish + +.short_codes: + ;; Merge last bytes when the second dqword contains less than a byte + vpor ybits %+ x, codes2 %+ x + jmp .short_codes_next + +.long_codes: + add end_ptr, VECTOR_SLOP + sub ptr, VECTOR_SIZE + + vpxor ytmp, ytmp, ytmp + vpblendd codes3, ytmp, codes1, 0x55 + vpblendd code_lens3, ytmp, code_lens1, 0x55 + vpblendd codes4, ytmp, codes2, 0x55 + + vpsllvq codes4, codes4, code_lens3 + vpxor codes3, codes3, codes4 + vpaddd code_lens3, code_lens1, code_lens2 + + vpsrlq codes1, codes1, 32 + vpsrlq code_lens1, code_lens1, 32 + vpsrlq codes2, codes2, 32 + + vpsllvq codes2, codes2, code_lens1 + vpxor codes1, codes1, codes2 + + vpsrlq code_lens1, code_lens3, 32 + vpblendd code_lens3, ytmp, code_lens3, 0x55 + + ;; Merge bitbuf bits + vpsllvq codes3, codes3, ybits_count + vpxor codes3, codes3, ybits + vpaddq code_lens3, code_lens3, ybits_count + vpaddq code_lens1, code_lens1, code_lens3 + + xor bits, bits + xor rcx, rcx + vpsubq code_lens1, code_lens1, code_lens3 +%rep 2 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vmovq sym, codes3 %+ x + vmovq tmp2, code_lens3 %+ x + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vmovq sym, 
codes1 %+ x + vmovq tmp2, code_lens1 %+ x + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vpextrq sym, codes3 %+ x, 1 + vpextrq tmp2, code_lens3 %+ x, 1 + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vpextrq sym, codes1 %+ x, 1 + vpextrq tmp2, code_lens1 %+ x, 1 + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + + vextracti128 codes3 %+ x, codes3, 1 + vextracti128 code_lens3 %+ x, code_lens3, 1 + vextracti128 codes1 %+ x, codes1, 1 + vextracti128 code_lens1 %+ x, code_lens1, 1 +%endrep + sub end_ptr, VECTOR_SLOP + + vmovq ybits %+ x, bits + vmovq ybits_count %+ x, rcx + cmp ptr, in_buf_end + jbe .main_loop + +.finish: + add in_buf_end, VECTOR_LOOP_PROCESSED + add end_ptr, VECTOR_SLOP + + cmp ptr, in_buf_end + jge .overflow + +.finish_loop: + mov DWORD(data), [ptr] + + cmp out_buf, end_ptr + ja .overflow + + mov sym, data + and sym, LIT_MASK ; sym has ll_code + mov DWORD(sym), [hufftables + _lit_len_table + sym * 4] + + ; look up dist sym + mov dsym, data + shr dsym, DIST_OFFSET + and dsym, DIST_MASK + mov DWORD(dsym), [hufftables + _dist_table + dsym * 4] + + ; insert LL code + ; sym: 31:24 length; 23:0 code + mov tmp2, sym + and sym, 0xFFFFFF + SHLX sym, sym, 
rcx + shr tmp2, 24 + or bits, sym + add rcx, tmp2 + + ; insert dist code + movzx tmp, WORD(dsym) + SHLX tmp, tmp, rcx + or bits, tmp + mov tmp, dsym + shr tmp, 24 + add rcx, tmp + + ; insert dist extra bits + shr data, EXTRA_BITS_OFFSET + add ptr, 4 + SHLX data, data, rcx + or bits, data + shr dsym, 16 + and dsym, 0xFF + add rcx, dsym + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + + cmp ptr, in_buf_end + jb .finish_loop + +.overflow: + mov tmp, [rsp + bitbuf_mem_offset] + mov [tmp + _m_bits], bits + mov [tmp + _m_bit_count], ecx + mov [tmp + _m_out_buf], out_buf + + mov rax, ptr + + FUNC_RESTORE + + ret + +section .data + align 32 +max_write_d: + dd 0x1c, 0x1d, 0x1f, 0x20, 0x1c, 0x1d, 0x1f, 0x20 +min_write_mask: + dq 0x00, 0x00, 0xff, 0x00 +offset_mask: + dq 0x0000000000000007, 0x0000000000000000 + dq 0x0000000000000000, 0x0000000000000000 +q_64: + dq 0x0000000000000040, 0x0000000000000000 + dq 0x0000000000000040, 0x0000000000000000 +lit_mask: + dd LIT_MASK, LIT_MASK, LIT_MASK, LIT_MASK + dd LIT_MASK, LIT_MASK, LIT_MASK, LIT_MASK +dist_mask: + dd DIST_MASK, DIST_MASK, DIST_MASK, DIST_MASK + dd DIST_MASK, DIST_MASK, DIST_MASK, DIST_MASK +lit_icr_mask: + dd 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF + dd 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF +eb_icr_mask: + dd 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF + dd 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF +bytes_mask: + dq 0x00000000000000ff, 0x0000000000000000 + dq 0x00000000000000ff, 0x0000000000000000 diff --git a/src/spdk/isa-l/igzip/encode_df_06.asm b/src/spdk/isa-l/igzip/encode_df_06.asm new file mode 100644 index 000000000..9fa516326 --- /dev/null +++ b/src/spdk/isa-l/igzip/encode_df_06.asm @@ -0,0 +1,620 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" +%include "lz0a_const.asm" +%include "data_struct2.asm" +%include "stdmac.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%define ARCH 06 +%define USE_HSWNI + +; tree entry is 4 bytes: +; lit/len tree (513 entries) +; | 3 | 2 | 1 | 0 | +; | len | code | +; +; dist tree +; | 3 | 2 | 1 | 0 | +; |eblen:codlen| code | + +; token format: +; DIST_OFFSET:0 : lit/len +; 31:(DIST_OFFSET + 5) : dist Extra Bits +; (DIST_OFFSET + 5):DIST_OFFSET : dist code +; lit/len: 0-256 (literal) +; 257-512 (dist + 254) + +; returns final token pointer +; equal to token_end if successful +; uint32_t* encode_df(uint32_t *token_start, uint32_t *token_end, +; BitBuf *out_buf, uint32_t *trees); + +%ifidn __OUTPUT_FORMAT__, win64 +%define arg1 rcx +%define arg2 rdx +%define arg3 r8 +%define arg4 r9 +%define sym rsi +%define dsym rdi +%define hufftables r9 +%define ptr r11 +%else +; Linux +%define arg1 rdi +%define arg2 rsi +%define arg3 rdx +%define arg4 rcx +%define sym r9 +%define dsym r8 +%define hufftables r11 +%define ptr rdi +%endif + +%define in_buf_end arg2 +%define bitbuf arg3 +%define out_buf bitbuf +; bit_count is rcx +%define bits rax +%define data r12 +%define tmp rbx +%define len dsym +%define tmp2 r10 +%define end_ptr rbp + +%define LIT_MASK ((0x1 << LIT_LEN_BIT_COUNT) - 1) +%define DIST_MASK ((0x1 << DIST_LIT_BIT_COUNT) - 1) + +%define codes1 zmm1 +%define code_lens1 zmm2 +%define codes2 zmm3 +%define code_lens2 zmm4 +%define codes3 zmm5 +%define ztmp zmm5 +%define code_lens3 zmm6 +%define codes4 zmm7 +%define syms zmm7 + +%define code_lens4 zmm8 +%define dsyms zmm8 +%define zbits_count_q zmm8 + +%define codes_lookup1 zmm9 +%define codes_lookup2 zmm10 +%define datas zmm11 +%define zbits zmm12 +%define zbits_count zmm13 +%define zoffset_mask zmm14 +%define znotoffset_mask zmm23 + +%define zq_64 zmm15 +%define zlit_mask zmm16 +%define zdist_mask zmm17 +%define zlit_icr_mask zmm18 +%define 
zeb_icr_mask zmm19 +%define zmax_write zmm20 +%define zrot_perm zmm21 +%define zq_8 zmm22 + +%define VECTOR_SIZE 0x40 +%define VECTOR_LOOP_PROCESSED (2 * VECTOR_SIZE) +%define VECTOR_SLOP 0x40 - 8 + +gpr_save_mem_offset equ 0 +gpr_save_mem_size equ 8 * 6 +xmm_save_mem_offset equ gpr_save_mem_offset + gpr_save_mem_size +xmm_save_mem_size equ 10 * 16 +bitbuf_mem_offset equ xmm_save_mem_offset + xmm_save_mem_size +bitbuf_mem_size equ 8 +stack_size equ gpr_save_mem_size + xmm_save_mem_size + bitbuf_mem_size + + +%macro FUNC_SAVE 0 + sub rsp, stack_size + mov [rsp + gpr_save_mem_offset + 0*8], rbx + mov [rsp + gpr_save_mem_offset + 1*8], rbp + mov [rsp + gpr_save_mem_offset + 2*8], r12 + +%ifidn __OUTPUT_FORMAT__, win64 + mov [rsp + gpr_save_mem_offset + 3*8], rsi + mov [rsp + gpr_save_mem_offset + 4*8], rdi + + MOVDQU [rsp + xmm_save_mem_offset + 0*8], xmm6 + MOVDQU [rsp + xmm_save_mem_offset + 1*8], xmm7 + MOVDQU [rsp + xmm_save_mem_offset + 2*8], xmm8 + MOVDQU [rsp + xmm_save_mem_offset + 3*8], xmm9 + MOVDQU [rsp + xmm_save_mem_offset + 4*8], xmm10 + MOVDQU [rsp + xmm_save_mem_offset + 5*8], xmm11 + MOVDQU [rsp + xmm_save_mem_offset + 6*8], xmm12 + MOVDQU [rsp + xmm_save_mem_offset + 7*8], xmm13 + MOVDQU [rsp + xmm_save_mem_offset + 8*8], xmm14 + MOVDQU [rsp + xmm_save_mem_offset + 9*8], xmm15 +%endif + +%endm + +%macro FUNC_RESTORE 0 + mov rbx, [rsp + gpr_save_mem_offset + 0*8] + mov rbp, [rsp + gpr_save_mem_offset + 1*8] + mov r12, [rsp + gpr_save_mem_offset + 2*8] + +%ifidn __OUTPUT_FORMAT__, win64 + mov rsi, [rsp + gpr_save_mem_offset + 3*8] + mov rdi, [rsp + gpr_save_mem_offset + 4*8] + + MOVDQU xmm6, [rsp + xmm_save_mem_offset + 0*8] + MOVDQU xmm7, [rsp + xmm_save_mem_offset + 1*8] + MOVDQU xmm8, [rsp + xmm_save_mem_offset + 2*8] + MOVDQU xmm9, [rsp + xmm_save_mem_offset + 3*8] + MOVDQU xmm10, [rsp + xmm_save_mem_offset + 4*8] + MOVDQU xmm11, [rsp + xmm_save_mem_offset + 5*8] + MOVDQU xmm12, [rsp + xmm_save_mem_offset + 6*8] + MOVDQU xmm13, [rsp + 
xmm_save_mem_offset + 7*8] + MOVDQU xmm14, [rsp + xmm_save_mem_offset + 8*8] + MOVDQU xmm15, [rsp + xmm_save_mem_offset + 9*8] +%endif + add rsp, stack_size + +%endmacro + +global encode_deflate_icf_ %+ ARCH +encode_deflate_icf_ %+ ARCH: + FUNC_SAVE + +%ifnidn ptr, arg1 + mov ptr, arg1 +%endif +%ifnidn hufftables, arg4 + mov hufftables, arg4 +%endif + + mov [rsp + bitbuf_mem_offset], bitbuf + mov bits, [bitbuf + _m_bits] + mov ecx, [bitbuf + _m_bit_count] + mov end_ptr, [bitbuf + _m_out_end] + mov out_buf, [bitbuf + _m_out_buf] ; clobbers bitbuf + + sub end_ptr, VECTOR_SLOP + sub in_buf_end, VECTOR_LOOP_PROCESSED + cmp ptr, in_buf_end + jge .finish + + kxorq k0, k0, k0 + kmovq k1, [k_mask_1] + kmovq k2, [k_mask_2] + kmovq k3, [k_mask_3] + kmovq k4, [k_mask_4] + kmovq k5, [k_mask_5] + + vmovdqa64 zrot_perm, [rot_perm] + + vbroadcasti64x2 zq_64, [q_64] + vbroadcasti64x2 zq_8, [q_8] + + vpbroadcastq zoffset_mask, [offset_mask] + vpternlogd znotoffset_mask, znotoffset_mask, zoffset_mask, 0x55 + + vpbroadcastd zlit_mask, [lit_mask] + vpbroadcastd zdist_mask, [dist_mask] + vpbroadcastd zlit_icr_mask, [lit_icr_mask] + vpbroadcastd zeb_icr_mask, [eb_icr_mask] + vpbroadcastd zmax_write, [max_write_d] + + knotq k6, k0 + vmovdqu64 datas, [ptr] + vpandd syms, datas, zlit_mask + vpgatherdd codes_lookup1 {k6}, [hufftables + _lit_len_table + 4 * syms] + + knotq k7, k0 + vpsrld dsyms, datas, DIST_OFFSET + vpandd dsyms, dsyms, zdist_mask + vpgatherdd codes_lookup2 {k7}, [hufftables + _dist_table + 4 * dsyms] + + vmovq zbits %+ x, bits + vmovq zbits_count %+ x, rcx + +.main_loop: + ;; Sets codes1 to contain lit/len codes andcode_lens1 the corresponding lengths + vpsrld code_lens1, codes_lookup1, 24 + vpandd codes1, codes_lookup1, zlit_icr_mask + + ;; Sets codes2 to contain dist codes, code_lens2 the corresponding lengths, + ;; and code_lens3 the extra bit counts + vmovdqu16 codes2 {k1}{z}, codes_lookup2 ;Bits 8 and above of zbits are 0 + vpsrld code_lens2, codes_lookup2, 24 + vpsrld 
code_lens3, codes_lookup2, 16 + vpandd code_lens3, code_lens3, zeb_icr_mask + + ;; Set codes3 to contain the extra bits + vpsrld codes3, datas, EXTRA_BITS_OFFSET + + cmp out_buf, end_ptr + ja .main_loop_exit + + ;; Start code lookups for next iteration + knotq k6, k0 + add ptr, VECTOR_SIZE + vmovdqu64 datas, [ptr] + vpandd syms, datas, zlit_mask + vpgatherdd codes_lookup1 {k6}, [hufftables + _lit_len_table + 4 * syms] + + knotq k7, k0 + vpsrld dsyms, datas, DIST_OFFSET + vpandd dsyms, dsyms, zdist_mask + vpgatherdd codes_lookup2 {k7}, [hufftables + _dist_table + 4 * dsyms] + + ;; Merge dist code with extra bits + vpsllvd codes3, codes3, code_lens2 + vpxord codes2, codes2, codes3 + vpaddd code_lens2, code_lens2, code_lens3 + + ;; Check for long codes + vpaddd code_lens3, code_lens1, code_lens2 + vpcmpgtd k6, code_lens3, zmax_write + ktestd k6, k6 + jnz .long_codes + + ;; Merge dist and len codes + vpsllvd codes2, codes2, code_lens1 + vpxord codes1, codes1, codes2 + + vmovdqa32 codes3 {k1}{z}, codes1 + vpsrlq codes1, codes1, 32 + vpsrlq code_lens1, code_lens3, 32 + vmovdqa32 code_lens3 {k1}{z}, code_lens3 + + ;; Merge bitbuf bits + vpsllvq codes3, codes3, zbits_count + vpxord codes3, codes3, zbits + vpaddq code_lens3, code_lens3, zbits_count + + ;; Merge two symbols into qwords + vpsllvq codes1, codes1, code_lens3 + vpxord codes1, codes1, codes3 + vpaddq code_lens1, code_lens1, code_lens3 + + ;; Determine total bits at end of each qword + vpermq zbits_count {k5}{z}, zrot_perm, code_lens1 + vpaddq code_lens2, zbits_count, code_lens1 + vshufi64x2 zbits_count {k3}{z}, code_lens2, code_lens2, 0x90 + vpaddq code_lens2, code_lens2, zbits_count + vshufi64x2 zbits_count {k2}{z}, code_lens2, code_lens2, 0x40 + vpaddq code_lens2, code_lens2, zbits_count + + ;; Bit align quadwords + vpandd zbits_count, code_lens2, zoffset_mask + vpermq zbits_count_q {k5}{z}, zrot_perm, zbits_count + vpsllvq codes1, codes1, zbits_count_q + + ;; Check whether any of the last bytes overlap + 
vpcmpq k6 {k5}, code_lens1, zbits_count, 1 + + ;; Get last byte in each qword + vpsrlq code_lens2, code_lens2, 3 + vpaddq code_lens1, code_lens1, zbits_count_q + vpandq code_lens1, code_lens1, znotoffset_mask + vpsrlvq codes3, codes1, code_lens1 + + ;; Branch to handle overlapping last bytes + ktestd k6, k6 + jnz .small_codes + +.small_codes_next: + ;; Save off zbits and zbits_count for next loop + knotq k7, k5 + vpermq zbits {k7}{z}, zrot_perm, codes3 + vpermq zbits_count {k7}{z}, zrot_perm, zbits_count + + ;; Merge last byte in each qword with the next qword + vpermq codes3 {k5}{z}, zrot_perm, codes3 + vpxord codes1, codes1, codes3 + + ;; Determine total bytes written + vextracti64x2 code_lens1 %+ x, code_lens2, 3 + vpextrq tmp2, code_lens1 %+ x, 1 + + ;; Write out qwords + knotq k6, k0 + vpermq code_lens2 {k5}{z}, zrot_perm, code_lens2 + vpscatterqq [out_buf + code_lens2] {k6}, codes1 + + add out_buf, tmp2 + + cmp ptr, in_buf_end + jbe .main_loop + +.main_loop_exit: + vmovq rcx, zbits_count %+ x + vmovq bits, zbits %+ x + jmp .finish + +.small_codes: + ;; Merge overlapping last bytes + vpermq codes4 {k6}{z}, zrot_perm, codes3 + vporq codes3, codes3, codes4 + kshiftlq k7, k6, 1 + ktestd k6, k7 + jz .small_codes_next + + kandq k6, k6, k7 + jmp .small_codes + +.long_codes: + add end_ptr, VECTOR_SLOP + sub ptr, VECTOR_SIZE + + vmovdqa32 codes3 {k1}{z}, codes1 + vmovdqa32 code_lens3 {k1}{z}, code_lens1 + vmovdqa32 codes4 {k1}{z}, codes2 + + vpsllvq codes4, codes4, code_lens3 + vpxord codes3, codes3, codes4 + vpaddd code_lens3, code_lens1, code_lens2 + + vpsrlq codes1, codes1, 32 + vpsrlq code_lens1, code_lens1, 32 + vpsrlq codes2, codes2, 32 + + vpsllvq codes2, codes2, code_lens1 + vpxord codes1, codes1, codes2 + + vpsrlq code_lens1, code_lens3, 32 + vmovdqa32 code_lens3 {k1}{z}, code_lens3 + + ;; Merge bitbuf bits + vpsllvq codes3, codes3, zbits_count + vpxord codes3, codes3, zbits + vpaddq code_lens3, code_lens3, zbits_count + vpaddq code_lens1, code_lens1, 
code_lens3 + + xor bits, bits + xor rcx, rcx + vpsubq code_lens1, code_lens1, code_lens3 + + vmovdqu64 codes2, codes1 + vmovdqu64 code_lens2, code_lens1 + vmovdqu64 codes4, codes3 + vmovdqu64 code_lens4, code_lens3 +%assign i 0 +%rep 4 +%assign i (i + 1) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vmovq sym, codes3 %+ x + vmovq tmp2, code_lens3 %+ x + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vmovq sym, codes1 %+ x + vmovq tmp2, code_lens1 %+ x + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vpextrq sym, codes3 %+ x, 1 + vpextrq tmp2, code_lens3 %+ x, 1 + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + cmp out_buf, end_ptr + ja .overflow + ;; insert LL code + vpextrq sym, codes1 %+ x, 1 + vpextrq tmp2, code_lens1 %+ x, 1 + SHLX sym, sym, rcx + or bits, sym + add rcx, tmp2 + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + add ptr, 4 + + vextracti32x4 codes3 %+ x, codes4, i + vextracti32x4 code_lens3 %+ x, 
code_lens4, i + vextracti32x4 codes1 %+ x, codes2, i + vextracti32x4 code_lens1 %+ x, code_lens2, i +%endrep + sub end_ptr, VECTOR_SLOP + + vmovq zbits %+ x, bits + vmovq zbits_count %+ x, rcx + cmp ptr, in_buf_end + jbe .main_loop + +.finish: + add in_buf_end, VECTOR_LOOP_PROCESSED + add end_ptr, VECTOR_SLOP + + cmp ptr, in_buf_end + jge .overflow + +.finish_loop: + mov DWORD(data), [ptr] + + cmp out_buf, end_ptr + ja .overflow + + mov sym, data + and sym, LIT_MASK ; sym has ll_code + mov DWORD(sym), [hufftables + _lit_len_table + sym * 4] + + ; look up dist sym + mov dsym, data + shr dsym, DIST_OFFSET + and dsym, DIST_MASK + mov DWORD(dsym), [hufftables + _dist_table + dsym * 4] + + ; insert LL code + ; sym: 31:24 length; 23:0 code + mov tmp2, sym + and sym, 0xFFFFFF + SHLX sym, sym, rcx + shr tmp2, 24 + or bits, sym + add rcx, tmp2 + + ; insert dist code + movzx tmp, WORD(dsym) + SHLX tmp, tmp, rcx + or bits, tmp + mov tmp, dsym + shr tmp, 24 + add rcx, tmp + + ; insert dist extra bits + shr data, EXTRA_BITS_OFFSET + add ptr, 4 + SHLX data, data, rcx + or bits, data + shr dsym, 16 + and dsym, 0xFF + add rcx, dsym + + ; empty bits + mov [out_buf], bits + mov tmp, rcx + shr tmp, 3 ; byte count + add out_buf, tmp + mov tmp, rcx + and rcx, ~7 + SHRX bits, bits, rcx + mov rcx, tmp + and rcx, 7 + + cmp ptr, in_buf_end + jb .finish_loop + +.overflow: + mov tmp, [rsp + bitbuf_mem_offset] + mov [tmp + _m_bits], bits + mov [tmp + _m_bit_count], ecx + mov [tmp + _m_out_buf], out_buf + + mov rax, ptr + + FUNC_RESTORE + + ret + +section .data + align 64 +;; 64 byte data +rot_perm: + dq 0x00000007, 0x00000000, 0x00000001, 0x00000002 + dq 0x00000003, 0x00000004, 0x00000005, 0x00000006 + +;; 16 byte data +q_64: + dq 0x0000000000000040, 0x0000000000000000 +q_8 : + dq 0x0000000000000000, 0x0000000000000008 + +;; 8 byte data +offset_mask: + dq 0x0000000000000007 + +;; 4 byte data +max_write_d: + dd 0x1c +lit_mask: + dd LIT_MASK +dist_mask: + dd DIST_MASK +lit_icr_mask: + dd 
0x00ffffff +eb_icr_mask: + dd 0x000000ff + +;; k mask constants +k_mask_1: dq 0x55555555 +k_mask_2: dq 0xfffffff0 +k_mask_3: dq 0xfffffffc +k_mask_4: dw 0x0101, 0x0101, 0x0101, 0x0101 +k_mask_5: dq 0xfffffffe + +%endif diff --git a/src/spdk/isa-l/igzip/flatten_ll.c b/src/spdk/isa-l/igzip/flatten_ll.c new file mode 100644 index 000000000..1eb13b559 --- /dev/null +++ b/src/spdk/isa-l/igzip/flatten_ll.c @@ -0,0 +1,41 @@ +#include +#include +#include + +#include "flatten_ll.h" + +void flatten_ll(uint32_t * ll_hist) +{ + uint32_t i, j; + uint32_t *s = ll_hist, x, *p; + + s[265] += s[266]; + s[266] = s[267] + s[268]; + s[267] = s[269] + s[270]; + s[268] = s[271] + s[272]; + s[269] = s[273] + s[274] + s[275] + s[276]; + s[270] = s[277] + s[278] + s[279] + s[280]; + s[271] = s[281] + s[282] + s[283] + s[284]; + s[272] = s[285] + s[286] + s[287] + s[288]; + p = s + 289; + for (i = 273; i < 277; i++) { + x = *(p++); + for (j = 1; j < 8; j++) + x += *(p++); + s[i] = x; + } + for (; i < 281; i++) { + x = *(p++); + for (j = 1; j < 16; j++) + x += *(p++); + s[i] = x; + } + for (; i < 285; i++) { + x = *(p++); + for (j = 1; j < 32; j++) + x += *(p++); + s[i] = x; + } + s[284] -= s[512]; + s[285] = s[512]; +} diff --git a/src/spdk/isa-l/igzip/flatten_ll.h b/src/spdk/isa-l/igzip/flatten_ll.h new file mode 100644 index 000000000..9aaf89106 --- /dev/null +++ b/src/spdk/isa-l/igzip/flatten_ll.h @@ -0,0 +1,3 @@ +#include + +void flatten_ll(uint32_t *ll_hist); diff --git a/src/spdk/isa-l/igzip/generate_custom_hufftables.c b/src/spdk/isa-l/igzip/generate_custom_hufftables.c new file mode 100644 index 000000000..60df1b085 --- /dev/null +++ b/src/spdk/isa-l/igzip/generate_custom_hufftables.c @@ -0,0 +1,308 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/* This program can be used to generate a custom huffman encoding to get + * better data compression. This is most useful when the type of data being + * compressed is well known. + * + * To use generate_custom_hufftables, pass a sequence of files to the program + * that together form an accurate representation of the data that is being + * compressed.
Generate_custom_hufftables will then produce the file + * hufftables_c.c, which should be moved to replace its counterpart in the igzip + * source folder. After recompiling the ISA-L library, the igzip compression + * functions will use the new hufftables. + * + * Generate_custom_hufftables should be compiled with the same compile time + * parameters as the igzip source code. Generating custom hufftables with + * different compile time parameters may cause igzip to produce invalid output + * for the reasons described below. The default parameters used by + * generate_custom_hufftables are the same as the default parameters used by + * igzip. + * + * *WARNING* generate_custom_hufftables must be compiled with an IGZIP_HIST_SIZE + * that is at least as large as the IGZIP_HIST_SIZE used by igzip. By default + * IGZIP_HIST_SIZE is 32K, which is also the maximum usable IGZIP_HIST_SIZE. The reason + * for this is to generate better compression. Igzip cannot produce look back + * distances with sizes larger than the IGZIP_HIST_SIZE igzip was compiled with, + * so look back distances with sizes larger than IGZIP_HIST_SIZE are not + * assigned a huffman code. The definition of LONGER_HUFFTABLE must be + * consistent as well since that definition changes the size of the structures + * printed by this tool. + * + */ + +#include +#include +#include +#include +#include +#include "igzip_lib.h" + +/*These max code lengths are limited by how the data is stored in + * hufftables.asm. The deflate standard max is 15.*/ + +#define MAX_HEADER_SIZE ISAL_DEF_MAX_HDR_SIZE + +#define GZIP_HEADER_SIZE 10 +#define GZIP_TRAILER_SIZE 8 +#define ZLIB_HEADER_SIZE 2 +#define ZLIB_TRAILER_SIZE 4 + +/** + * @brief Prints a table of uint8_t elements to a file. + * @param outfile: the file the table is printed to. + * @param table: the table to be printed. + * @param length: number of elements to be printed. + * @param header: header to append in front of the table.
/**
 * @brief Prints a table of uint8_t elements to a file.
 *
 * Elements are written as "0x%02x", separated by commas, eight per row.
 * Every row starts with a newline followed by begin_line. An empty table
 * (length == 0) prints only the header and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
			char *footer, char *begin_line)
{
	/* Same width as length: avoids the signed/unsigned comparison and the
	 * wrap-around of "length - 1" when the table is empty. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%02x", table[i]);
		/* No trailing comma after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}

/**
 * @brief Prints a table of uint16_t elements to a file.
 *
 * Elements are written as "0x%04x", separated by commas, eight per row.
 * Every row starts with a newline followed by begin_line. An empty table
 * (length == 0) prints only the header and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%04x", table[i]);
		/* No trailing comma after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
+ * @param begin_line: string printed at beginning of new line + */ +void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header, + char *footer, char *begin_line) +{ + int i; + fprintf(outfile, "%s", header); + for (i = 0; i < length - 1; i++) { + if ((i & 3) == 0) + fprintf(outfile, "\n%s", begin_line); + else + fprintf(outfile, " "); + fprintf(outfile, "0x%08x,", table[i]); + } + + if ((i & 3) == 0) + fprintf(outfile, "%s", begin_line); + else + fprintf(outfile, " "); + fprintf(outfile, "0x%08x", table[i]); + fprintf(outfile, "%s", footer); + +} + +void fprint_hufftables(FILE * output_file, char *hufftables_name, + struct isal_hufftables *hufftables) +{ + fprintf(output_file, "struct isal_hufftables %s = {\n\n", hufftables_name); + + fprint_uint8_table(output_file, hufftables->deflate_hdr, + hufftables->deflate_hdr_count + + (hufftables->deflate_hdr_extra_bits + 7) / 8, + "\t.deflate_hdr = {", "},\n\n", "\t\t"); + + fprintf(output_file, "\t.deflate_hdr_count = %d,\n", hufftables->deflate_hdr_count); + fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n", + hufftables->deflate_hdr_extra_bits); + + fprint_uint32_table(output_file, hufftables->dist_table, IGZIP_DIST_TABLE_SIZE, + "\t.dist_table = {", "},\n\n", "\t\t"); + + fprint_uint32_table(output_file, hufftables->len_table, IGZIP_LEN_TABLE_SIZE, + "\t.len_table = {", "},\n\n", "\t\t"); + + fprint_uint16_table(output_file, hufftables->lit_table, IGZIP_LIT_TABLE_SIZE, + "\t.lit_table = {", "},\n\n", "\t\t"); + fprint_uint8_table(output_file, hufftables->lit_table_sizes, IGZIP_LIT_TABLE_SIZE, + "\t.lit_table_sizes = {", "},\n\n", "\t\t"); + + fprint_uint16_table(output_file, hufftables->dcodes, + ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET, + "\t.dcodes = {", "},\n\n", "\t\t"); + fprint_uint8_table(output_file, hufftables->dcodes_sizes, + ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET, + "\t.dcodes_sizes = {", "}\n", "\t\t"); + fprintf(output_file, "};\n"); +} + +void 
fprint_header(FILE * output_file) +{ + + fprintf(output_file, "#include \n"); + fprintf(output_file, "#include \n\n"); + + fprintf(output_file, "#if IGZIP_HIST_SIZE > %d\n" + "# error \"Invalid history size for the custom hufftable\"\n" + "#endif\n", IGZIP_HIST_SIZE); + +#ifdef LONGER_HUFFTABLE + fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n" + "# error \"Custom hufftable requires LONGER_HUFFTABLE to be defined \"\n" + "#endif\n"); +#else + fprintf(output_file, "#ifdef LONGER_HUFFTABLE\n" + "# error \"Custom hufftable requires LONGER_HUFFTABLE to not be defined \"\n" + "#endif\n"); +#endif + fprintf(output_file, "\n"); + + fprintf(output_file, "const uint8_t gzip_hdr[] = {\n" + "\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n"); + + fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE); + fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE); + + fprintf(output_file, "const uint8_t zlib_hdr[] = { 0x78, 0x01 };\n\n"); + fprintf(output_file, "const uint32_t zlib_hdr_bytes = %d;\n", ZLIB_HEADER_SIZE); + fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE); +} + +int main(int argc, char *argv[]) +{ + long int file_length; + uint8_t *stream = NULL; + struct isal_hufftables hufftables; + struct isal_huff_histogram histogram; + struct isal_zstream tmp_stream; + FILE *file; + + if (argc == 1) { + printf("Error, no input file.\n"); + return 1; + } + + memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. 
*/ + + while (argc > 1) { + printf("Processing %s\n", argv[argc - 1]); + file = fopen(argv[argc - 1], "r"); + if (file == NULL) { + printf("Error opening file\n"); + return 1; + } + fseek(file, 0, SEEK_END); + file_length = ftell(file); + fseek(file, 0, SEEK_SET); + file_length -= ftell(file); + stream = malloc(file_length); + if (stream == NULL) { + printf("Failed to allocate memory to read in file\n"); + fclose(file); + return 1; + } + if (fread(stream, 1, file_length, file) != file_length) { + printf("Error occurred when reading file"); + fclose(file); + free(stream); + return 1; + } + + /* Create a histogram of frequency of symbols found in stream to + * generate the huffman tree.*/ + isal_update_histogram(stream, file_length, &histogram); + + fclose(file); + free(stream); + argc--; + } + + isal_create_hufftables(&hufftables, &histogram); + + file = fopen("hufftables_c.c", "w"); + if (file == NULL) { + printf("Error creating file hufftables_c.c\n"); + return 1; + } + + fprint_header(file); + + fprintf(file, "\n"); + + fprint_hufftables(file, "hufftables_default", &hufftables); + + fprintf(file, "\n"); + + isal_deflate_stateless_init(&tmp_stream); + isal_deflate_set_hufftables(&tmp_stream, NULL, IGZIP_HUFFTABLE_STATIC); + fprint_hufftables(file, "hufftables_static", tmp_stream.hufftables); + + fclose(file); + + return 0; +} diff --git a/src/spdk/isa-l/igzip/generate_static_inflate.c b/src/spdk/isa-l/igzip/generate_static_inflate.c new file mode 100644 index 000000000..f4bf5acce --- /dev/null +++ b/src/spdk/isa-l/igzip/generate_static_inflate.c @@ -0,0 +1,163 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include +#include +#include +#include "igzip_lib.h" + +#define STATIC_INFLATE_FILE "static_inflate.h" + +/** + * @brief Prints a table of uint16_t elements to a file. + * @param outfile: the file the table is printed to. + * @param table: the table to be printed. + * @param length: number of elements to be printed. + * @param header: header to append in front of the table. 
/**
 * @brief Prints a table of uint16_t elements to a file.
 *
 * Elements are written as "0x%04x", separated by commas, eight per row.
 * Every row starts with a newline followed by begin_line. An empty table
 * (length == 0) prints only the header and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Same width as length: avoids the signed/unsigned comparison and the
	 * wrap-around of "length - 1" when the table is empty. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%04x", table[i]);
		/* No trailing comma after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}

/**
 * @brief Prints a table of uint32_t elements to a file.
 *
 * Elements are written as "0x%08x", separated by commas, four per row.
 * Every row, including a final row that holds a single element, starts
 * with a newline followed by begin_line. An empty table (length == 0)
 * prints only the header and the footer.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 3) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%08x", table[i]);
		/* No trailing comma after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
"#else\n" + "# warning \"Incompatible compile time defines for optimized static inflate table.\"\n" + "#endif\n\n"); +} + +int main(int argc, char *argv[]) +{ + struct inflate_state state; + FILE *file; + uint8_t static_deflate_hdr = 3; + uint8_t tmp_space[8]; + + isal_inflate_init(&state); + + state.next_in = &static_deflate_hdr; + state.avail_in = sizeof(static_deflate_hdr); + state.next_out = tmp_space; + state.avail_out = sizeof(tmp_space); + + isal_inflate(&state); + + file = fopen(STATIC_INFLATE_FILE, "w"); + + if (file == NULL) { + printf("Error creating file hufftables_c.c\n"); + return 1; + } + + fprintf(file, "#ifndef STATIC_HEADER_H\n" "#define STATIC_HEADER_H\n\n"); + + fprint_header(file); + + fprintf(file, "struct inflate_huff_code_large static_lit_huff_code = {\n"); + fprint_uint32_table(file, state.lit_huff_code.short_code_lookup, + sizeof(state.lit_huff_code.short_code_lookup) / sizeof(uint32_t), + "\t.short_code_lookup = {", "\t},\n\n", "\t\t"); + fprint_uint16_table(file, state.lit_huff_code.long_code_lookup, + sizeof(state.lit_huff_code.long_code_lookup) / sizeof(uint16_t), + "\t.long_code_lookup = {", "\t}\n", "\t\t"); + fprintf(file, "};\n\n"); + + fprintf(file, "struct inflate_huff_code_small static_dist_huff_code = {\n"); + fprint_uint16_table(file, state.dist_huff_code.short_code_lookup, + sizeof(state.dist_huff_code.short_code_lookup) / sizeof(uint16_t), + "\t.short_code_lookup = {", "\t},\n\n", "\t\t"); + fprint_uint16_table(file, state.dist_huff_code.long_code_lookup, + sizeof(state.dist_huff_code.long_code_lookup) / sizeof(uint16_t), + "\t.long_code_lookup = {", "\t}\n", "\t\t"); + fprintf(file, "};\n\n"); + + fprintf(file, "#endif\n"); + fclose(file); + + return 0; +} diff --git a/src/spdk/isa-l/igzip/heap_macros.asm b/src/spdk/isa-l/igzip/heap_macros.asm new file mode 100644 index 000000000..4385fae66 --- /dev/null +++ b/src/spdk/isa-l/igzip/heap_macros.asm @@ -0,0 +1,98 @@ 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
; heapify2 heap, heap_size, i, child, tmp1, tmp2, tmpd
;
; Sift-down loop: starting at position i, repeatedly pick the smaller
; (unsigned) of the two children and swap it with the parent until the
; parent is <= that child or the child position exceeds heap_size.
; When child == heap_size there is no right sibling; the flags from the
; "cmp child, heap_size" are still live at the cmove (mov/lea leave them
; untouched), so tmp2 is forced equal to tmp1 in that case.
; NOTE(review): positions are used as unscaled offsets here, unlike the
; heapify macro, which scales by 8 - confirm the intended units
; before using this macro.
%macro heapify2 7
%define %%heap      %1 ; qword ptr
%define %%heap_size %2 ; dword
%define %%i         %3 ; dword
%define %%child     %4 ; dword
%define %%tmp1      %5 ; qword
%define %%tmp2      %6 ; qword
%define %%tmpd      %7 ; dword
	align 16
%%heapify1:
	lea	%%child, [%%i + %%i]
	cmp	%%child, %%heap_size
	ja	%%end_heapify1
	mov	%%tmp1, [%%heap + %%child]
	mov	%%tmpd, %%child
	mov	%%tmp2, [%%heap + %%child + 8]
	lea	%%child, [%%child + 1]
	cmove	%%tmp2, %%tmp1		; no right sibling: compare left with itself
	cmp	%%tmp1, %%tmp2
	cmovbe	%%child, %%tmpd		; left child is the smaller one
	cmovbe	%%tmp2, %%tmp1
	; child is correct, %%tmp2 = heap[child]
	mov	%%tmp1, [%%heap + %%i]
	cmp	%%tmp1, %%tmp2
	jbe	%%end_heapify1
	mov	[%%heap + %%i], %%tmp2
	mov	[%%heap + %%child], %%tmp1
	mov	%%i, %%child
	jmp	%%heapify1
%%end_heapify1:
%endm

; heapify heap, heap_size, i, child, tmp1, tmp2, tmpd, tmp3
;
; Sift-down on a heap of qword keys addressed as heap[index*8]: the children
; of position i are 2*i and 2*i + 1. The smaller child (unsigned compare) is
; swapped with the parent while the parent is larger; the loop stops once
; 2*i exceeds heap_size or the parent is <= the smaller child.
; NOTE(review): heap[child + 1] is loaded even when child == heap_size, so
; the slot just past the heap is read and compared - presumably the caller
; keeps a sentinel there; confirm against the call sites.
%macro heapify 8
%define %%heap      %1 ; qword ptr
%define %%heap_size %2 ; qword
%define %%i         %3 ; qword
%define %%child     %4 ; qword
%define %%tmp1      %5 ; qword
%define %%tmp2      %6 ; qword
%define %%tmpd      %7 ; qword
%define %%tmp3      %8
	align 16
%%heapify1:
	lea	%%child, [%%i + %%i]
;	mov	%%child, %%i
;	add	%%child, %%child
	cmp	%%child, %%heap_size
	ja	%%end_heapify1
	mov	%%tmp1, [%%heap + %%child*8]
	mov	%%tmp2, [%%heap + %%child*8 + 8]
	mov	%%tmp3, [%%heap + %%i*8]
	mov	%%tmpd, %%child
	add	%%tmpd, 1

	cmp	%%tmp2, %%tmp1
	cmovb	%%child, %%tmpd		; right child is strictly smaller
	cmovb	%%tmp1, %%tmp2
	; child is correct, tmp1 = heap[child]
	cmp	%%tmp3, %%tmp1
	jbe	%%end_heapify1
	; swap i and child
	mov	[%%heap + %%i*8], %%tmp1
	mov	[%%heap + %%child*8], %%tmp3
	mov	%%i, %%child
	jmp	%%heapify1
%%end_heapify1:
%endm
+/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "huff_codes.h" +#include "huffman.h" +#include "flatten_ll.h" + +/* The order code length codes are written in the dynamic code header. 
This is + * defined in RFC 1951 page 13 */ +static const uint8_t code_length_code_order[] = + { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static const uint32_t len_code_extra_bits[] = { + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2, + 0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4, + 0x5, 0x5, 0x5, 0x5, 0x0 +}; + +static const uint32_t dist_code_extra_bits[] = { + 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, + 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, + 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa, + 0xb, 0xb, 0xc, 0xc, 0xd, 0xd +}; + +static struct hufftables_icf static_hufftables = { + .lit_len_table = { + {{{.code_and_extra = 0x00c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x08c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0cc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ac,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ec,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x05c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0dc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0bc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x002,.length2 = 0x8}}}, + {{{.code_and_extra = 0x082,.length2 = 0x8}}}, + {{{.code_and_extra = 0x042,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x022,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x062,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x012,.length2 = 0x8}}}, + {{{.code_and_extra = 0x092,.length2 = 0x8}}}, + {{{.code_and_extra = 0x052,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d2,.length2 = 
0x8}}}, + {{{.code_and_extra = 0x032,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x072,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x00a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x08a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ca,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0aa,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ea,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x05a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0da,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ba,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fa,.length2 = 0x8}}}, + {{{.code_and_extra = 0x006,.length2 = 0x8}}}, + {{{.code_and_extra = 0x086,.length2 = 0x8}}}, + {{{.code_and_extra = 0x046,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x026,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x066,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x016,.length2 = 0x8}}}, + {{{.code_and_extra = 0x096,.length2 = 0x8}}}, + {{{.code_and_extra = 0x056,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x036,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x076,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x00e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x08e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ce,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02e,.length2 = 0x8}}}, + {{{.code_and_extra = 
0x0ae,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ee,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x05e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0de,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0be,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fe,.length2 = 0x8}}}, + {{{.code_and_extra = 0x001,.length2 = 0x8}}}, + {{{.code_and_extra = 0x081,.length2 = 0x8}}}, + {{{.code_and_extra = 0x041,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x021,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x061,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x011,.length2 = 0x8}}}, + {{{.code_and_extra = 0x091,.length2 = 0x8}}}, + {{{.code_and_extra = 0x051,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x031,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x071,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x009,.length2 = 0x8}}}, + {{{.code_and_extra = 0x089,.length2 = 0x8}}}, + {{{.code_and_extra = 0x049,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x029,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x069,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x019,.length2 = 0x8}}}, + {{{.code_and_extra = 0x099,.length2 = 0x8}}}, + {{{.code_and_extra = 0x059,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x039,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x079,.length2 = 0x8}}}, + 
{{{.code_and_extra = 0x0f9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x005,.length2 = 0x8}}}, + {{{.code_and_extra = 0x085,.length2 = 0x8}}}, + {{{.code_and_extra = 0x045,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x025,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x065,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x015,.length2 = 0x8}}}, + {{{.code_and_extra = 0x095,.length2 = 0x8}}}, + {{{.code_and_extra = 0x055,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x035,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x075,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x00d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x08d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0cd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ad,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ed,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x05d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0dd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0bd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x013,.length2 = 0x9}}}, + {{{.code_and_extra = 0x113,.length2 = 0x9}}}, + {{{.code_and_extra = 0x093,.length2 = 0x9}}}, + {{{.code_and_extra = 0x193,.length2 = 0x9}}}, + {{{.code_and_extra = 0x053,.length2 = 0x9}}}, + {{{.code_and_extra = 0x153,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0d3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1d3,.length2 = 0x9}}}, + {{{.code_and_extra = 
0x033,.length2 = 0x9}}}, + {{{.code_and_extra = 0x133,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0b3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1b3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x073,.length2 = 0x9}}}, + {{{.code_and_extra = 0x173,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0f3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1f3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x00b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x10b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x08b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x18b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x04b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x14b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0cb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1cb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x02b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x12b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0ab,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1ab,.length2 = 0x9}}}, + {{{.code_and_extra = 0x06b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x16b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0eb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1eb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x01b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x11b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x09b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x19b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x05b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x15b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0db,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1db,.length2 = 0x9}}}, + {{{.code_and_extra = 0x03b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x13b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0bb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1bb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x07b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x17b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0fb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1fb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x007,.length2 = 0x9}}}, + {{{.code_and_extra = 0x107,.length2 = 0x9}}}, + 
{{{.code_and_extra = 0x087,.length2 = 0x9}}}, + {{{.code_and_extra = 0x187,.length2 = 0x9}}}, + {{{.code_and_extra = 0x047,.length2 = 0x9}}}, + {{{.code_and_extra = 0x147,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0c7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1c7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x027,.length2 = 0x9}}}, + {{{.code_and_extra = 0x127,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0a7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1a7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x067,.length2 = 0x9}}}, + {{{.code_and_extra = 0x167,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0e7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1e7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x017,.length2 = 0x9}}}, + {{{.code_and_extra = 0x117,.length2 = 0x9}}}, + {{{.code_and_extra = 0x097,.length2 = 0x9}}}, + {{{.code_and_extra = 0x197,.length2 = 0x9}}}, + {{{.code_and_extra = 0x057,.length2 = 0x9}}}, + {{{.code_and_extra = 0x157,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0d7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1d7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x037,.length2 = 0x9}}}, + {{{.code_and_extra = 0x137,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0b7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1b7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x077,.length2 = 0x9}}}, + {{{.code_and_extra = 0x177,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0f7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1f7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x00f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x10f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x08f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x18f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x04f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x14f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0cf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1cf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x02f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x12f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0af,.length2 = 0x9}}}, + {{{.code_and_extra = 
0x1af,.length2 = 0x9}}}, + {{{.code_and_extra = 0x06f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x16f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0ef,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1ef,.length2 = 0x9}}}, + {{{.code_and_extra = 0x01f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x11f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x09f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x19f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x05f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x15f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0df,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1df,.length2 = 0x9}}}, + {{{.code_and_extra = 0x03f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x13f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0bf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1bf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x07f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x17f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0ff,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1ff,.length2 = 0x9}}}, + {{{.code_and_extra = 0x000,.length2 = 0x7}}}, + {{{.code_and_extra = 0x040,.length2 = 0x7}}}, + {{{.code_and_extra = 0x020,.length2 = 0x7}}}, + {{{.code_and_extra = 0x060,.length2 = 0x7}}}, + {{{.code_and_extra = 0x010,.length2 = 0x7}}}, + {{{.code_and_extra = 0x050,.length2 = 0x7}}}, + {{{.code_and_extra = 0x030,.length2 = 0x7}}}, + {{{.code_and_extra = 0x070,.length2 = 0x7}}}, + {{{.code_and_extra = 0x008,.length2 = 0x7}}}, + {{{.code_and_extra = 0x048,.length2 = 0x7}}}, + {{{.code_and_extra = 0x028,.length2 = 0x7}}}, + {{{.code_and_extra = 0x068,.length2 = 0x7}}}, + {{{.code_and_extra = 0x018,.length2 = 0x7}}}, + {{{.code_and_extra = 0x058,.length2 = 0x7}}}, + {{{.code_and_extra = 0x038,.length2 = 0x7}}}, + {{{.code_and_extra = 0x078,.length2 = 0x7}}}, + {{{.code_and_extra = 0x004,.length2 = 0x7}}}, + {{{.code_and_extra = 0x044,.length2 = 0x7}}}, + {{{.code_and_extra = 0x024,.length2 = 0x7}}}, + {{{.code_and_extra = 0x064,.length2 = 0x7}}}, + {{{.code_and_extra = 0x014,.length2 = 0x7}}}, + 
{{{.code_and_extra = 0x054,.length2 = 0x7}}}, + {{{.code_and_extra = 0x034,.length2 = 0x7}}}, + {{{.code_and_extra = 0x074,.length2 = 0x7}}}, + {{{.code_and_extra = 0x003,.length2 = 0x8}}}, + {{{.code_and_extra = 0x083,.length2 = 0x8}}}, + {{{.code_and_extra = 0x043,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c3,.length2 = 0x8}}}, + {{{.code_and_extra = 0x023,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a3,.length2 = 0x8}}}, + {{{.code_and_extra = 0x063,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e3,.length2 = 0x8}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 
0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + 
{{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 
0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + 
{{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 
0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}}, + .dist_table = { + {{{.code_and_extra = 0x000,.length2 = 0x5}}}, + {{{.code_and_extra = 0x010,.length2 = 0x5}}}, + {{{.code_and_extra = 0x008,.length2 = 0x5}}}, + {{{.code_and_extra = 0x018,.length2 = 0x5}}}, + {{{.code_and_extra = 0x10004,.length2 = 0x5}}}, + {{{.code_and_extra = 0x10014,.length2 = 0x5}}}, + {{{.code_and_extra = 0x2000c,.length2 = 0x5}}}, + {{{.code_and_extra = 0x2001c,.length2 = 0x5}}}, + {{{.code_and_extra = 0x30002,.length2 = 0x5}}}, + {{{.code_and_extra = 0x30012,.length2 = 0x5}}}, + {{{.code_and_extra = 0x4000a,.length2 = 0x5}}}, + {{{.code_and_extra = 0x4001a,.length2 = 0x5}}}, + 
{{{.code_and_extra = 0x50006,.length2 = 0x5}}}, + {{{.code_and_extra = 0x50016,.length2 = 0x5}}}, + {{{.code_and_extra = 0x6000e,.length2 = 0x5}}}, + {{{.code_and_extra = 0x6001e,.length2 = 0x5}}}, + {{{.code_and_extra = 0x70001,.length2 = 0x5}}}, + {{{.code_and_extra = 0x70011,.length2 = 0x5}}}, + {{{.code_and_extra = 0x80009,.length2 = 0x5}}}, + {{{.code_and_extra = 0x80019,.length2 = 0x5}}}, + {{{.code_and_extra = 0x90005,.length2 = 0x5}}}, + {{{.code_and_extra = 0x90015,.length2 = 0x5}}}, + {{{.code_and_extra = 0xa000d,.length2 = 0x5}}}, + {{{.code_and_extra = 0xa001d,.length2 = 0x5}}}, + {{{.code_and_extra = 0xb0003,.length2 = 0x5}}}, + {{{.code_and_extra = 0xb0013,.length2 = 0x5}}}, + {{{.code_and_extra = 0xc000b,.length2 = 0x5}}}, + {{{.code_and_extra = 0xc001b,.length2 = 0x5}}}, + {{{.code_and_extra = 0xd0007,.length2 = 0x5}}}, + {{{.code_and_extra = 0xd0017,.length2 = 0x5}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}} +}; + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +/* Version info */ +struct slver isal_update_histogram_slver_00010085; +struct slver isal_update_histogram_slver = { 0x0085, 0x01, 0x00 }; + +struct slver isal_create_hufftables_slver_00010086; +struct slver isal_create_hufftables_slver = { 0x0086, 0x01, 0x00 }; + +struct slver isal_create_hufftables_subset_slver_00010087; +struct slver isal_create_hufftables_subset_slver = { 0x0087, 0x01, 0x00 }; + +extern uint32_t build_huff_tree(struct heap_tree *heap, uint64_t heap_size, uint64_t node_ptr); +extern void build_heap(uint64_t * heap, uint64_t heap_size); + +static uint32_t convert_dist_to_dist_sym(uint32_t dist); +static uint32_t convert_length_to_len_sym(uint32_t length); + +static const uint8_t bitrev8[0x100] = { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, + 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, + 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, + 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, + 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 
+ 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, + 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, + 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, + 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, + 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, + 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, + 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, + 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, + 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, + 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, + 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, + 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, + 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, + 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, + 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, + 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, + 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, + 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, + 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, + 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, + 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, + 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, + 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, + 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, + 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, + 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, + 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; + +// bit reverse low order LENGTH bits in code, and return result in low order bits +static inline uint16_t bit_reverse(uint16_t code, uint32_t length) +{ + code = (bitrev8[code & 0x00FF] << 8) | (bitrev8[code >> 8]); + return (code >> (16 - length)); +} + +void isal_update_histogram_base(uint8_t * start_stream, int length, + struct isal_huff_histogram *histogram) +{ + uint32_t literal = 0, hash; + uint16_t seen, *last_seen = histogram->hash_table; + uint8_t *current, *end_stream, *next_hash, *end; + uint32_t match_length; + uint32_t dist; + uint64_t *lit_len_histogram = histogram->lit_len_histogram; + uint64_t *dist_histogram = 
histogram->dist_histogram; + + if (length <= 0) + return; + + end_stream = start_stream + length; + memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */ + for (current = start_stream; current < end_stream - 3; current++) { + literal = load_u32(current); + hash = compute_hash(literal) & LVL0_HASH_MASK; + seen = last_seen[hash]; + last_seen[hash] = (current - start_stream) & 0xFFFF; + dist = (current - start_stream - seen) & 0xFFFF; + if (dist - 1 < D - 1) { + assert(start_stream <= current - dist); + match_length = + compare258(current - dist, current, end_stream - current); + if (match_length >= SHORTEST_MATCH) { + next_hash = current; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + if (end > end_stream - 3) + end = end_stream - 3; + next_hash++; + for (; next_hash < end; next_hash++) { + literal = load_u32(next_hash); + hash = compute_hash(literal) & LVL0_HASH_MASK; + last_seen[hash] = (next_hash - start_stream) & 0xFFFF; + } + + dist_histogram[convert_dist_to_dist_sym(dist)] += 1; + lit_len_histogram[convert_length_to_len_sym(match_length)] += + 1; + current += match_length - 1; + continue; + } + } + lit_len_histogram[literal & 0xFF] += 1; + } + + for (; current < end_stream; current++) + lit_len_histogram[*current] += 1; + + lit_len_histogram[256] += 1; + return; +} + +/** + * @brief Returns the deflate symbol value for a look back distance. + */ +static uint32_t convert_dist_to_dist_sym(uint32_t dist) +{ + assert(dist <= 32768 && dist > 0); + if (dist <= 32768) { + uint32_t msb = dist > 4 ? bsr(dist - 1) - 2 : 0; + return (msb * 2) + ((dist - 1) >> msb); + } else { + return ~0; + } +} + +/** + * @brief Returns the deflate symbol value for a repeat length. 
+ */ +static uint32_t convert_length_to_len_sym(uint32_t length) +{ + assert(length > 2 && length < 259); + + /* Based on tables on page 11 in RFC 1951 */ + if (length < 11) + return 257 + length - 3; + else if (length < 19) + return 261 + (length - 3) / 2; + else if (length < 35) + return 265 + (length - 3) / 4; + else if (length < 67) + return 269 + (length - 3) / 8; + else if (length < 131) + return 273 + (length - 3) / 16; + else if (length < 258) + return 277 + (length - 3) / 32; + else + return 285; +} + +// Upon return, codes[] contains the code lengths, +// and bl_count is the count of the lengths + +/* Init heap with the histogram, and return the histogram size */ +static inline uint32_t init_heap32(struct heap_tree *heap_space, uint32_t * histogram, + uint32_t hist_size) +{ + uint32_t heap_size, i; + + memset(heap_space, 0, sizeof(struct heap_tree)); + + heap_size = 0; + for (i = 0; i < hist_size; i++) { + if (histogram[i] != 0) + heap_space->heap[++heap_size] = + (((uint64_t) histogram[i]) << FREQ_SHIFT) | i; + } + + // make sure heap has at least two elements in it + if (heap_size < 2) { + if (heap_size == 0) { + heap_space->heap[1] = 1ULL << FREQ_SHIFT; + heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } else { + // heap size == 1 + if (histogram[0] == 0) + heap_space->heap[2] = 1ULL << FREQ_SHIFT; + else + heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } + } + + build_heap(heap_space->heap, heap_size); + + return heap_size; +} + +static inline uint32_t init_heap64(struct heap_tree *heap_space, uint64_t * histogram, + uint64_t hist_size) +{ + uint32_t heap_size, i; + + memset(heap_space, 0, sizeof(struct heap_tree)); + + heap_size = 0; + for (i = 0; i < hist_size; i++) { + if (histogram[i] != 0) + heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i; + } + + // make sure heap has at least two elements in it + if (heap_size < 2) { + if (heap_size == 0) { + heap_space->heap[1] = 1ULL << FREQ_SHIFT; + 
heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } else { + // heap size == 1 + if (histogram[0] == 0) + heap_space->heap[2] = 1ULL << FREQ_SHIFT; + else + heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } + } + + build_heap(heap_space->heap, heap_size); + + return heap_size; +} + +static inline uint32_t init_heap64_semi_complete(struct heap_tree *heap_space, + uint64_t * histogram, uint64_t hist_size, + uint64_t complete_start) +{ + uint32_t heap_size, i; + + memset(heap_space, 0, sizeof(struct heap_tree)); + + heap_size = 0; + for (i = 0; i < complete_start; i++) { + if (histogram[i] != 0) + heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i; + } + + for (; i < hist_size; i++) + heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i; + + // make sure heap has at least two elements in it + if (heap_size < 2) { + if (heap_size == 0) { + heap_space->heap[1] = 1ULL << FREQ_SHIFT; + heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } else { + // heap size == 1 + if (histogram[0] == 0) + heap_space->heap[2] = 1ULL << FREQ_SHIFT; + else + heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } + } + + build_heap(heap_space->heap, heap_size); + + return heap_size; +} + +static inline uint32_t init_heap64_complete(struct heap_tree *heap_space, uint64_t * histogram, + uint64_t hist_size) +{ + uint32_t heap_size, i; + + memset(heap_space, 0, sizeof(struct heap_tree)); + + heap_size = 0; + for (i = 0; i < hist_size; i++) + heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i; + + build_heap(heap_space->heap, heap_size); + + return heap_size; +} + +static inline uint32_t fix_code_lens(struct heap_tree *heap_space, uint32_t root_node, + uint32_t * bl_count, uint32_t max_code_len) +{ + struct tree_node *tree = heap_space->tree; + uint64_t *code_len_count = heap_space->code_len_count; + uint32_t i, j, k, child, depth, code_len; + + // compute code lengths and code length 
counts + code_len = 0; + j = root_node; + for (i = root_node; i <= HEAP_TREE_NODE_START; i++) { + child = tree[i].child; + if (child > MAX_HISTHEAP_SIZE) { + depth = 1 + tree[i].depth; + + tree[child].depth = depth; + tree[child - 1].depth = depth; + } else { + tree[j++] = tree[i]; + depth = tree[i].depth; + while (code_len < depth) { + code_len++; + code_len_count[code_len] = 0; + } + code_len_count[depth]++; + } + } + + if (code_len > max_code_len) { + while (code_len > max_code_len) { + assert(code_len_count[code_len] > 1); + for (i = max_code_len - 1; i != 0; i--) + if (code_len_count[i] != 0) + break; + assert(i != 0); + code_len_count[i]--; + code_len_count[i + 1] += 2; + code_len_count[code_len - 1]++; + code_len_count[code_len] -= 2; + if (code_len_count[code_len] == 0) + code_len--; + } + + bl_count[0] = 0; + for (i = 1; i <= code_len; i++) + bl_count[i] = code_len_count[i]; + for (; i <= max_code_len; i++) + bl_count[i] = 0; + + for (k = 1; code_len_count[k] == 0; k++) ; + for (i = root_node; i < j; i++) { + tree[i].depth = k; + code_len_count[k]--; + for (; code_len_count[k] == 0; k++) ; + } + } else { + bl_count[0] = 0; + for (i = 1; i <= code_len; i++) + bl_count[i] = code_len_count[i]; + for (; i <= max_code_len; i++) + bl_count[i] = 0; + } + + return j; + +} + +static inline void +gen_huff_code_lens(struct heap_tree *heap_space, uint32_t heap_size, uint32_t * bl_count, + struct huff_code *codes, uint32_t codes_count, uint32_t max_code_len) +{ + struct tree_node *tree = heap_space->tree; + uint32_t root_node = HEAP_TREE_NODE_START, node_ptr; + uint32_t end_node; + + root_node = build_huff_tree(heap_space, heap_size, root_node); + + end_node = fix_code_lens(heap_space, root_node, bl_count, max_code_len); + + memset(codes, 0, codes_count * sizeof(*codes)); + for (node_ptr = root_node; node_ptr < end_node; node_ptr++) + codes[tree[node_ptr].child].length = tree[node_ptr].depth; + +} + +/** + * @brief Determines the code each element of a deflate 
compliant huffman tree and stores + * it in a lookup table + * @requires table has been initialized to already contain the code length for each element. + * @param table: A lookup table used to store the codes. + * @param table_length: The length of table. + * @param count: a histogram representing the number of occurences of codes of a given length + */ +static inline uint32_t set_huff_codes(struct huff_code *huff_code_table, int table_length, + uint32_t * count) +{ + /* Uses the algorithm mentioned in the deflate standard, Rfc 1951. */ + int i; + uint16_t code = 0; + uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1]; + uint32_t max_code = 0; + + next_code[0] = code; + + for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++) + next_code[i] = (next_code[i - 1] + count[i - 1]) << 1; + + for (i = 0; i < table_length; i++) { + if (huff_code_table[i].length != 0) { + huff_code_table[i].code = + bit_reverse(next_code[huff_code_table[i].length], + huff_code_table[i].length); + next_code[huff_code_table[i].length] += 1; + max_code = i; + } + } + + return max_code; +} + +// on input, codes contain the code lengths +// on output, code contains: +// 23:16 code length +// 15:0 code value in low order bits +// returns max code value +static inline uint32_t set_dist_huff_codes(struct huff_code *codes, uint32_t * bl_count) +{ + uint32_t code, code_len, bits, i; + uint32_t next_code[MAX_DEFLATE_CODE_LEN + 1]; + uint32_t max_code = 0; + const uint32_t num_codes = DIST_LEN; + + code = bl_count[0] = 0; + for (bits = 1; bits <= MAX_HUFF_TREE_DEPTH; bits++) { + code = (code + bl_count[bits - 1]) << 1; + next_code[bits] = code; + } + for (i = 0; i < num_codes; i++) { + code_len = codes[i].length; + if (code_len != 0) { + codes[i].code = bit_reverse(next_code[code_len], code_len); + codes[i].extra_bit_count = dist_code_extra_bits[i]; + next_code[code_len] += 1; + max_code = i; + } + } + return max_code; +} + +/** + * @brief Creates the header for run length encoded huffman trees. 
+ * @param header: the output header. + * @param lookup_table: a huffman lookup table. + * @param huffman_rep: a run length encoded huffman tree. + * @extra_bits: extra bits associated with the corresponding spot in huffman_rep + * @param huffman_rep_length: the length of huffman_rep. + * @param end_of_block: Value determining whether end of block header is produced or not; + * 0 corresponds to not end of block and all other inputs correspond to end of block. + * @param hclen: Length of huffman code for huffman codes minus 4. + * @param hlit: Length of literal/length table minus 257. + * @parm hdist: Length of distance table minus 1. + */ +static int create_huffman_header(struct BitBuf2 *header_bitbuf, + struct huff_code *lookup_table, + struct rl_code *huffman_rep, + uint16_t huffman_rep_length, uint32_t end_of_block, + uint32_t hclen, uint32_t hlit, uint32_t hdist) +{ + /* hlit, hdist, hclen are as defined in the deflate standard, head is the + * first three deflate header bits.*/ + int i; + uint64_t bit_count; + uint64_t data; + struct huff_code huffman_value; + const uint32_t extra_bits[3] = { 2, 3, 7 }; + + bit_count = buffer_bits_used(header_bitbuf); + + data = (end_of_block ? 
5 : 4) | (hlit << 3) | (hdist << 8) | (hclen << 13); + data |= ((lookup_table[code_length_code_order[0]].length) << DYN_HDR_START_LEN); + write_bits(header_bitbuf, data, DYN_HDR_START_LEN + 3); + data = 0; + for (i = hclen + 3; i >= 1; i--) + data = (data << 3) | lookup_table[code_length_code_order[i]].length; + + write_bits(header_bitbuf, data, (hclen + 3) * 3); + + for (i = 0; i < huffman_rep_length; i++) { + huffman_value = lookup_table[huffman_rep[i].code]; + + write_bits(header_bitbuf, (uint64_t) huffman_value.code, + (uint32_t) huffman_value.length); + + if (huffman_rep[i].code > 15) { + write_bits(header_bitbuf, (uint64_t) huffman_rep[i].extra_bits, + (uint32_t) extra_bits[huffman_rep[i].code - 16]); + } + } + bit_count = buffer_bits_used(header_bitbuf) - bit_count; + + return bit_count; +} + +/** + * @brief Creates the dynamic huffman deflate header. + * @returns Returns the length of header in bits. + * @requires This function requires header is large enough to store the whole header. + * @param header: The output header. + * @param lit_huff_table: A literal/length code huffman lookup table.\ + * @param dist_huff_table: A distance huffman code lookup table. + * @param end_of_block: Value determining whether end of block header is produced or not; + * 0 corresponds to not end of block and all other inputs correspond to end of block. + */ +static inline int create_header(struct BitBuf2 *header_bitbuf, struct rl_code *huffman_rep, + uint32_t length, uint64_t * histogram, uint32_t hlit, + uint32_t hdist, uint32_t end_of_block) +{ + int i; + + uint32_t heap_size; + struct heap_tree heap_space; + uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1]; + struct huff_code lookup_table[HUFF_LEN]; + + /* hlit, hdist, and hclen are defined in RFC 1951 page 13 */ + uint32_t hclen; + uint64_t bit_count; + + /* Create a huffman tree to encode run length encoded representation. 
*/ + heap_size = init_heap64(&heap_space, histogram, HUFF_LEN); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)lookup_table, HUFF_LEN, 7); + set_huff_codes(lookup_table, HUFF_LEN, code_len_count); + + /* Calculate hclen */ + for (i = CODE_LEN_CODES - 1; i > 3; i--) /* i must be at least 4 */ + if (lookup_table[code_length_code_order[i]].length != 0) + break; + + hclen = i - 3; + + /* Generate actual header. */ + bit_count = create_huffman_header(header_bitbuf, lookup_table, huffman_rep, + length, end_of_block, hclen, hlit, hdist); + + return bit_count; +} + +static inline + struct rl_code *write_rl(struct rl_code *pout, uint16_t last_len, uint32_t run_len, + uint64_t * counts) +{ + if (last_len == 0) { + while (run_len > 138) { + pout->code = 18; + pout->extra_bits = 138 - 11; + pout++; + run_len -= 138; + counts[18]++; + } + // 1 <= run_len <= 138 + if (run_len > 10) { + pout->code = 18; + pout->extra_bits = run_len - 11; + pout++; + counts[18]++; + } else if (run_len > 2) { + pout->code = 17; + pout->extra_bits = run_len - 3; + pout++; + counts[17]++; + } else if (run_len == 1) { + pout->code = 0; + pout->extra_bits = 0; + pout++; + counts[0]++; + } else { + assert(run_len == 2); + pout[0].code = 0; + pout[0].extra_bits = 0; + pout[1].code = 0; + pout[1].extra_bits = 0; + pout += 2; + counts[0] += 2; + } + } else { + // last_len != 0 + pout->code = last_len; + pout->extra_bits = 0; + pout++; + counts[last_len]++; + run_len--; + if (run_len != 0) { + while (run_len > 6) { + pout->code = 16; + pout->extra_bits = 6 - 3; + pout++; + run_len -= 6; + counts[16]++; + } + // 1 <= run_len <= 6 + switch (run_len) { + case 1: + pout->code = last_len; + pout->extra_bits = 0; + pout++; + counts[last_len]++; + break; + case 2: + pout[0].code = last_len; + pout[0].extra_bits = 0; + pout[1].code = last_len; + pout[1].extra_bits = 0; + pout += 2; + counts[last_len] += 2; + break; + default: // 3...6 + pout->code = 16; + pout->extra_bits = 
run_len - 3; + pout++; + counts[16]++; + } + } + } + return pout; +} + +// convert codes into run-length symbols, write symbols into OUT +// generate histogram into COUNTS (assumed to be initialized to 0) +// Format of OUT: +// 4:0 code (0...18) +// 15:8 Extra bits (0...127) +// returns number of symbols in out +static inline uint32_t rl_encode(uint16_t * codes, uint32_t num_codes, uint64_t * counts, + struct rl_code *out) +{ + uint32_t i, run_len; + uint16_t last_len, len; + struct rl_code *pout; + + pout = out; + last_len = codes[0]; + run_len = 1; + for (i = 1; i < num_codes; i++) { + len = codes[i]; + if (len == last_len) { + run_len++; + continue; + } + pout = write_rl(pout, last_len, run_len, counts); + last_len = len; + run_len = 1; + } + pout = write_rl(pout, last_len, run_len, counts); + + return (uint32_t) (pout - out); +} + +/** + * @brief Creates a two table representation of huffman codes. + * @param code_table: output table containing the code + * @param code_length_table: output table containing the code length + * @param length: the length of hufftable + * @param hufftable: a huffman lookup table + */ +static void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, + uint32_t length, struct huff_code *hufftable) +{ + int i; + for (i = 0; i < length; i++) { + code_table[i] = hufftable[i].code; + code_length_table[i] = hufftable[i].length; + } +} + +/** + * @brief Creates a packed representation of length huffman codes. + * @details In packed_table, bits 32:8 contain the extra bits appended to the huffman + * code and bits 8:0 contain the code length. 
+ * @param packed_table: the output table + * @param length: the length of lit_len_hufftable + * @param lit_len_hufftable: a literal/length huffman lookup table + */ +static void create_packed_len_table(uint32_t * packed_table, + struct huff_code *lit_len_hufftable) +{ + int i, count = 0; + uint16_t extra_bits; + uint16_t extra_bits_count = 0; + + /* Gain extra bits is the next place where the number of extra bits in + * length codes increases. */ + uint16_t gain_extra_bits = LEN_EXTRA_BITS_START; + + for (i = 257; i < LIT_LEN - 1; i++) { + for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) { + if (count > 254) + break; + packed_table[count++] = + (extra_bits << (lit_len_hufftable[i].length + LENGTH_BITS)) | + (lit_len_hufftable[i].code << LENGTH_BITS) | + (lit_len_hufftable[i].length + extra_bits_count); + } + + if (i == gain_extra_bits) { + gain_extra_bits += LEN_EXTRA_BITS_INTERVAL; + extra_bits_count += 1; + } + } + + packed_table[count] = (lit_len_hufftable[LIT_LEN - 1].code << LENGTH_BITS) | + (lit_len_hufftable[LIT_LEN - 1].length); +} + +/** + * @brief Creates a packed representation of distance huffman codes. + * @details In packed_table, bits 32:8 contain the extra bits appended to the huffman + * code and bits 8:0 contain the code length. + * @param packed_table: the output table + * @param length: the maximum number of entries written to packed_table + * @param dist_hufftable: a distance huffman lookup table + */ +static void create_packed_dist_table(uint32_t * packed_table, uint32_t length, + struct huff_code *dist_hufftable) +{ + int i, count = 0; + uint16_t extra_bits; + uint16_t extra_bits_count = 0; + + /* Gain extra bits is the next place where the number of extra bits in + * distance codes increases. 
*/ + uint16_t gain_extra_bits = DIST_EXTRA_BITS_START; + + for (i = 0; i < DIST_LEN; i++) { + for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) { + if (count >= length) + return; + + packed_table[count++] = + (extra_bits << (dist_hufftable[i].length + LENGTH_BITS)) | + (dist_hufftable[i].code << LENGTH_BITS) | + (dist_hufftable[i].length + extra_bits_count); + + } + + if (i == gain_extra_bits) { + gain_extra_bits += DIST_EXTRA_BITS_INTERVAL; + extra_bits_count += 1; + } + } +} + +/** + * @brief Checks to see if the hufftable is usable by igzip + * + * @param lit_len_hufftable: literal/length huffman code + * @param dist_hufftable: distance huffman code + * @returns Returns 0 if the table is usable + */ +static int are_hufftables_useable(struct huff_code *lit_len_hufftable, + struct huff_code *dist_hufftable) +{ + int max_lit_code_len = 0, max_len_code_len = 0, max_dist_code_len = 0; + int dist_extra_bits = 0, len_extra_bits = 0; + int gain_dist_extra_bits = DIST_EXTRA_BITS_START; + int gain_len_extra_bits = LEN_EXTRA_BITS_START; + int max_code_len; + int i; + + for (i = 0; i < LIT_LEN; i++) + if (lit_len_hufftable[i].length > max_lit_code_len) + max_lit_code_len = lit_len_hufftable[i].length; + + for (i = 257; i < LIT_LEN - 1; i++) { + if (lit_len_hufftable[i].length + len_extra_bits > max_len_code_len) + max_len_code_len = lit_len_hufftable[i].length + len_extra_bits; + + if (i == gain_len_extra_bits) { + gain_len_extra_bits += LEN_EXTRA_BITS_INTERVAL; + len_extra_bits += 1; + } + } + + for (i = 0; i < DIST_LEN; i++) { + if (dist_hufftable[i].length + dist_extra_bits > max_dist_code_len) + max_dist_code_len = dist_hufftable[i].length + dist_extra_bits; + + if (i == gain_dist_extra_bits) { + gain_dist_extra_bits += DIST_EXTRA_BITS_INTERVAL; + dist_extra_bits += 1; + } + } + + max_code_len = max_lit_code_len + max_len_code_len + max_dist_code_len; + + /* Some versions of igzip can write upto one literal, one length and one + * distance code at 
the same time. This checks to make sure that is + * always writeable in bitbuf*/ + return (max_code_len > MAX_BITBUF_BIT_WRITE); +} + +int isal_create_hufftables(struct isal_hufftables *hufftables, + struct isal_huff_histogram *histogram) +{ + struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN]; + uint64_t bit_count; + int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE); + struct heap_tree heap_space; + uint32_t heap_size; + uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1]; + struct BitBuf2 header_bitbuf; + uint32_t max_lit_len_sym; + uint32_t max_dist_sym; + uint32_t hlit, hdist, i; + uint16_t combined_table[LIT_LEN + DIST_LEN]; + uint64_t count_histogram[HUFF_LEN]; + struct rl_code rl_huff[LIT_LEN + DIST_LEN]; + uint32_t rl_huff_len; + + uint32_t *dist_table = hufftables->dist_table; + uint32_t *len_table = hufftables->len_table; + uint16_t *lit_table = hufftables->lit_table; + uint16_t *dcodes = hufftables->dcodes; + uint8_t *lit_table_sizes = hufftables->lit_table_sizes; + uint8_t *dcodes_sizes = hufftables->dcodes_sizes; + uint64_t *lit_len_histogram = histogram->lit_len_histogram; + uint64_t *dist_histogram = histogram->dist_histogram; + + memset(hufftables, 0, sizeof(struct isal_hufftables)); + + heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN); + max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count); + + heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)dist_huff_table, max_dist, + MAX_DEFLATE_CODE_LEN); + max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count); + + if (are_hufftables_useable(lit_huff_table, dist_huff_table)) { + heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN); + gen_huff_code_lens(&heap_space, 
heap_size, code_len_count, + (struct huff_code *)lit_huff_table, LIT_LEN, + MAX_SAFE_LIT_CODE_LEN); + max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count); + + heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)dist_huff_table, max_dist, + MAX_SAFE_DIST_CODE_LEN); + max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count); + + } + + create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET, + dist_huff_table + DCODE_OFFSET); + + create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table); + + create_packed_len_table(len_table, lit_huff_table); + create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table); + + set_buf(&header_bitbuf, hufftables->deflate_hdr, sizeof(hufftables->deflate_hdr)); + init(&header_bitbuf); + + hlit = max_lit_len_sym - 256; + hdist = max_dist_sym; + + /* Run length encode the length and distance huffman codes */ + memset(count_histogram, 0, sizeof(count_histogram)); + for (i = 0; i < 257 + hlit; i++) + combined_table[i] = lit_huff_table[i].length; + for (i = 0; i < 1 + hdist; i++) + combined_table[i + hlit + 257] = dist_huff_table[i].length; + rl_huff_len = + rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff); + + /* Create header */ + bit_count = + create_header(&header_bitbuf, rl_huff, rl_huff_len, + count_histogram, hlit, hdist, LAST_BLOCK); + flush(&header_bitbuf); + + hufftables->deflate_hdr_count = bit_count / 8; + hufftables->deflate_hdr_extra_bits = bit_count % 8; + + return 0; +} + +int isal_create_hufftables_subset(struct isal_hufftables *hufftables, + struct isal_huff_histogram *histogram) +{ + struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN]; + uint64_t bit_count; + int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE); + struct heap_tree heap_space; + uint32_t heap_size; + uint32_t 
code_len_count[MAX_HUFF_TREE_DEPTH + 1]; + struct BitBuf2 header_bitbuf; + uint32_t max_lit_len_sym; + uint32_t max_dist_sym; + uint32_t hlit, hdist, i; + uint16_t combined_table[LIT_LEN + DIST_LEN]; + uint64_t count_histogram[HUFF_LEN]; + struct rl_code rl_huff[LIT_LEN + DIST_LEN]; + uint32_t rl_huff_len; + + uint32_t *dist_table = hufftables->dist_table; + uint32_t *len_table = hufftables->len_table; + uint16_t *lit_table = hufftables->lit_table; + uint16_t *dcodes = hufftables->dcodes; + uint8_t *lit_table_sizes = hufftables->lit_table_sizes; + uint8_t *dcodes_sizes = hufftables->dcodes_sizes; + uint64_t *lit_len_histogram = histogram->lit_len_histogram; + uint64_t *dist_histogram = histogram->dist_histogram; + + memset(hufftables, 0, sizeof(struct isal_hufftables)); + + heap_size = + init_heap64_semi_complete(&heap_space, lit_len_histogram, LIT_LEN, + ISAL_DEF_LIT_SYMBOLS); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN); + max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count); + + heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)dist_huff_table, max_dist, + MAX_DEFLATE_CODE_LEN); + max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count); + + if (are_hufftables_useable(lit_huff_table, dist_huff_table)) { + heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)lit_huff_table, LIT_LEN, + MAX_SAFE_LIT_CODE_LEN); + max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count); + + heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN); + gen_huff_code_lens(&heap_space, heap_size, code_len_count, + (struct huff_code *)dist_huff_table, max_dist, + MAX_SAFE_DIST_CODE_LEN); + max_dist_sym = 
set_huff_codes(dist_huff_table, DIST_LEN, code_len_count); + + } + + create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET, + dist_huff_table + DCODE_OFFSET); + + create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table); + + create_packed_len_table(len_table, lit_huff_table); + create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table); + + set_buf(&header_bitbuf, hufftables->deflate_hdr, sizeof(hufftables->deflate_hdr)); + init(&header_bitbuf); + + hlit = max_lit_len_sym - 256; + hdist = max_dist_sym; + + /* Run length encode the length and distance huffman codes */ + memset(count_histogram, 0, sizeof(count_histogram)); + for (i = 0; i < 257 + hlit; i++) + combined_table[i] = lit_huff_table[i].length; + for (i = 0; i < 1 + hdist; i++) + combined_table[i + hlit + 257] = dist_huff_table[i].length; + rl_huff_len = + rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff); + + /* Create header */ + bit_count = + create_header(&header_bitbuf, rl_huff, rl_huff_len, + count_histogram, hlit, hdist, LAST_BLOCK); + flush(&header_bitbuf); + + hufftables->deflate_hdr_count = bit_count / 8; + hufftables->deflate_hdr_extra_bits = bit_count % 8; + + return 0; +} + +static void expand_hufftables_icf(struct hufftables_icf *hufftables) +{ + uint32_t i, eb, j, k, len, code; + struct huff_code orig[21], *p_code; + struct huff_code *lit_len_codes = hufftables->lit_len_table; + struct huff_code *dist_codes = hufftables->dist_table; + + for (i = 0; i < 21; i++) + orig[i] = lit_len_codes[i + 265]; + + p_code = &lit_len_codes[265]; + + i = 0; + for (eb = 1; eb < 6; eb++) { + for (k = 0; k < 4; k++) { + len = orig[i].length; + code = orig[i++].code; + for (j = 0; j < (1u << eb); j++) { + p_code->code_and_extra = code | (j << len); + p_code->length = len + eb; + p_code++; + } + } // end for k + } // end for eb + // fix up last record + p_code[-1] = orig[i]; + + dist_codes[DIST_LEN].code_and_extra = 0; + 
dist_codes[DIST_LEN].length = 0; +} + +uint64_t +create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf *hufftables, + struct isal_mod_hist *hist, uint32_t end_of_block) +{ + uint32_t bl_count[MAX_DEFLATE_CODE_LEN + 1]; + uint32_t max_ll_code, max_d_code; + struct heap_tree heap_space; + uint32_t heap_size; + struct rl_code cl_tokens[LIT_LEN + DIST_LEN]; + uint32_t num_cl_tokens; + uint64_t cl_counts[CODE_LEN_CODES]; + uint16_t combined_table[LIT_LEN + DIST_LEN]; + int i; + uint64_t compressed_len = 0; + uint64_t static_compressed_len = 3; /* The static header size */ + struct BitBuf2 bb_tmp; + + struct huff_code *ll_codes = hufftables->lit_len_table; + struct huff_code *d_codes = hufftables->dist_table; + uint32_t *ll_hist = hist->ll_hist; + uint32_t *d_hist = hist->d_hist; + struct huff_code *static_ll_codes = static_hufftables.lit_len_table; + struct huff_code *static_d_codes = static_hufftables.dist_table; + + memcpy(&bb_tmp, bb, sizeof(struct BitBuf2)); + + flatten_ll(hist->ll_hist); + + // make sure EOB is present + if (ll_hist[256] == 0) + ll_hist[256] = 1; + + heap_size = init_heap32(&heap_space, ll_hist, LIT_LEN); + gen_huff_code_lens(&heap_space, heap_size, bl_count, + ll_codes, LIT_LEN, MAX_DEFLATE_CODE_LEN); + max_ll_code = set_huff_codes(ll_codes, LIT_LEN, bl_count); + + heap_size = init_heap32(&heap_space, d_hist, DIST_LEN); + gen_huff_code_lens(&heap_space, heap_size, bl_count, d_codes, + DIST_LEN, MAX_DEFLATE_CODE_LEN); + max_d_code = set_dist_huff_codes(d_codes, bl_count); + + assert(max_ll_code >= 256); // must be EOB code + assert(max_d_code != 0); + + /* Run length encode the length and distance huffman codes */ + memset(cl_counts, 0, sizeof(cl_counts)); + + for (i = 0; i <= 256; i++) { + combined_table[i] = ll_codes[i].length; + compressed_len += ll_codes[i].length * ll_hist[i]; + static_compressed_len += static_ll_codes[i].length * ll_hist[i]; + } + + for (; i < max_ll_code + 1; i++) { + combined_table[i] = ll_codes[i].length; + 
compressed_len += + (ll_codes[i].length + len_code_extra_bits[i - 257]) * ll_hist[i]; + static_compressed_len += + (static_ll_codes[i].length + len_code_extra_bits[i - 257]) * ll_hist[i]; + } + + for (i = 0; i < max_d_code + 1; i++) { + combined_table[i + max_ll_code + 1] = d_codes[i].length; + compressed_len += (d_codes[i].length + dist_code_extra_bits[i]) * d_hist[i]; + static_compressed_len += + (static_d_codes[i].length + dist_code_extra_bits[i]) * d_hist[i]; + } + + if (static_compressed_len > compressed_len) { + num_cl_tokens = rl_encode(combined_table, max_ll_code + max_d_code + 2, + cl_counts, cl_tokens); + + /* Create header */ + create_header(bb, cl_tokens, num_cl_tokens, cl_counts, max_ll_code - 256, + max_d_code, end_of_block); + compressed_len += 8 * buffer_used(bb) + bb->m_bit_count; + } + + /* Substitute in static block since it creates smaller block */ + if (static_compressed_len <= compressed_len) { + memcpy(hufftables, &static_hufftables, sizeof(struct hufftables_icf)); + memcpy(bb, &bb_tmp, sizeof(struct BitBuf2)); + end_of_block = end_of_block ? 1 : 0; + write_bits(bb, 0x2 | end_of_block, 3); + compressed_len = static_compressed_len; + } + + expand_hufftables_icf(hufftables); + return compressed_len; +} diff --git a/src/spdk/isa-l/igzip/huff_codes.h b/src/spdk/isa-l/igzip/huff_codes.h new file mode 100644 index 000000000..d353d27ea --- /dev/null +++ b/src/spdk/isa-l/igzip/huff_codes.h @@ -0,0 +1,170 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#ifndef HUFF_CODES_H +#define HUFF_CODES_H + +#include +#include +#include +#include "igzip_lib.h" +#include "bitbuf2.h" + +#if __x86_64__ || __i386__ || _M_X64 || _M_IX86 +# include +#ifdef _MSC_VER +# include +#else +# include +#endif +#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86 + +#define LIT_LEN ISAL_DEF_LIT_LEN_SYMBOLS +#define DIST_LEN ISAL_DEF_DIST_SYMBOLS +#define CODE_LEN_CODES 19 +#define HUFF_LEN 19 +#ifdef LONGER_HUFFTABLE +# define DCODE_OFFSET 26 +#else +# define DCODE_OFFSET 0 +#endif +#define DYN_HDR_START_LEN 17 +#define MAX_HISTHEAP_SIZE LIT_LEN +#define MAX_HUFF_TREE_DEPTH 15 +#define D IGZIP_HIST_SIZE /* Amount of history */ + +#define MAX_DEFLATE_CODE_LEN 15 +#define MAX_SAFE_LIT_CODE_LEN 13 +#define MAX_SAFE_DIST_CODE_LEN 12 + +#define LONG_DIST_TABLE_SIZE 8192 +#define SHORT_DIST_TABLE_SIZE 2 +#define LEN_TABLE_SIZE 256 +#define LIT_TABLE_SIZE 257 +#define LAST_BLOCK 1 + +#define LEN_EXTRA_BITS_START 264 +#define LEN_EXTRA_BITS_INTERVAL 4 +#define DIST_EXTRA_BITS_START 3 +#define DIST_EXTRA_BITS_INTERVAL 2 + +#define INVALID_LIT_LEN_HUFFCODE 1 +#define INVALID_DIST_HUFFCODE 1 +#define INVALID_HUFFCODE 1 + +#define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1) +#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_SIZE - 1) +#define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1) + +#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) +#define LVL1_HASH_MASK (IGZIP_LVL1_HASH_SIZE - 1) +#define LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1) +#define LVL3_HASH_MASK (IGZIP_LVL3_HASH_SIZE - 1) +#define SHORTEST_MATCH 4 + +#define LENGTH_BITS 5 +#define FREQ_SHIFT 16 +#define FREQ_MASK_HI (0xFFFFFFFFFFFF0000) +#define DEPTH_SHIFT 24 +#define DEPTH_MASK 0x7F +#define DEPTH_MASK_HI (DEPTH_MASK << DEPTH_SHIFT) +#define DEPTH_1 (1 << DEPTH_SHIFT) +#define HEAP_TREE_SIZE (3*MAX_HISTHEAP_SIZE + 1) +#define HEAP_TREE_NODE_START (HEAP_TREE_SIZE-1) +#define MAX_BL_CODE_LEN 7 + +/** + * 
@brief Structure used to store huffman codes + */ +struct huff_code { + union { + struct { + uint32_t code_and_extra:24; + uint32_t length2:8; + }; + + struct { + uint16_t code; + uint8_t extra_bit_count; + uint8_t length; + }; + + uint32_t code_and_length; + }; +}; + +struct tree_node { + uint32_t child; + uint32_t depth; +}; + +struct heap_tree { + union { + uint64_t heap[HEAP_TREE_SIZE]; + uint64_t code_len_count[MAX_HUFF_TREE_DEPTH + 1]; + struct tree_node tree[HEAP_TREE_SIZE]; + }; +}; + +struct rl_code { + uint8_t code; + uint8_t extra_bits; +}; + +struct hufftables_icf { + union { + struct { + struct huff_code dist_lit_table[288]; + struct huff_code len_table[256]; + }; + + struct { + struct huff_code dist_table[31]; + struct huff_code lit_len_table[513]; + }; + }; +}; + +/** + * @brief Creates a representation of the huffman code from a histogram used to + * decompress the intermediate compression format. + * + * @param bb: bitbuf structure where the huffman code header is written + * @param hufftables: output huffman code representation + * @param hist: histogram used to generate huffman code + * @param end_of_block: flag whether this is the final huffman code + * + * @returns Returns the length in bits of the block with histogram hist encoded + * with the set hufftable + */ +uint64_t +create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf * hufftables, + struct isal_mod_hist *hist, uint32_t end_of_block); + +#endif diff --git a/src/spdk/isa-l/igzip/huffman.asm b/src/spdk/isa-l/igzip/huffman.asm new file mode 100644 index 000000000..9056b5ee4 --- /dev/null +++ b/src/spdk/isa-l/igzip/huffman.asm @@ -0,0 +1,249 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%include "lz0a_const.asm" +%include "stdmac.asm" + +; Macros for doing Huffman Encoding + +%ifdef LONGER_HUFFTABLE + %if (D > 8192) + %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE + % error + %else + %define DIST_TABLE_SIZE 8192 + %define DECODE_OFFSET 26 + %endif +%else + %define DIST_TABLE_SIZE 2 + %define DECODE_OFFSET 0 +%endif + +%define LEN_TABLE_SIZE 256 +%define LIT_TABLE_SIZE 257 + +%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) +%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) +%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) +%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE) +%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE) +%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2) +%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET) +;; /** @brief Holds the huffman tree used to huffman encode the input stream **/ +;; struct isal_hufftables { +;; // deflate huffman tree header +;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE]; +;; +;; //!< Number of whole bytes in deflate_huff_hdr +;; uint32_t deflate_huff_hdr_count; +;; +;; //!< Number of bits in the partial byte in header +;; uint32_t deflate_huff_hdr_extra_bits; +;; +;; //!< bits 7:0 are the code length, bits 31:8 are the code +;; uint32_t dist_table[DIST_TABLE_SIZE]; +;; +;; //!< bits 7:0 are the code length, bits 31:8 are the code +;; uint32_t len_table[LEN_TABLE_SIZE]; +;; +;; //!< bits 3:0 are the code length, bits 15:4 are the code +;; uint16_t lit_table[LIT_TABLE_SIZE]; +;; +;; //!< bits 3:0 are the code length, bits 15:4 are the code +;; uint16_t dcodes[30 - DECODE_OFFSET]; + +;; }; + + +%ifdef LONGER_HUFFTABLE +; Uses RCX, clobbers dist +; get_dist_code dist, code, len +%macro get_dist_code 4 +%define %%dist %1 ; 64-bit 
IN +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 ; address of the hufftable + + mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ] + mov %%code, %%len + and %%len, 0x1F; + shr %%code, 5 +%endm + +%macro get_packed_dist_code 3 +%define %%dist %1 ; 64-bit IN +%define %%code_len %2d ; 32-bit OUT +%define %%hufftables %3 ; address of the hufftable + mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] +%endm + +%macro unpack_dist_code 2 +%define %%code %1d ; 32-bit OUT +%define %%len %2d ; 32-bit OUT + + mov %%len, %%code + and %%len, 0x1F; + shr %%code, 5 +%endm + +%else +; Assumes (dist != 0) +; Uses RCX, clobbers dist +; void compute_dist_code dist, code, len +%macro compute_dist_code 4 +%define %%dist %1 ; IN, clobbered +%define %%distq %1 +%define %%code %2 ; OUT +%define %%len %3 ; OUT +%define %%hufftables %4 + + bsr rcx, %%dist ; ecx = msb = bsr(dist) + dec rcx ; ecx = num_extra_bits = msb - N + BZHI %%code, %%dist, rcx, %%len + SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits + lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 + mov %%len, rcx ; len = num_extra_bits + movzx rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] + movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] + SHLX %%code, %%code, rcx ; code = extra_bits << (sym & 0xF) + or %%code, %%dist ; code = (sym >> 4) | (extra_bits << (sym & 0xF)) + add %%len, rcx ; len = num_extra_bits + (sym & 0xF) +%endm + +; Uses RCX, clobbers dist +; get_dist_code dist, code, len +%macro get_dist_code 4 +%define %%dist %1 ; 32-bit IN, clobbered +%define %%distq %1 ; 64-bit IN, clobbered +%define %%code %2 ; 32-bit OUT +%define %%len %3 ; 32-bit OUT +%define %%hufftables %4 + + cmp %%dist, DIST_TABLE_SIZE - 1 + jg %%do_compute +%ifndef IACA + mov %%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT] + mov %%code, %%len + and %%len, 0x1F; + shr %%code, 5 + jmp %%done 
+%endif +%%do_compute: + compute_dist_code %%distq, %%code, %%len, %%hufftables +%%done: +%endm + +%macro get_packed_dist_code 3 +%define %%dist %1 ; 64-bit IN +%define %%code_len %2d ; 32-bit OUT +%define %%hufftables %3 ; address of the hufftable +%endm + +%endif + + +; Macros for doing Huffman Encoding + +; Assumes (dist != 0) +; Uses RCX, clobbers dist +; void compute_dist_code dist, code, len +%macro compute_dist_icf_code 3 +%define %%dist %1 ; IN, clobbered +%define %%distq %1 +%define %%code %2 ; OUT +%define %%tmp1 %3 + + bsr rcx, %%dist ; ecx = msb = bsr(dist) + dec rcx ; ecx = num_extra_bits = msb - N + BZHI %%code, %%dist, rcx, %%tmp1 + SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits + lea %%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2 + shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET + add %%code, %%dist ; code = extra_bits | sym + +%endm + +; Uses RCX, clobbers dist +; get_dist_code dist, code, len +%macro get_dist_icf_code 3 +%define %%dist %1 ; 32-bit IN, clobbered +%define %%distq %1 ; 64-bit IN, clobbered +%define %%code %2 ; 32-bit OUT +%define %%tmp1 %3 + + cmp %%dist, 1 + jg %%do_compute + +%ifnidn %%code, %%dist + mov %%code, %%dist +%endif + jmp %%done +%%do_compute: + compute_dist_icf_code %%distq, %%code, %%tmp1 +%%done: + shl %%code, DIST_OFFSET +%endm + + +; "len" can be same register as "length" +; get_len_code length, code, len +%macro get_len_code 4 +%define %%length %1 ; 64-bit IN +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 + + mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] + mov %%code, %%len + and %%len, 0x1F + shr %%code, 5 +%endm + + +%macro get_lit_code 4 +%define %%lit %1 ; 64-bit IN or CONST +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 + + movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit] + movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit] + +%endm + + +;; Compute hash of first 3 
bytes of data +%macro compute_hash 2 +%define %%result %1d ; 32-bit reg +%define %%data %2d ; 32-bit reg (low byte not clobbered) + + xor %%result, %%result + crc32 %%result, %%data +%endm diff --git a/src/spdk/isa-l/igzip/huffman.h b/src/spdk/isa-l/igzip/huffman.h new file mode 100644 index 000000000..2b44b617b --- /dev/null +++ b/src/spdk/isa-l/igzip/huffman.h @@ -0,0 +1,359 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include "igzip_lib.h" +#include "unaligned.h" + +#if __x86_64__ || __i386__ || _M_X64 || _M_IX86 +#ifdef _MSC_VER +# include +# define inline __inline +#else +# include +#endif +#else +# define inline __inline +#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86 + +/** + * @brief Calculate the bit offset of the msb. + * @param val 32-bit unsigned integer input + * + * @returns bit offset of msb starting at 1 for first bit + */ +static inline uint32_t bsr(uint32_t val) +{ + uint32_t msb; +#if defined(_MSC_VER) + unsigned long ret = 0; + if (val != 0) { + _BitScanReverse(&ret, val); + msb = ret + 1; + } + else + msb = 0; +#elif defined( __LZCNT__) + msb = 32 - __lzcnt32(val); +#elif defined(__x86_64__) || defined(__aarch64__) + msb = (val == 0)? 0 : 32 - __builtin_clz(val); +#else + for(msb = 0; val > 0; val >>= 1) + msb++; +#endif + return msb; +} + +static inline uint32_t tzbytecnt(uint64_t val) +{ + uint32_t cnt; + +#ifdef __BMI__ + cnt = __tzcnt_u64(val); + cnt = cnt / 8; +#elif defined(__x86_64__) || defined(__aarch64__) + + cnt = (val == 0)? 
64 : __builtin_ctzll(val); + cnt = cnt / 8; + +#else + for(cnt = 8; val > 0; val <<= 8) + cnt -= 1; +#endif + return cnt; +} + +static void compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code, uint64_t *p_len) +{ + assert(dist > IGZIP_DIST_TABLE_SIZE); + + dist -= 1; + uint32_t msb; + uint32_t num_extra_bits; + uint32_t extra_bits; + uint32_t sym; + uint32_t len; + uint32_t code; + + msb = bsr(dist); + assert(msb >= 1); + num_extra_bits = msb - 2; + extra_bits = dist & ((1 << num_extra_bits) - 1); + dist >>= num_extra_bits; + sym = dist + 2 * num_extra_bits; + assert(sym < 30); + code = hufftables->dcodes[sym - IGZIP_DECODE_OFFSET]; + len = hufftables->dcodes_sizes[sym - IGZIP_DECODE_OFFSET]; + *p_code = code | (extra_bits << len); + *p_len = len + num_extra_bits; +} + +static inline void get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len) +{ + if (dist < 1) + dist = 0; + assert(dist >= 1); + assert(dist <= 32768); + if (dist <= IGZIP_DIST_TABLE_SIZE) { + uint64_t code_len; + code_len = hufftables->dist_table[dist - 1]; + *code = code_len >> 5; + *len = code_len & 0x1F; + } else { + compute_dist_code(hufftables, dist, code, len); + } +} + +static inline void get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len) +{ + assert(length >= 3); + assert(length <= 258); + + uint64_t code_len; + code_len = hufftables->len_table[length - 3]; + *code = code_len >> 5; + *len = code_len & 0x1F; +} + +static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len) +{ + assert(lit <= 256); + + *code = hufftables->lit_table[lit]; + *len = hufftables->lit_table_sizes[lit]; +} + +static void compute_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits) +{ + uint32_t msb; + uint32_t num_extra_bits; + + dist -= 1; + msb = bsr(dist); + assert(msb >= 1); + num_extra_bits = msb - 2; + *extra_bits = dist & ((1 
<< num_extra_bits) - 1); + dist >>= num_extra_bits; + *code = dist + 2 * num_extra_bits; + assert(*code < 30); +} + +static inline void get_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits) +{ + assert(dist >= 1); + assert(dist <= 32768); + if (dist <= 2) { + *code = dist - 1; + *extra_bits = 0; + } else { + compute_dist_icf_code(dist, code, extra_bits); + } +} + +static inline void get_len_icf_code(uint32_t length, uint32_t *code) +{ + assert(length >= 3); + assert(length <= 258); + + *code = length + 254; +} + +static inline void get_lit_icf_code(uint32_t lit, uint32_t *code) +{ + assert(lit <= 256); + + *code = lit; +} + +/** + * @brief Returns a hash of the first 3 bytes of input data. + */ +static inline uint32_t compute_hash(uint32_t data) +{ +#ifdef __SSE4_2__ + + return _mm_crc32_u32(0, data); + +#else + uint64_t hash; + /* Use multiplication to create a hash, 0xBDD06057 is a prime number */ + hash = data; + hash *= 0xB2D06057; + hash >>= 16; + hash *= 0xB2D06057; + hash >>= 16; + + return hash; + +#endif /* __SSE4_2__ */ +} + +#define PROD1 0xFFFFE84B +#define PROD2 0xFFFF97B1 +static inline uint32_t compute_hash_mad(uint32_t data) +{ + int16_t data_low; + int16_t data_high; + + data_low = data; + data_high = data >> 16; + data = PROD1 * data_low + PROD2 * data_high; + + data_low = data; + data_high = data >> 16; + data = PROD1 * data_low + PROD2 * data_high; + + return data; +} + +static inline uint32_t compute_long_hash(uint64_t data) { + + return compute_hash(data >> 32)^compute_hash(data); +} + +/** + * @brief Returns how long str1 and str2 have the same symbols. + * @param str1: First input string. + * @param str2: Second input string. + * @param max_length: length of the smaller string. 
+ */ +static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length) +{ + uint32_t count; + uint64_t test; + uint64_t loop_length; + + if(max_length > 258) + max_length = 258; + + loop_length = max_length & ~0x7; + + for(count = 0; count < loop_length; count += 8){ + test = load_u64(str1); + test ^= load_u64(str2); + if(test != 0) + return count + tzbytecnt(test); + str1 += 8; + str2 += 8; + } + + switch(max_length % 8){ + + case 7: + if(*str1++ != *str2++) + return count; + count++; + case 6: + if(*str1++ != *str2++) + return count; + count++; + case 5: + if(*str1++ != *str2++) + return count; + count++; + case 4: + if(*str1++ != *str2++) + return count; + count++; + case 3: + if(*str1++ != *str2++) + return count; + count++; + case 2: + if(*str1++ != *str2++) + return count; + count++; + case 1: + if(*str1 != *str2) + return count; + count++; + } + + return count; +} + +/** + * @brief Returns how long str1 and str2 have the same symbols. + * @param str1: First input string. + * @param str2: Second input string. + * @param max_length: length of the smaller string. 
+ */ +static inline int compare(uint8_t * str1, uint8_t * str2, uint32_t max_length) +{ + uint32_t count; + uint64_t test; + uint64_t loop_length; + + loop_length = max_length & ~0x7; + + for(count = 0; count < loop_length; count += 8){ + test = load_u64(str1); + test ^= load_u64(str2); + if(test != 0) + return count + tzbytecnt(test); + str1 += 8; + str2 += 8; + } + + switch(max_length % 8){ + + case 7: + if(*str1++ != *str2++) + return count; + count++; + case 6: + if(*str1++ != *str2++) + return count; + count++; + case 5: + if(*str1++ != *str2++) + return count; + count++; + case 4: + if(*str1++ != *str2++) + return count; + count++; + case 3: + if(*str1++ != *str2++) + return count; + count++; + case 2: + if(*str1++ != *str2++) + return count; + count++; + case 1: + if(*str1 != *str2) + return count; + count++; + } + + return count; +} diff --git a/src/spdk/isa-l/igzip/hufftables_c.c b/src/spdk/isa-l/igzip/hufftables_c.c new file mode 100644 index 000000000..281f3e940 --- /dev/null +++ b/src/spdk/isa-l/igzip/hufftables_c.c @@ -0,0 +1,6742 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include +#include + +#if (IGZIP_HIST_SIZE <= 8192) + +const uint8_t gzip_hdr[] = { + 0x1f, 0x8b, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff +}; + +const uint32_t gzip_hdr_bytes = 10; +const uint32_t gzip_trl_bytes = 8; + +const uint8_t zlib_hdr[] = { 0x78, 0x01 }; + +const uint32_t zlib_hdr_bytes = 2; +const uint32_t zlib_trl_bytes = 4; + +struct isal_hufftables hufftables_default = { + + .deflate_hdr = { + 0xed, 0xf9, 0x09, 0x60, 0x54, 0xd5, 0xf9, 0x37, + 0x00, 0x9f, 0x90, 0x04, 0xc8, 0x40, 0x00, 0x77, + 0xdb, 0x5a, 0x38, 0x22, 0x4a, 0xd0, 0xc9, 0x98, + 0x15, 0x02, 0x20, 0x24, 0x09, 0x5b, 0x10, 0x20, + 0x12, 0x10, 0x77, 0x39, 0x33, 0xf7, 0xcc, 0xcc, + 0x25, 0x77, 0xee, 0x1d, 0xef, 0xbd, 0x37, 0xc3, + 0x50, 0x55, 0x5a, 0x6d, 0xb5, 0xb5, 0xad, 0x76, + 0xdf, 0x5b, 0xdb, 0x5a, 0x6b, 0x77, 0xdb, 0xda, + 0xbd, 0x56, 0x84, 0xb6, 0xda, 0x55, 0xbb, 0xef, + 0x2d, 0x56, 0x5b, 0xed, 0x2a, 0x56, 0xdb, 0x62, + 0x8b, 0xe4, 0xfb, 0x7e, 0xcf, 0x39, 0x77, 0xe6, + 0x24, 0x09, 0xae, 0xfd, 0xbf, 0xef, 0xff, 0xfd, + 0xbe, 0x22, 0x92, 0xdc, 0x7b, 0xcf, 0x79, 0xce, + 0xb3, 0x9f, 0xdf, 0xf3, 0x3c}, + + .deflate_hdr_count = 109, + 
.deflate_hdr_extra_bits = 0, + + .dist_table = { + 0x00000fe9, 0x00003fea, +#ifdef LONGER_HUFFTABLE + 0x00002fe9, 0x00007fea, 0x00001fea, 0x00005fea, + 0x000007e9, 0x000027e9, 0x000017ea, 0x000037ea, + 0x000057ea, 0x000077ea, 0x000001e8, 0x000009e8, + 0x000011e8, 0x000019e8, 0x000005e9, 0x00000de9, + 0x000015e9, 0x00001de9, 0x000025e9, 0x00002de9, + 0x000035e9, 0x00003de9, 0x000003e9, 0x00000be9, + 0x000013e9, 0x00001be9, 0x000023e9, 0x00002be9, + 0x000033e9, 0x00003be9, 0x00000169, 0x00000569, + 0x00000969, 0x00000d69, 0x00001169, 0x00001569, + 0x00001969, 0x00001d69, 0x00002169, 0x00002569, + 0x00002969, 0x00002d69, 0x00003169, 0x00003569, + 0x00003969, 0x00003d69, 0x00000369, 0x00000769, + 0x00000b69, 0x00000f69, 0x00001369, 0x00001769, + 0x00001b69, 0x00001f69, 0x00002369, 0x00002769, + 0x00002b69, 0x00002f69, 0x00003369, 0x00003769, + 0x00003b69, 0x00003f69, 0x00000089, 0x00000289, + 0x00000489, 0x00000689, 0x00000889, 0x00000a89, + 0x00000c89, 0x00000e89, 0x00001089, 0x00001289, + 0x00001489, 0x00001689, 0x00001889, 0x00001a89, + 0x00001c89, 0x00001e89, 0x00002089, 0x00002289, + 0x00002489, 0x00002689, 0x00002889, 0x00002a89, + 0x00002c89, 0x00002e89, 0x00003089, 0x00003289, + 0x00003489, 0x00003689, 0x00003889, 0x00003a89, + 0x00003c89, 0x00003e89, 0x000000ea, 0x000004ea, + 0x000008ea, 0x00000cea, 0x000010ea, 0x000014ea, + 0x000018ea, 0x00001cea, 0x000020ea, 0x000024ea, + 0x000028ea, 0x00002cea, 0x000030ea, 0x000034ea, + 0x000038ea, 0x00003cea, 0x000040ea, 0x000044ea, + 0x000048ea, 0x00004cea, 0x000050ea, 0x000054ea, + 0x000058ea, 0x00005cea, 0x000060ea, 0x000064ea, + 0x000068ea, 0x00006cea, 0x000070ea, 0x000074ea, + 0x000078ea, 0x00007cea, 0x0000018a, 0x0000038a, + 0x0000058a, 0x0000078a, 0x0000098a, 0x00000b8a, + 0x00000d8a, 0x00000f8a, 0x0000118a, 0x0000138a, + 0x0000158a, 0x0000178a, 0x0000198a, 0x00001b8a, + 0x00001d8a, 0x00001f8a, 0x0000218a, 0x0000238a, + 0x0000258a, 0x0000278a, 0x0000298a, 0x00002b8a, + 0x00002d8a, 0x00002f8a, 0x0000318a, 0x0000338a, 
+ 0x0000358a, 0x0000378a, 0x0000398a, 0x00003b8a, + 0x00003d8a, 0x00003f8a, 0x0000418a, 0x0000438a, + 0x0000458a, 0x0000478a, 0x0000498a, 0x00004b8a, + 0x00004d8a, 0x00004f8a, 0x0000518a, 0x0000538a, + 0x0000558a, 0x0000578a, 0x0000598a, 0x00005b8a, + 0x00005d8a, 0x00005f8a, 0x0000618a, 0x0000638a, + 0x0000658a, 0x0000678a, 0x0000698a, 0x00006b8a, + 0x00006d8a, 0x00006f8a, 0x0000718a, 0x0000738a, + 0x0000758a, 0x0000778a, 0x0000798a, 0x00007b8a, + 0x00007d8a, 0x00007f8a, 0x0000004a, 0x0000024a, + 0x0000044a, 0x0000064a, 0x0000084a, 0x00000a4a, + 0x00000c4a, 0x00000e4a, 0x0000104a, 0x0000124a, + 0x0000144a, 0x0000164a, 0x0000184a, 0x00001a4a, + 0x00001c4a, 0x00001e4a, 0x0000204a, 0x0000224a, + 0x0000244a, 0x0000264a, 0x0000284a, 0x00002a4a, + 0x00002c4a, 0x00002e4a, 0x0000304a, 0x0000324a, + 0x0000344a, 0x0000364a, 0x0000384a, 0x00003a4a, + 0x00003c4a, 0x00003e4a, 0x0000404a, 0x0000424a, + 0x0000444a, 0x0000464a, 0x0000484a, 0x00004a4a, + 0x00004c4a, 0x00004e4a, 0x0000504a, 0x0000524a, + 0x0000544a, 0x0000564a, 0x0000584a, 0x00005a4a, + 0x00005c4a, 0x00005e4a, 0x0000604a, 0x0000624a, + 0x0000644a, 0x0000664a, 0x0000684a, 0x00006a4a, + 0x00006c4a, 0x00006e4a, 0x0000704a, 0x0000724a, + 0x0000744a, 0x0000764a, 0x0000784a, 0x00007a4a, + 0x00007c4a, 0x00007e4a, 0x0000014b, 0x0000034b, + 0x0000054b, 0x0000074b, 0x0000094b, 0x00000b4b, + 0x00000d4b, 0x00000f4b, 0x0000114b, 0x0000134b, + 0x0000154b, 0x0000174b, 0x0000194b, 0x00001b4b, + 0x00001d4b, 0x00001f4b, 0x0000214b, 0x0000234b, + 0x0000254b, 0x0000274b, 0x0000294b, 0x00002b4b, + 0x00002d4b, 0x00002f4b, 0x0000314b, 0x0000334b, + 0x0000354b, 0x0000374b, 0x0000394b, 0x00003b4b, + 0x00003d4b, 0x00003f4b, 0x0000414b, 0x0000434b, + 0x0000454b, 0x0000474b, 0x0000494b, 0x00004b4b, + 0x00004d4b, 0x00004f4b, 0x0000514b, 0x0000534b, + 0x0000554b, 0x0000574b, 0x0000594b, 0x00005b4b, + 0x00005d4b, 0x00005f4b, 0x0000614b, 0x0000634b, + 0x0000654b, 0x0000674b, 0x0000694b, 0x00006b4b, + 0x00006d4b, 0x00006f4b, 0x0000714b, 0x0000734b, 
+ 0x0000754b, 0x0000774b, 0x0000794b, 0x00007b4b, + 0x00007d4b, 0x00007f4b, 0x0000814b, 0x0000834b, + 0x0000854b, 0x0000874b, 0x0000894b, 0x00008b4b, + 0x00008d4b, 0x00008f4b, 0x0000914b, 0x0000934b, + 0x0000954b, 0x0000974b, 0x0000994b, 0x00009b4b, + 0x00009d4b, 0x00009f4b, 0x0000a14b, 0x0000a34b, + 0x0000a54b, 0x0000a74b, 0x0000a94b, 0x0000ab4b, + 0x0000ad4b, 0x0000af4b, 0x0000b14b, 0x0000b34b, + 0x0000b54b, 0x0000b74b, 0x0000b94b, 0x0000bb4b, + 0x0000bd4b, 0x0000bf4b, 0x0000c14b, 0x0000c34b, + 0x0000c54b, 0x0000c74b, 0x0000c94b, 0x0000cb4b, + 0x0000cd4b, 0x0000cf4b, 0x0000d14b, 0x0000d34b, + 0x0000d54b, 0x0000d74b, 0x0000d94b, 0x0000db4b, + 0x0000dd4b, 0x0000df4b, 0x0000e14b, 0x0000e34b, + 0x0000e54b, 0x0000e74b, 0x0000e94b, 0x0000eb4b, + 0x0000ed4b, 0x0000ef4b, 0x0000f14b, 0x0000f34b, + 0x0000f54b, 0x0000f74b, 0x0000f94b, 0x0000fb4b, + 0x0000fd4b, 0x0000ff4b, 0x000000cb, 0x000002cb, + 0x000004cb, 0x000006cb, 0x000008cb, 0x00000acb, + 0x00000ccb, 0x00000ecb, 0x000010cb, 0x000012cb, + 0x000014cb, 0x000016cb, 0x000018cb, 0x00001acb, + 0x00001ccb, 0x00001ecb, 0x000020cb, 0x000022cb, + 0x000024cb, 0x000026cb, 0x000028cb, 0x00002acb, + 0x00002ccb, 0x00002ecb, 0x000030cb, 0x000032cb, + 0x000034cb, 0x000036cb, 0x000038cb, 0x00003acb, + 0x00003ccb, 0x00003ecb, 0x000040cb, 0x000042cb, + 0x000044cb, 0x000046cb, 0x000048cb, 0x00004acb, + 0x00004ccb, 0x00004ecb, 0x000050cb, 0x000052cb, + 0x000054cb, 0x000056cb, 0x000058cb, 0x00005acb, + 0x00005ccb, 0x00005ecb, 0x000060cb, 0x000062cb, + 0x000064cb, 0x000066cb, 0x000068cb, 0x00006acb, + 0x00006ccb, 0x00006ecb, 0x000070cb, 0x000072cb, + 0x000074cb, 0x000076cb, 0x000078cb, 0x00007acb, + 0x00007ccb, 0x00007ecb, 0x000080cb, 0x000082cb, + 0x000084cb, 0x000086cb, 0x000088cb, 0x00008acb, + 0x00008ccb, 0x00008ecb, 0x000090cb, 0x000092cb, + 0x000094cb, 0x000096cb, 0x000098cb, 0x00009acb, + 0x00009ccb, 0x00009ecb, 0x0000a0cb, 0x0000a2cb, + 0x0000a4cb, 0x0000a6cb, 0x0000a8cb, 0x0000aacb, + 0x0000accb, 0x0000aecb, 0x0000b0cb, 0x0000b2cb, 
+ 0x0000b4cb, 0x0000b6cb, 0x0000b8cb, 0x0000bacb, + 0x0000bccb, 0x0000becb, 0x0000c0cb, 0x0000c2cb, + 0x0000c4cb, 0x0000c6cb, 0x0000c8cb, 0x0000cacb, + 0x0000cccb, 0x0000cecb, 0x0000d0cb, 0x0000d2cb, + 0x0000d4cb, 0x0000d6cb, 0x0000d8cb, 0x0000dacb, + 0x0000dccb, 0x0000decb, 0x0000e0cb, 0x0000e2cb, + 0x0000e4cb, 0x0000e6cb, 0x0000e8cb, 0x0000eacb, + 0x0000eccb, 0x0000eecb, 0x0000f0cb, 0x0000f2cb, + 0x0000f4cb, 0x0000f6cb, 0x0000f8cb, 0x0000facb, + 0x0000fccb, 0x0000fecb, 0x000001cc, 0x000003cc, + 0x000005cc, 0x000007cc, 0x000009cc, 0x00000bcc, + 0x00000dcc, 0x00000fcc, 0x000011cc, 0x000013cc, + 0x000015cc, 0x000017cc, 0x000019cc, 0x00001bcc, + 0x00001dcc, 0x00001fcc, 0x000021cc, 0x000023cc, + 0x000025cc, 0x000027cc, 0x000029cc, 0x00002bcc, + 0x00002dcc, 0x00002fcc, 0x000031cc, 0x000033cc, + 0x000035cc, 0x000037cc, 0x000039cc, 0x00003bcc, + 0x00003dcc, 0x00003fcc, 0x000041cc, 0x000043cc, + 0x000045cc, 0x000047cc, 0x000049cc, 0x00004bcc, + 0x00004dcc, 0x00004fcc, 0x000051cc, 0x000053cc, + 0x000055cc, 0x000057cc, 0x000059cc, 0x00005bcc, + 0x00005dcc, 0x00005fcc, 0x000061cc, 0x000063cc, + 0x000065cc, 0x000067cc, 0x000069cc, 0x00006bcc, + 0x00006dcc, 0x00006fcc, 0x000071cc, 0x000073cc, + 0x000075cc, 0x000077cc, 0x000079cc, 0x00007bcc, + 0x00007dcc, 0x00007fcc, 0x000081cc, 0x000083cc, + 0x000085cc, 0x000087cc, 0x000089cc, 0x00008bcc, + 0x00008dcc, 0x00008fcc, 0x000091cc, 0x000093cc, + 0x000095cc, 0x000097cc, 0x000099cc, 0x00009bcc, + 0x00009dcc, 0x00009fcc, 0x0000a1cc, 0x0000a3cc, + 0x0000a5cc, 0x0000a7cc, 0x0000a9cc, 0x0000abcc, + 0x0000adcc, 0x0000afcc, 0x0000b1cc, 0x0000b3cc, + 0x0000b5cc, 0x0000b7cc, 0x0000b9cc, 0x0000bbcc, + 0x0000bdcc, 0x0000bfcc, 0x0000c1cc, 0x0000c3cc, + 0x0000c5cc, 0x0000c7cc, 0x0000c9cc, 0x0000cbcc, + 0x0000cdcc, 0x0000cfcc, 0x0000d1cc, 0x0000d3cc, + 0x0000d5cc, 0x0000d7cc, 0x0000d9cc, 0x0000dbcc, + 0x0000ddcc, 0x0000dfcc, 0x0000e1cc, 0x0000e3cc, + 0x0000e5cc, 0x0000e7cc, 0x0000e9cc, 0x0000ebcc, + 0x0000edcc, 0x0000efcc, 0x0000f1cc, 0x0000f3cc, 
+ 0x0000f5cc, 0x0000f7cc, 0x0000f9cc, 0x0000fbcc, + 0x0000fdcc, 0x0000ffcc, 0x000101cc, 0x000103cc, + 0x000105cc, 0x000107cc, 0x000109cc, 0x00010bcc, + 0x00010dcc, 0x00010fcc, 0x000111cc, 0x000113cc, + 0x000115cc, 0x000117cc, 0x000119cc, 0x00011bcc, + 0x00011dcc, 0x00011fcc, 0x000121cc, 0x000123cc, + 0x000125cc, 0x000127cc, 0x000129cc, 0x00012bcc, + 0x00012dcc, 0x00012fcc, 0x000131cc, 0x000133cc, + 0x000135cc, 0x000137cc, 0x000139cc, 0x00013bcc, + 0x00013dcc, 0x00013fcc, 0x000141cc, 0x000143cc, + 0x000145cc, 0x000147cc, 0x000149cc, 0x00014bcc, + 0x00014dcc, 0x00014fcc, 0x000151cc, 0x000153cc, + 0x000155cc, 0x000157cc, 0x000159cc, 0x00015bcc, + 0x00015dcc, 0x00015fcc, 0x000161cc, 0x000163cc, + 0x000165cc, 0x000167cc, 0x000169cc, 0x00016bcc, + 0x00016dcc, 0x00016fcc, 0x000171cc, 0x000173cc, + 0x000175cc, 0x000177cc, 0x000179cc, 0x00017bcc, + 0x00017dcc, 0x00017fcc, 0x000181cc, 0x000183cc, + 0x000185cc, 0x000187cc, 0x000189cc, 0x00018bcc, + 0x00018dcc, 0x00018fcc, 0x000191cc, 0x000193cc, + 0x000195cc, 0x000197cc, 0x000199cc, 0x00019bcc, + 0x00019dcc, 0x00019fcc, 0x0001a1cc, 0x0001a3cc, + 0x0001a5cc, 0x0001a7cc, 0x0001a9cc, 0x0001abcc, + 0x0001adcc, 0x0001afcc, 0x0001b1cc, 0x0001b3cc, + 0x0001b5cc, 0x0001b7cc, 0x0001b9cc, 0x0001bbcc, + 0x0001bdcc, 0x0001bfcc, 0x0001c1cc, 0x0001c3cc, + 0x0001c5cc, 0x0001c7cc, 0x0001c9cc, 0x0001cbcc, + 0x0001cdcc, 0x0001cfcc, 0x0001d1cc, 0x0001d3cc, + 0x0001d5cc, 0x0001d7cc, 0x0001d9cc, 0x0001dbcc, + 0x0001ddcc, 0x0001dfcc, 0x0001e1cc, 0x0001e3cc, + 0x0001e5cc, 0x0001e7cc, 0x0001e9cc, 0x0001ebcc, + 0x0001edcc, 0x0001efcc, 0x0001f1cc, 0x0001f3cc, + 0x0001f5cc, 0x0001f7cc, 0x0001f9cc, 0x0001fbcc, + 0x0001fdcc, 0x0001ffcc, 0x0000002c, 0x0000022c, + 0x0000042c, 0x0000062c, 0x0000082c, 0x00000a2c, + 0x00000c2c, 0x00000e2c, 0x0000102c, 0x0000122c, + 0x0000142c, 0x0000162c, 0x0000182c, 0x00001a2c, + 0x00001c2c, 0x00001e2c, 0x0000202c, 0x0000222c, + 0x0000242c, 0x0000262c, 0x0000282c, 0x00002a2c, + 0x00002c2c, 0x00002e2c, 0x0000302c, 0x0000322c, 
+ 0x0000342c, 0x0000362c, 0x0000382c, 0x00003a2c, + 0x00003c2c, 0x00003e2c, 0x0000402c, 0x0000422c, + 0x0000442c, 0x0000462c, 0x0000482c, 0x00004a2c, + 0x00004c2c, 0x00004e2c, 0x0000502c, 0x0000522c, + 0x0000542c, 0x0000562c, 0x0000582c, 0x00005a2c, + 0x00005c2c, 0x00005e2c, 0x0000602c, 0x0000622c, + 0x0000642c, 0x0000662c, 0x0000682c, 0x00006a2c, + 0x00006c2c, 0x00006e2c, 0x0000702c, 0x0000722c, + 0x0000742c, 0x0000762c, 0x0000782c, 0x00007a2c, + 0x00007c2c, 0x00007e2c, 0x0000802c, 0x0000822c, + 0x0000842c, 0x0000862c, 0x0000882c, 0x00008a2c, + 0x00008c2c, 0x00008e2c, 0x0000902c, 0x0000922c, + 0x0000942c, 0x0000962c, 0x0000982c, 0x00009a2c, + 0x00009c2c, 0x00009e2c, 0x0000a02c, 0x0000a22c, + 0x0000a42c, 0x0000a62c, 0x0000a82c, 0x0000aa2c, + 0x0000ac2c, 0x0000ae2c, 0x0000b02c, 0x0000b22c, + 0x0000b42c, 0x0000b62c, 0x0000b82c, 0x0000ba2c, + 0x0000bc2c, 0x0000be2c, 0x0000c02c, 0x0000c22c, + 0x0000c42c, 0x0000c62c, 0x0000c82c, 0x0000ca2c, + 0x0000cc2c, 0x0000ce2c, 0x0000d02c, 0x0000d22c, + 0x0000d42c, 0x0000d62c, 0x0000d82c, 0x0000da2c, + 0x0000dc2c, 0x0000de2c, 0x0000e02c, 0x0000e22c, + 0x0000e42c, 0x0000e62c, 0x0000e82c, 0x0000ea2c, + 0x0000ec2c, 0x0000ee2c, 0x0000f02c, 0x0000f22c, + 0x0000f42c, 0x0000f62c, 0x0000f82c, 0x0000fa2c, + 0x0000fc2c, 0x0000fe2c, 0x0001002c, 0x0001022c, + 0x0001042c, 0x0001062c, 0x0001082c, 0x00010a2c, + 0x00010c2c, 0x00010e2c, 0x0001102c, 0x0001122c, + 0x0001142c, 0x0001162c, 0x0001182c, 0x00011a2c, + 0x00011c2c, 0x00011e2c, 0x0001202c, 0x0001222c, + 0x0001242c, 0x0001262c, 0x0001282c, 0x00012a2c, + 0x00012c2c, 0x00012e2c, 0x0001302c, 0x0001322c, + 0x0001342c, 0x0001362c, 0x0001382c, 0x00013a2c, + 0x00013c2c, 0x00013e2c, 0x0001402c, 0x0001422c, + 0x0001442c, 0x0001462c, 0x0001482c, 0x00014a2c, + 0x00014c2c, 0x00014e2c, 0x0001502c, 0x0001522c, + 0x0001542c, 0x0001562c, 0x0001582c, 0x00015a2c, + 0x00015c2c, 0x00015e2c, 0x0001602c, 0x0001622c, + 0x0001642c, 0x0001662c, 0x0001682c, 0x00016a2c, + 0x00016c2c, 0x00016e2c, 0x0001702c, 0x0001722c, 
+ 0x0001742c, 0x0001762c, 0x0001782c, 0x00017a2c, + 0x00017c2c, 0x00017e2c, 0x0001802c, 0x0001822c, + 0x0001842c, 0x0001862c, 0x0001882c, 0x00018a2c, + 0x00018c2c, 0x00018e2c, 0x0001902c, 0x0001922c, + 0x0001942c, 0x0001962c, 0x0001982c, 0x00019a2c, + 0x00019c2c, 0x00019e2c, 0x0001a02c, 0x0001a22c, + 0x0001a42c, 0x0001a62c, 0x0001a82c, 0x0001aa2c, + 0x0001ac2c, 0x0001ae2c, 0x0001b02c, 0x0001b22c, + 0x0001b42c, 0x0001b62c, 0x0001b82c, 0x0001ba2c, + 0x0001bc2c, 0x0001be2c, 0x0001c02c, 0x0001c22c, + 0x0001c42c, 0x0001c62c, 0x0001c82c, 0x0001ca2c, + 0x0001cc2c, 0x0001ce2c, 0x0001d02c, 0x0001d22c, + 0x0001d42c, 0x0001d62c, 0x0001d82c, 0x0001da2c, + 0x0001dc2c, 0x0001de2c, 0x0001e02c, 0x0001e22c, + 0x0001e42c, 0x0001e62c, 0x0001e82c, 0x0001ea2c, + 0x0001ec2c, 0x0001ee2c, 0x0001f02c, 0x0001f22c, + 0x0001f42c, 0x0001f62c, 0x0001f82c, 0x0001fa2c, + 0x0001fc2c, 0x0001fe2c, 0x0000012d, 0x0000032d, + 0x0000052d, 0x0000072d, 0x0000092d, 0x00000b2d, + 0x00000d2d, 0x00000f2d, 0x0000112d, 0x0000132d, + 0x0000152d, 0x0000172d, 0x0000192d, 0x00001b2d, + 0x00001d2d, 0x00001f2d, 0x0000212d, 0x0000232d, + 0x0000252d, 0x0000272d, 0x0000292d, 0x00002b2d, + 0x00002d2d, 0x00002f2d, 0x0000312d, 0x0000332d, + 0x0000352d, 0x0000372d, 0x0000392d, 0x00003b2d, + 0x00003d2d, 0x00003f2d, 0x0000412d, 0x0000432d, + 0x0000452d, 0x0000472d, 0x0000492d, 0x00004b2d, + 0x00004d2d, 0x00004f2d, 0x0000512d, 0x0000532d, + 0x0000552d, 0x0000572d, 0x0000592d, 0x00005b2d, + 0x00005d2d, 0x00005f2d, 0x0000612d, 0x0000632d, + 0x0000652d, 0x0000672d, 0x0000692d, 0x00006b2d, + 0x00006d2d, 0x00006f2d, 0x0000712d, 0x0000732d, + 0x0000752d, 0x0000772d, 0x0000792d, 0x00007b2d, + 0x00007d2d, 0x00007f2d, 0x0000812d, 0x0000832d, + 0x0000852d, 0x0000872d, 0x0000892d, 0x00008b2d, + 0x00008d2d, 0x00008f2d, 0x0000912d, 0x0000932d, + 0x0000952d, 0x0000972d, 0x0000992d, 0x00009b2d, + 0x00009d2d, 0x00009f2d, 0x0000a12d, 0x0000a32d, + 0x0000a52d, 0x0000a72d, 0x0000a92d, 0x0000ab2d, + 0x0000ad2d, 0x0000af2d, 0x0000b12d, 0x0000b32d, 
+ 0x0000b52d, 0x0000b72d, 0x0000b92d, 0x0000bb2d, + 0x0000bd2d, 0x0000bf2d, 0x0000c12d, 0x0000c32d, + 0x0000c52d, 0x0000c72d, 0x0000c92d, 0x0000cb2d, + 0x0000cd2d, 0x0000cf2d, 0x0000d12d, 0x0000d32d, + 0x0000d52d, 0x0000d72d, 0x0000d92d, 0x0000db2d, + 0x0000dd2d, 0x0000df2d, 0x0000e12d, 0x0000e32d, + 0x0000e52d, 0x0000e72d, 0x0000e92d, 0x0000eb2d, + 0x0000ed2d, 0x0000ef2d, 0x0000f12d, 0x0000f32d, + 0x0000f52d, 0x0000f72d, 0x0000f92d, 0x0000fb2d, + 0x0000fd2d, 0x0000ff2d, 0x0001012d, 0x0001032d, + 0x0001052d, 0x0001072d, 0x0001092d, 0x00010b2d, + 0x00010d2d, 0x00010f2d, 0x0001112d, 0x0001132d, + 0x0001152d, 0x0001172d, 0x0001192d, 0x00011b2d, + 0x00011d2d, 0x00011f2d, 0x0001212d, 0x0001232d, + 0x0001252d, 0x0001272d, 0x0001292d, 0x00012b2d, + 0x00012d2d, 0x00012f2d, 0x0001312d, 0x0001332d, + 0x0001352d, 0x0001372d, 0x0001392d, 0x00013b2d, + 0x00013d2d, 0x00013f2d, 0x0001412d, 0x0001432d, + 0x0001452d, 0x0001472d, 0x0001492d, 0x00014b2d, + 0x00014d2d, 0x00014f2d, 0x0001512d, 0x0001532d, + 0x0001552d, 0x0001572d, 0x0001592d, 0x00015b2d, + 0x00015d2d, 0x00015f2d, 0x0001612d, 0x0001632d, + 0x0001652d, 0x0001672d, 0x0001692d, 0x00016b2d, + 0x00016d2d, 0x00016f2d, 0x0001712d, 0x0001732d, + 0x0001752d, 0x0001772d, 0x0001792d, 0x00017b2d, + 0x00017d2d, 0x00017f2d, 0x0001812d, 0x0001832d, + 0x0001852d, 0x0001872d, 0x0001892d, 0x00018b2d, + 0x00018d2d, 0x00018f2d, 0x0001912d, 0x0001932d, + 0x0001952d, 0x0001972d, 0x0001992d, 0x00019b2d, + 0x00019d2d, 0x00019f2d, 0x0001a12d, 0x0001a32d, + 0x0001a52d, 0x0001a72d, 0x0001a92d, 0x0001ab2d, + 0x0001ad2d, 0x0001af2d, 0x0001b12d, 0x0001b32d, + 0x0001b52d, 0x0001b72d, 0x0001b92d, 0x0001bb2d, + 0x0001bd2d, 0x0001bf2d, 0x0001c12d, 0x0001c32d, + 0x0001c52d, 0x0001c72d, 0x0001c92d, 0x0001cb2d, + 0x0001cd2d, 0x0001cf2d, 0x0001d12d, 0x0001d32d, + 0x0001d52d, 0x0001d72d, 0x0001d92d, 0x0001db2d, + 0x0001dd2d, 0x0001df2d, 0x0001e12d, 0x0001e32d, + 0x0001e52d, 0x0001e72d, 0x0001e92d, 0x0001eb2d, + 0x0001ed2d, 0x0001ef2d, 0x0001f12d, 0x0001f32d, 
+ 0x0001f52d, 0x0001f72d, 0x0001f92d, 0x0001fb2d, + 0x0001fd2d, 0x0001ff2d, 0x0002012d, 0x0002032d, + 0x0002052d, 0x0002072d, 0x0002092d, 0x00020b2d, + 0x00020d2d, 0x00020f2d, 0x0002112d, 0x0002132d, + 0x0002152d, 0x0002172d, 0x0002192d, 0x00021b2d, + 0x00021d2d, 0x00021f2d, 0x0002212d, 0x0002232d, + 0x0002252d, 0x0002272d, 0x0002292d, 0x00022b2d, + 0x00022d2d, 0x00022f2d, 0x0002312d, 0x0002332d, + 0x0002352d, 0x0002372d, 0x0002392d, 0x00023b2d, + 0x00023d2d, 0x00023f2d, 0x0002412d, 0x0002432d, + 0x0002452d, 0x0002472d, 0x0002492d, 0x00024b2d, + 0x00024d2d, 0x00024f2d, 0x0002512d, 0x0002532d, + 0x0002552d, 0x0002572d, 0x0002592d, 0x00025b2d, + 0x00025d2d, 0x00025f2d, 0x0002612d, 0x0002632d, + 0x0002652d, 0x0002672d, 0x0002692d, 0x00026b2d, + 0x00026d2d, 0x00026f2d, 0x0002712d, 0x0002732d, + 0x0002752d, 0x0002772d, 0x0002792d, 0x00027b2d, + 0x00027d2d, 0x00027f2d, 0x0002812d, 0x0002832d, + 0x0002852d, 0x0002872d, 0x0002892d, 0x00028b2d, + 0x00028d2d, 0x00028f2d, 0x0002912d, 0x0002932d, + 0x0002952d, 0x0002972d, 0x0002992d, 0x00029b2d, + 0x00029d2d, 0x00029f2d, 0x0002a12d, 0x0002a32d, + 0x0002a52d, 0x0002a72d, 0x0002a92d, 0x0002ab2d, + 0x0002ad2d, 0x0002af2d, 0x0002b12d, 0x0002b32d, + 0x0002b52d, 0x0002b72d, 0x0002b92d, 0x0002bb2d, + 0x0002bd2d, 0x0002bf2d, 0x0002c12d, 0x0002c32d, + 0x0002c52d, 0x0002c72d, 0x0002c92d, 0x0002cb2d, + 0x0002cd2d, 0x0002cf2d, 0x0002d12d, 0x0002d32d, + 0x0002d52d, 0x0002d72d, 0x0002d92d, 0x0002db2d, + 0x0002dd2d, 0x0002df2d, 0x0002e12d, 0x0002e32d, + 0x0002e52d, 0x0002e72d, 0x0002e92d, 0x0002eb2d, + 0x0002ed2d, 0x0002ef2d, 0x0002f12d, 0x0002f32d, + 0x0002f52d, 0x0002f72d, 0x0002f92d, 0x0002fb2d, + 0x0002fd2d, 0x0002ff2d, 0x0003012d, 0x0003032d, + 0x0003052d, 0x0003072d, 0x0003092d, 0x00030b2d, + 0x00030d2d, 0x00030f2d, 0x0003112d, 0x0003132d, + 0x0003152d, 0x0003172d, 0x0003192d, 0x00031b2d, + 0x00031d2d, 0x00031f2d, 0x0003212d, 0x0003232d, + 0x0003252d, 0x0003272d, 0x0003292d, 0x00032b2d, + 0x00032d2d, 0x00032f2d, 0x0003312d, 0x0003332d, 
+ 0x0003352d, 0x0003372d, 0x0003392d, 0x00033b2d, + 0x00033d2d, 0x00033f2d, 0x0003412d, 0x0003432d, + 0x0003452d, 0x0003472d, 0x0003492d, 0x00034b2d, + 0x00034d2d, 0x00034f2d, 0x0003512d, 0x0003532d, + 0x0003552d, 0x0003572d, 0x0003592d, 0x00035b2d, + 0x00035d2d, 0x00035f2d, 0x0003612d, 0x0003632d, + 0x0003652d, 0x0003672d, 0x0003692d, 0x00036b2d, + 0x00036d2d, 0x00036f2d, 0x0003712d, 0x0003732d, + 0x0003752d, 0x0003772d, 0x0003792d, 0x00037b2d, + 0x00037d2d, 0x00037f2d, 0x0003812d, 0x0003832d, + 0x0003852d, 0x0003872d, 0x0003892d, 0x00038b2d, + 0x00038d2d, 0x00038f2d, 0x0003912d, 0x0003932d, + 0x0003952d, 0x0003972d, 0x0003992d, 0x00039b2d, + 0x00039d2d, 0x00039f2d, 0x0003a12d, 0x0003a32d, + 0x0003a52d, 0x0003a72d, 0x0003a92d, 0x0003ab2d, + 0x0003ad2d, 0x0003af2d, 0x0003b12d, 0x0003b32d, + 0x0003b52d, 0x0003b72d, 0x0003b92d, 0x0003bb2d, + 0x0003bd2d, 0x0003bf2d, 0x0003c12d, 0x0003c32d, + 0x0003c52d, 0x0003c72d, 0x0003c92d, 0x0003cb2d, + 0x0003cd2d, 0x0003cf2d, 0x0003d12d, 0x0003d32d, + 0x0003d52d, 0x0003d72d, 0x0003d92d, 0x0003db2d, + 0x0003dd2d, 0x0003df2d, 0x0003e12d, 0x0003e32d, + 0x0003e52d, 0x0003e72d, 0x0003e92d, 0x0003eb2d, + 0x0003ed2d, 0x0003ef2d, 0x0003f12d, 0x0003f32d, + 0x0003f52d, 0x0003f72d, 0x0003f92d, 0x0003fb2d, + 0x0003fd2d, 0x0003ff2d, 0x000002ee, 0x000006ee, + 0x00000aee, 0x00000eee, 0x000012ee, 0x000016ee, + 0x00001aee, 0x00001eee, 0x000022ee, 0x000026ee, + 0x00002aee, 0x00002eee, 0x000032ee, 0x000036ee, + 0x00003aee, 0x00003eee, 0x000042ee, 0x000046ee, + 0x00004aee, 0x00004eee, 0x000052ee, 0x000056ee, + 0x00005aee, 0x00005eee, 0x000062ee, 0x000066ee, + 0x00006aee, 0x00006eee, 0x000072ee, 0x000076ee, + 0x00007aee, 0x00007eee, 0x000082ee, 0x000086ee, + 0x00008aee, 0x00008eee, 0x000092ee, 0x000096ee, + 0x00009aee, 0x00009eee, 0x0000a2ee, 0x0000a6ee, + 0x0000aaee, 0x0000aeee, 0x0000b2ee, 0x0000b6ee, + 0x0000baee, 0x0000beee, 0x0000c2ee, 0x0000c6ee, + 0x0000caee, 0x0000ceee, 0x0000d2ee, 0x0000d6ee, + 0x0000daee, 0x0000deee, 0x0000e2ee, 0x0000e6ee, 
+ 0x0000eaee, 0x0000eeee, 0x0000f2ee, 0x0000f6ee, + 0x0000faee, 0x0000feee, 0x000102ee, 0x000106ee, + 0x00010aee, 0x00010eee, 0x000112ee, 0x000116ee, + 0x00011aee, 0x00011eee, 0x000122ee, 0x000126ee, + 0x00012aee, 0x00012eee, 0x000132ee, 0x000136ee, + 0x00013aee, 0x00013eee, 0x000142ee, 0x000146ee, + 0x00014aee, 0x00014eee, 0x000152ee, 0x000156ee, + 0x00015aee, 0x00015eee, 0x000162ee, 0x000166ee, + 0x00016aee, 0x00016eee, 0x000172ee, 0x000176ee, + 0x00017aee, 0x00017eee, 0x000182ee, 0x000186ee, + 0x00018aee, 0x00018eee, 0x000192ee, 0x000196ee, + 0x00019aee, 0x00019eee, 0x0001a2ee, 0x0001a6ee, + 0x0001aaee, 0x0001aeee, 0x0001b2ee, 0x0001b6ee, + 0x0001baee, 0x0001beee, 0x0001c2ee, 0x0001c6ee, + 0x0001caee, 0x0001ceee, 0x0001d2ee, 0x0001d6ee, + 0x0001daee, 0x0001deee, 0x0001e2ee, 0x0001e6ee, + 0x0001eaee, 0x0001eeee, 0x0001f2ee, 0x0001f6ee, + 0x0001faee, 0x0001feee, 0x000202ee, 0x000206ee, + 0x00020aee, 0x00020eee, 0x000212ee, 0x000216ee, + 0x00021aee, 0x00021eee, 0x000222ee, 0x000226ee, + 0x00022aee, 0x00022eee, 0x000232ee, 0x000236ee, + 0x00023aee, 0x00023eee, 0x000242ee, 0x000246ee, + 0x00024aee, 0x00024eee, 0x000252ee, 0x000256ee, + 0x00025aee, 0x00025eee, 0x000262ee, 0x000266ee, + 0x00026aee, 0x00026eee, 0x000272ee, 0x000276ee, + 0x00027aee, 0x00027eee, 0x000282ee, 0x000286ee, + 0x00028aee, 0x00028eee, 0x000292ee, 0x000296ee, + 0x00029aee, 0x00029eee, 0x0002a2ee, 0x0002a6ee, + 0x0002aaee, 0x0002aeee, 0x0002b2ee, 0x0002b6ee, + 0x0002baee, 0x0002beee, 0x0002c2ee, 0x0002c6ee, + 0x0002caee, 0x0002ceee, 0x0002d2ee, 0x0002d6ee, + 0x0002daee, 0x0002deee, 0x0002e2ee, 0x0002e6ee, + 0x0002eaee, 0x0002eeee, 0x0002f2ee, 0x0002f6ee, + 0x0002faee, 0x0002feee, 0x000302ee, 0x000306ee, + 0x00030aee, 0x00030eee, 0x000312ee, 0x000316ee, + 0x00031aee, 0x00031eee, 0x000322ee, 0x000326ee, + 0x00032aee, 0x00032eee, 0x000332ee, 0x000336ee, + 0x00033aee, 0x00033eee, 0x000342ee, 0x000346ee, + 0x00034aee, 0x00034eee, 0x000352ee, 0x000356ee, + 0x00035aee, 0x00035eee, 0x000362ee, 0x000366ee, 
+ 0x00036aee, 0x00036eee, 0x000372ee, 0x000376ee, + 0x00037aee, 0x00037eee, 0x000382ee, 0x000386ee, + 0x00038aee, 0x00038eee, 0x000392ee, 0x000396ee, + 0x00039aee, 0x00039eee, 0x0003a2ee, 0x0003a6ee, + 0x0003aaee, 0x0003aeee, 0x0003b2ee, 0x0003b6ee, + 0x0003baee, 0x0003beee, 0x0003c2ee, 0x0003c6ee, + 0x0003caee, 0x0003ceee, 0x0003d2ee, 0x0003d6ee, + 0x0003daee, 0x0003deee, 0x0003e2ee, 0x0003e6ee, + 0x0003eaee, 0x0003eeee, 0x0003f2ee, 0x0003f6ee, + 0x0003faee, 0x0003feee, 0x000402ee, 0x000406ee, + 0x00040aee, 0x00040eee, 0x000412ee, 0x000416ee, + 0x00041aee, 0x00041eee, 0x000422ee, 0x000426ee, + 0x00042aee, 0x00042eee, 0x000432ee, 0x000436ee, + 0x00043aee, 0x00043eee, 0x000442ee, 0x000446ee, + 0x00044aee, 0x00044eee, 0x000452ee, 0x000456ee, + 0x00045aee, 0x00045eee, 0x000462ee, 0x000466ee, + 0x00046aee, 0x00046eee, 0x000472ee, 0x000476ee, + 0x00047aee, 0x00047eee, 0x000482ee, 0x000486ee, + 0x00048aee, 0x00048eee, 0x000492ee, 0x000496ee, + 0x00049aee, 0x00049eee, 0x0004a2ee, 0x0004a6ee, + 0x0004aaee, 0x0004aeee, 0x0004b2ee, 0x0004b6ee, + 0x0004baee, 0x0004beee, 0x0004c2ee, 0x0004c6ee, + 0x0004caee, 0x0004ceee, 0x0004d2ee, 0x0004d6ee, + 0x0004daee, 0x0004deee, 0x0004e2ee, 0x0004e6ee, + 0x0004eaee, 0x0004eeee, 0x0004f2ee, 0x0004f6ee, + 0x0004faee, 0x0004feee, 0x000502ee, 0x000506ee, + 0x00050aee, 0x00050eee, 0x000512ee, 0x000516ee, + 0x00051aee, 0x00051eee, 0x000522ee, 0x000526ee, + 0x00052aee, 0x00052eee, 0x000532ee, 0x000536ee, + 0x00053aee, 0x00053eee, 0x000542ee, 0x000546ee, + 0x00054aee, 0x00054eee, 0x000552ee, 0x000556ee, + 0x00055aee, 0x00055eee, 0x000562ee, 0x000566ee, + 0x00056aee, 0x00056eee, 0x000572ee, 0x000576ee, + 0x00057aee, 0x00057eee, 0x000582ee, 0x000586ee, + 0x00058aee, 0x00058eee, 0x000592ee, 0x000596ee, + 0x00059aee, 0x00059eee, 0x0005a2ee, 0x0005a6ee, + 0x0005aaee, 0x0005aeee, 0x0005b2ee, 0x0005b6ee, + 0x0005baee, 0x0005beee, 0x0005c2ee, 0x0005c6ee, + 0x0005caee, 0x0005ceee, 0x0005d2ee, 0x0005d6ee, + 0x0005daee, 0x0005deee, 0x0005e2ee, 0x0005e6ee, 
+ 0x0005eaee, 0x0005eeee, 0x0005f2ee, 0x0005f6ee, + 0x0005faee, 0x0005feee, 0x000602ee, 0x000606ee, + 0x00060aee, 0x00060eee, 0x000612ee, 0x000616ee, + 0x00061aee, 0x00061eee, 0x000622ee, 0x000626ee, + 0x00062aee, 0x00062eee, 0x000632ee, 0x000636ee, + 0x00063aee, 0x00063eee, 0x000642ee, 0x000646ee, + 0x00064aee, 0x00064eee, 0x000652ee, 0x000656ee, + 0x00065aee, 0x00065eee, 0x000662ee, 0x000666ee, + 0x00066aee, 0x00066eee, 0x000672ee, 0x000676ee, + 0x00067aee, 0x00067eee, 0x000682ee, 0x000686ee, + 0x00068aee, 0x00068eee, 0x000692ee, 0x000696ee, + 0x00069aee, 0x00069eee, 0x0006a2ee, 0x0006a6ee, + 0x0006aaee, 0x0006aeee, 0x0006b2ee, 0x0006b6ee, + 0x0006baee, 0x0006beee, 0x0006c2ee, 0x0006c6ee, + 0x0006caee, 0x0006ceee, 0x0006d2ee, 0x0006d6ee, + 0x0006daee, 0x0006deee, 0x0006e2ee, 0x0006e6ee, + 0x0006eaee, 0x0006eeee, 0x0006f2ee, 0x0006f6ee, + 0x0006faee, 0x0006feee, 0x000702ee, 0x000706ee, + 0x00070aee, 0x00070eee, 0x000712ee, 0x000716ee, + 0x00071aee, 0x00071eee, 0x000722ee, 0x000726ee, + 0x00072aee, 0x00072eee, 0x000732ee, 0x000736ee, + 0x00073aee, 0x00073eee, 0x000742ee, 0x000746ee, + 0x00074aee, 0x00074eee, 0x000752ee, 0x000756ee, + 0x00075aee, 0x00075eee, 0x000762ee, 0x000766ee, + 0x00076aee, 0x00076eee, 0x000772ee, 0x000776ee, + 0x00077aee, 0x00077eee, 0x000782ee, 0x000786ee, + 0x00078aee, 0x00078eee, 0x000792ee, 0x000796ee, + 0x00079aee, 0x00079eee, 0x0007a2ee, 0x0007a6ee, + 0x0007aaee, 0x0007aeee, 0x0007b2ee, 0x0007b6ee, + 0x0007baee, 0x0007beee, 0x0007c2ee, 0x0007c6ee, + 0x0007caee, 0x0007ceee, 0x0007d2ee, 0x0007d6ee, + 0x0007daee, 0x0007deee, 0x0007e2ee, 0x0007e6ee, + 0x0007eaee, 0x0007eeee, 0x0007f2ee, 0x0007f6ee, + 0x0007faee, 0x0007feee, 0x0000000d, 0x0000010d, + 0x0000020d, 0x0000030d, 0x0000040d, 0x0000050d, + 0x0000060d, 0x0000070d, 0x0000080d, 0x0000090d, + 0x00000a0d, 0x00000b0d, 0x00000c0d, 0x00000d0d, + 0x00000e0d, 0x00000f0d, 0x0000100d, 0x0000110d, + 0x0000120d, 0x0000130d, 0x0000140d, 0x0000150d, + 0x0000160d, 0x0000170d, 0x0000180d, 0x0000190d, 
+ 0x00001a0d, 0x00001b0d, 0x00001c0d, 0x00001d0d, + 0x00001e0d, 0x00001f0d, 0x0000200d, 0x0000210d, + 0x0000220d, 0x0000230d, 0x0000240d, 0x0000250d, + 0x0000260d, 0x0000270d, 0x0000280d, 0x0000290d, + 0x00002a0d, 0x00002b0d, 0x00002c0d, 0x00002d0d, + 0x00002e0d, 0x00002f0d, 0x0000300d, 0x0000310d, + 0x0000320d, 0x0000330d, 0x0000340d, 0x0000350d, + 0x0000360d, 0x0000370d, 0x0000380d, 0x0000390d, + 0x00003a0d, 0x00003b0d, 0x00003c0d, 0x00003d0d, + 0x00003e0d, 0x00003f0d, 0x0000400d, 0x0000410d, + 0x0000420d, 0x0000430d, 0x0000440d, 0x0000450d, + 0x0000460d, 0x0000470d, 0x0000480d, 0x0000490d, + 0x00004a0d, 0x00004b0d, 0x00004c0d, 0x00004d0d, + 0x00004e0d, 0x00004f0d, 0x0000500d, 0x0000510d, + 0x0000520d, 0x0000530d, 0x0000540d, 0x0000550d, + 0x0000560d, 0x0000570d, 0x0000580d, 0x0000590d, + 0x00005a0d, 0x00005b0d, 0x00005c0d, 0x00005d0d, + 0x00005e0d, 0x00005f0d, 0x0000600d, 0x0000610d, + 0x0000620d, 0x0000630d, 0x0000640d, 0x0000650d, + 0x0000660d, 0x0000670d, 0x0000680d, 0x0000690d, + 0x00006a0d, 0x00006b0d, 0x00006c0d, 0x00006d0d, + 0x00006e0d, 0x00006f0d, 0x0000700d, 0x0000710d, + 0x0000720d, 0x0000730d, 0x0000740d, 0x0000750d, + 0x0000760d, 0x0000770d, 0x0000780d, 0x0000790d, + 0x00007a0d, 0x00007b0d, 0x00007c0d, 0x00007d0d, + 0x00007e0d, 0x00007f0d, 0x0000800d, 0x0000810d, + 0x0000820d, 0x0000830d, 0x0000840d, 0x0000850d, + 0x0000860d, 0x0000870d, 0x0000880d, 0x0000890d, + 0x00008a0d, 0x00008b0d, 0x00008c0d, 0x00008d0d, + 0x00008e0d, 0x00008f0d, 0x0000900d, 0x0000910d, + 0x0000920d, 0x0000930d, 0x0000940d, 0x0000950d, + 0x0000960d, 0x0000970d, 0x0000980d, 0x0000990d, + 0x00009a0d, 0x00009b0d, 0x00009c0d, 0x00009d0d, + 0x00009e0d, 0x00009f0d, 0x0000a00d, 0x0000a10d, + 0x0000a20d, 0x0000a30d, 0x0000a40d, 0x0000a50d, + 0x0000a60d, 0x0000a70d, 0x0000a80d, 0x0000a90d, + 0x0000aa0d, 0x0000ab0d, 0x0000ac0d, 0x0000ad0d, + 0x0000ae0d, 0x0000af0d, 0x0000b00d, 0x0000b10d, + 0x0000b20d, 0x0000b30d, 0x0000b40d, 0x0000b50d, + 0x0000b60d, 0x0000b70d, 0x0000b80d, 0x0000b90d, 
+ 0x0000ba0d, 0x0000bb0d, 0x0000bc0d, 0x0000bd0d, + 0x0000be0d, 0x0000bf0d, 0x0000c00d, 0x0000c10d, + 0x0000c20d, 0x0000c30d, 0x0000c40d, 0x0000c50d, + 0x0000c60d, 0x0000c70d, 0x0000c80d, 0x0000c90d, + 0x0000ca0d, 0x0000cb0d, 0x0000cc0d, 0x0000cd0d, + 0x0000ce0d, 0x0000cf0d, 0x0000d00d, 0x0000d10d, + 0x0000d20d, 0x0000d30d, 0x0000d40d, 0x0000d50d, + 0x0000d60d, 0x0000d70d, 0x0000d80d, 0x0000d90d, + 0x0000da0d, 0x0000db0d, 0x0000dc0d, 0x0000dd0d, + 0x0000de0d, 0x0000df0d, 0x0000e00d, 0x0000e10d, + 0x0000e20d, 0x0000e30d, 0x0000e40d, 0x0000e50d, + 0x0000e60d, 0x0000e70d, 0x0000e80d, 0x0000e90d, + 0x0000ea0d, 0x0000eb0d, 0x0000ec0d, 0x0000ed0d, + 0x0000ee0d, 0x0000ef0d, 0x0000f00d, 0x0000f10d, + 0x0000f20d, 0x0000f30d, 0x0000f40d, 0x0000f50d, + 0x0000f60d, 0x0000f70d, 0x0000f80d, 0x0000f90d, + 0x0000fa0d, 0x0000fb0d, 0x0000fc0d, 0x0000fd0d, + 0x0000fe0d, 0x0000ff0d, 0x0001000d, 0x0001010d, + 0x0001020d, 0x0001030d, 0x0001040d, 0x0001050d, + 0x0001060d, 0x0001070d, 0x0001080d, 0x0001090d, + 0x00010a0d, 0x00010b0d, 0x00010c0d, 0x00010d0d, + 0x00010e0d, 0x00010f0d, 0x0001100d, 0x0001110d, + 0x0001120d, 0x0001130d, 0x0001140d, 0x0001150d, + 0x0001160d, 0x0001170d, 0x0001180d, 0x0001190d, + 0x00011a0d, 0x00011b0d, 0x00011c0d, 0x00011d0d, + 0x00011e0d, 0x00011f0d, 0x0001200d, 0x0001210d, + 0x0001220d, 0x0001230d, 0x0001240d, 0x0001250d, + 0x0001260d, 0x0001270d, 0x0001280d, 0x0001290d, + 0x00012a0d, 0x00012b0d, 0x00012c0d, 0x00012d0d, + 0x00012e0d, 0x00012f0d, 0x0001300d, 0x0001310d, + 0x0001320d, 0x0001330d, 0x0001340d, 0x0001350d, + 0x0001360d, 0x0001370d, 0x0001380d, 0x0001390d, + 0x00013a0d, 0x00013b0d, 0x00013c0d, 0x00013d0d, + 0x00013e0d, 0x00013f0d, 0x0001400d, 0x0001410d, + 0x0001420d, 0x0001430d, 0x0001440d, 0x0001450d, + 0x0001460d, 0x0001470d, 0x0001480d, 0x0001490d, + 0x00014a0d, 0x00014b0d, 0x00014c0d, 0x00014d0d, + 0x00014e0d, 0x00014f0d, 0x0001500d, 0x0001510d, + 0x0001520d, 0x0001530d, 0x0001540d, 0x0001550d, + 0x0001560d, 0x0001570d, 0x0001580d, 0x0001590d, 
+ 0x00015a0d, 0x00015b0d, 0x00015c0d, 0x00015d0d, + 0x00015e0d, 0x00015f0d, 0x0001600d, 0x0001610d, + 0x0001620d, 0x0001630d, 0x0001640d, 0x0001650d, + 0x0001660d, 0x0001670d, 0x0001680d, 0x0001690d, + 0x00016a0d, 0x00016b0d, 0x00016c0d, 0x00016d0d, + 0x00016e0d, 0x00016f0d, 0x0001700d, 0x0001710d, + 0x0001720d, 0x0001730d, 0x0001740d, 0x0001750d, + 0x0001760d, 0x0001770d, 0x0001780d, 0x0001790d, + 0x00017a0d, 0x00017b0d, 0x00017c0d, 0x00017d0d, + 0x00017e0d, 0x00017f0d, 0x0001800d, 0x0001810d, + 0x0001820d, 0x0001830d, 0x0001840d, 0x0001850d, + 0x0001860d, 0x0001870d, 0x0001880d, 0x0001890d, + 0x00018a0d, 0x00018b0d, 0x00018c0d, 0x00018d0d, + 0x00018e0d, 0x00018f0d, 0x0001900d, 0x0001910d, + 0x0001920d, 0x0001930d, 0x0001940d, 0x0001950d, + 0x0001960d, 0x0001970d, 0x0001980d, 0x0001990d, + 0x00019a0d, 0x00019b0d, 0x00019c0d, 0x00019d0d, + 0x00019e0d, 0x00019f0d, 0x0001a00d, 0x0001a10d, + 0x0001a20d, 0x0001a30d, 0x0001a40d, 0x0001a50d, + 0x0001a60d, 0x0001a70d, 0x0001a80d, 0x0001a90d, + 0x0001aa0d, 0x0001ab0d, 0x0001ac0d, 0x0001ad0d, + 0x0001ae0d, 0x0001af0d, 0x0001b00d, 0x0001b10d, + 0x0001b20d, 0x0001b30d, 0x0001b40d, 0x0001b50d, + 0x0001b60d, 0x0001b70d, 0x0001b80d, 0x0001b90d, + 0x0001ba0d, 0x0001bb0d, 0x0001bc0d, 0x0001bd0d, + 0x0001be0d, 0x0001bf0d, 0x0001c00d, 0x0001c10d, + 0x0001c20d, 0x0001c30d, 0x0001c40d, 0x0001c50d, + 0x0001c60d, 0x0001c70d, 0x0001c80d, 0x0001c90d, + 0x0001ca0d, 0x0001cb0d, 0x0001cc0d, 0x0001cd0d, + 0x0001ce0d, 0x0001cf0d, 0x0001d00d, 0x0001d10d, + 0x0001d20d, 0x0001d30d, 0x0001d40d, 0x0001d50d, + 0x0001d60d, 0x0001d70d, 0x0001d80d, 0x0001d90d, + 0x0001da0d, 0x0001db0d, 0x0001dc0d, 0x0001dd0d, + 0x0001de0d, 0x0001df0d, 0x0001e00d, 0x0001e10d, + 0x0001e20d, 0x0001e30d, 0x0001e40d, 0x0001e50d, + 0x0001e60d, 0x0001e70d, 0x0001e80d, 0x0001e90d, + 0x0001ea0d, 0x0001eb0d, 0x0001ec0d, 0x0001ed0d, + 0x0001ee0d, 0x0001ef0d, 0x0001f00d, 0x0001f10d, + 0x0001f20d, 0x0001f30d, 0x0001f40d, 0x0001f50d, + 0x0001f60d, 0x0001f70d, 0x0001f80d, 0x0001f90d, 
+ 0x0001fa0d, 0x0001fb0d, 0x0001fc0d, 0x0001fd0d, + 0x0001fe0d, 0x0001ff0d, 0x0002000d, 0x0002010d, + 0x0002020d, 0x0002030d, 0x0002040d, 0x0002050d, + 0x0002060d, 0x0002070d, 0x0002080d, 0x0002090d, + 0x00020a0d, 0x00020b0d, 0x00020c0d, 0x00020d0d, + 0x00020e0d, 0x00020f0d, 0x0002100d, 0x0002110d, + 0x0002120d, 0x0002130d, 0x0002140d, 0x0002150d, + 0x0002160d, 0x0002170d, 0x0002180d, 0x0002190d, + 0x00021a0d, 0x00021b0d, 0x00021c0d, 0x00021d0d, + 0x00021e0d, 0x00021f0d, 0x0002200d, 0x0002210d, + 0x0002220d, 0x0002230d, 0x0002240d, 0x0002250d, + 0x0002260d, 0x0002270d, 0x0002280d, 0x0002290d, + 0x00022a0d, 0x00022b0d, 0x00022c0d, 0x00022d0d, + 0x00022e0d, 0x00022f0d, 0x0002300d, 0x0002310d, + 0x0002320d, 0x0002330d, 0x0002340d, 0x0002350d, + 0x0002360d, 0x0002370d, 0x0002380d, 0x0002390d, + 0x00023a0d, 0x00023b0d, 0x00023c0d, 0x00023d0d, + 0x00023e0d, 0x00023f0d, 0x0002400d, 0x0002410d, + 0x0002420d, 0x0002430d, 0x0002440d, 0x0002450d, + 0x0002460d, 0x0002470d, 0x0002480d, 0x0002490d, + 0x00024a0d, 0x00024b0d, 0x00024c0d, 0x00024d0d, + 0x00024e0d, 0x00024f0d, 0x0002500d, 0x0002510d, + 0x0002520d, 0x0002530d, 0x0002540d, 0x0002550d, + 0x0002560d, 0x0002570d, 0x0002580d, 0x0002590d, + 0x00025a0d, 0x00025b0d, 0x00025c0d, 0x00025d0d, + 0x00025e0d, 0x00025f0d, 0x0002600d, 0x0002610d, + 0x0002620d, 0x0002630d, 0x0002640d, 0x0002650d, + 0x0002660d, 0x0002670d, 0x0002680d, 0x0002690d, + 0x00026a0d, 0x00026b0d, 0x00026c0d, 0x00026d0d, + 0x00026e0d, 0x00026f0d, 0x0002700d, 0x0002710d, + 0x0002720d, 0x0002730d, 0x0002740d, 0x0002750d, + 0x0002760d, 0x0002770d, 0x0002780d, 0x0002790d, + 0x00027a0d, 0x00027b0d, 0x00027c0d, 0x00027d0d, + 0x00027e0d, 0x00027f0d, 0x0002800d, 0x0002810d, + 0x0002820d, 0x0002830d, 0x0002840d, 0x0002850d, + 0x0002860d, 0x0002870d, 0x0002880d, 0x0002890d, + 0x00028a0d, 0x00028b0d, 0x00028c0d, 0x00028d0d, + 0x00028e0d, 0x00028f0d, 0x0002900d, 0x0002910d, + 0x0002920d, 0x0002930d, 0x0002940d, 0x0002950d, + 0x0002960d, 0x0002970d, 0x0002980d, 0x0002990d, 
+ 0x00029a0d, 0x00029b0d, 0x00029c0d, 0x00029d0d, + 0x00029e0d, 0x00029f0d, 0x0002a00d, 0x0002a10d, + 0x0002a20d, 0x0002a30d, 0x0002a40d, 0x0002a50d, + 0x0002a60d, 0x0002a70d, 0x0002a80d, 0x0002a90d, + 0x0002aa0d, 0x0002ab0d, 0x0002ac0d, 0x0002ad0d, + 0x0002ae0d, 0x0002af0d, 0x0002b00d, 0x0002b10d, + 0x0002b20d, 0x0002b30d, 0x0002b40d, 0x0002b50d, + 0x0002b60d, 0x0002b70d, 0x0002b80d, 0x0002b90d, + 0x0002ba0d, 0x0002bb0d, 0x0002bc0d, 0x0002bd0d, + 0x0002be0d, 0x0002bf0d, 0x0002c00d, 0x0002c10d, + 0x0002c20d, 0x0002c30d, 0x0002c40d, 0x0002c50d, + 0x0002c60d, 0x0002c70d, 0x0002c80d, 0x0002c90d, + 0x0002ca0d, 0x0002cb0d, 0x0002cc0d, 0x0002cd0d, + 0x0002ce0d, 0x0002cf0d, 0x0002d00d, 0x0002d10d, + 0x0002d20d, 0x0002d30d, 0x0002d40d, 0x0002d50d, + 0x0002d60d, 0x0002d70d, 0x0002d80d, 0x0002d90d, + 0x0002da0d, 0x0002db0d, 0x0002dc0d, 0x0002dd0d, + 0x0002de0d, 0x0002df0d, 0x0002e00d, 0x0002e10d, + 0x0002e20d, 0x0002e30d, 0x0002e40d, 0x0002e50d, + 0x0002e60d, 0x0002e70d, 0x0002e80d, 0x0002e90d, + 0x0002ea0d, 0x0002eb0d, 0x0002ec0d, 0x0002ed0d, + 0x0002ee0d, 0x0002ef0d, 0x0002f00d, 0x0002f10d, + 0x0002f20d, 0x0002f30d, 0x0002f40d, 0x0002f50d, + 0x0002f60d, 0x0002f70d, 0x0002f80d, 0x0002f90d, + 0x0002fa0d, 0x0002fb0d, 0x0002fc0d, 0x0002fd0d, + 0x0002fe0d, 0x0002ff0d, 0x0003000d, 0x0003010d, + 0x0003020d, 0x0003030d, 0x0003040d, 0x0003050d, + 0x0003060d, 0x0003070d, 0x0003080d, 0x0003090d, + 0x00030a0d, 0x00030b0d, 0x00030c0d, 0x00030d0d, + 0x00030e0d, 0x00030f0d, 0x0003100d, 0x0003110d, + 0x0003120d, 0x0003130d, 0x0003140d, 0x0003150d, + 0x0003160d, 0x0003170d, 0x0003180d, 0x0003190d, + 0x00031a0d, 0x00031b0d, 0x00031c0d, 0x00031d0d, + 0x00031e0d, 0x00031f0d, 0x0003200d, 0x0003210d, + 0x0003220d, 0x0003230d, 0x0003240d, 0x0003250d, + 0x0003260d, 0x0003270d, 0x0003280d, 0x0003290d, + 0x00032a0d, 0x00032b0d, 0x00032c0d, 0x00032d0d, + 0x00032e0d, 0x00032f0d, 0x0003300d, 0x0003310d, + 0x0003320d, 0x0003330d, 0x0003340d, 0x0003350d, + 0x0003360d, 0x0003370d, 0x0003380d, 0x0003390d, 
+ 0x00033a0d, 0x00033b0d, 0x00033c0d, 0x00033d0d, + 0x00033e0d, 0x00033f0d, 0x0003400d, 0x0003410d, + 0x0003420d, 0x0003430d, 0x0003440d, 0x0003450d, + 0x0003460d, 0x0003470d, 0x0003480d, 0x0003490d, + 0x00034a0d, 0x00034b0d, 0x00034c0d, 0x00034d0d, + 0x00034e0d, 0x00034f0d, 0x0003500d, 0x0003510d, + 0x0003520d, 0x0003530d, 0x0003540d, 0x0003550d, + 0x0003560d, 0x0003570d, 0x0003580d, 0x0003590d, + 0x00035a0d, 0x00035b0d, 0x00035c0d, 0x00035d0d, + 0x00035e0d, 0x00035f0d, 0x0003600d, 0x0003610d, + 0x0003620d, 0x0003630d, 0x0003640d, 0x0003650d, + 0x0003660d, 0x0003670d, 0x0003680d, 0x0003690d, + 0x00036a0d, 0x00036b0d, 0x00036c0d, 0x00036d0d, + 0x00036e0d, 0x00036f0d, 0x0003700d, 0x0003710d, + 0x0003720d, 0x0003730d, 0x0003740d, 0x0003750d, + 0x0003760d, 0x0003770d, 0x0003780d, 0x0003790d, + 0x00037a0d, 0x00037b0d, 0x00037c0d, 0x00037d0d, + 0x00037e0d, 0x00037f0d, 0x0003800d, 0x0003810d, + 0x0003820d, 0x0003830d, 0x0003840d, 0x0003850d, + 0x0003860d, 0x0003870d, 0x0003880d, 0x0003890d, + 0x00038a0d, 0x00038b0d, 0x00038c0d, 0x00038d0d, + 0x00038e0d, 0x00038f0d, 0x0003900d, 0x0003910d, + 0x0003920d, 0x0003930d, 0x0003940d, 0x0003950d, + 0x0003960d, 0x0003970d, 0x0003980d, 0x0003990d, + 0x00039a0d, 0x00039b0d, 0x00039c0d, 0x00039d0d, + 0x00039e0d, 0x00039f0d, 0x0003a00d, 0x0003a10d, + 0x0003a20d, 0x0003a30d, 0x0003a40d, 0x0003a50d, + 0x0003a60d, 0x0003a70d, 0x0003a80d, 0x0003a90d, + 0x0003aa0d, 0x0003ab0d, 0x0003ac0d, 0x0003ad0d, + 0x0003ae0d, 0x0003af0d, 0x0003b00d, 0x0003b10d, + 0x0003b20d, 0x0003b30d, 0x0003b40d, 0x0003b50d, + 0x0003b60d, 0x0003b70d, 0x0003b80d, 0x0003b90d, + 0x0003ba0d, 0x0003bb0d, 0x0003bc0d, 0x0003bd0d, + 0x0003be0d, 0x0003bf0d, 0x0003c00d, 0x0003c10d, + 0x0003c20d, 0x0003c30d, 0x0003c40d, 0x0003c50d, + 0x0003c60d, 0x0003c70d, 0x0003c80d, 0x0003c90d, + 0x0003ca0d, 0x0003cb0d, 0x0003cc0d, 0x0003cd0d, + 0x0003ce0d, 0x0003cf0d, 0x0003d00d, 0x0003d10d, + 0x0003d20d, 0x0003d30d, 0x0003d40d, 0x0003d50d, + 0x0003d60d, 0x0003d70d, 0x0003d80d, 0x0003d90d, 
+ 0x0003da0d, 0x0003db0d, 0x0003dc0d, 0x0003dd0d, + 0x0003de0d, 0x0003df0d, 0x0003e00d, 0x0003e10d, + 0x0003e20d, 0x0003e30d, 0x0003e40d, 0x0003e50d, + 0x0003e60d, 0x0003e70d, 0x0003e80d, 0x0003e90d, + 0x0003ea0d, 0x0003eb0d, 0x0003ec0d, 0x0003ed0d, + 0x0003ee0d, 0x0003ef0d, 0x0003f00d, 0x0003f10d, + 0x0003f20d, 0x0003f30d, 0x0003f40d, 0x0003f50d, + 0x0003f60d, 0x0003f70d, 0x0003f80d, 0x0003f90d, + 0x0003fa0d, 0x0003fb0d, 0x0003fc0d, 0x0003fd0d, + 0x0003fe0d, 0x0003ff0d, 0x000000ae, 0x000002ae, + 0x000004ae, 0x000006ae, 0x000008ae, 0x00000aae, + 0x00000cae, 0x00000eae, 0x000010ae, 0x000012ae, + 0x000014ae, 0x000016ae, 0x000018ae, 0x00001aae, + 0x00001cae, 0x00001eae, 0x000020ae, 0x000022ae, + 0x000024ae, 0x000026ae, 0x000028ae, 0x00002aae, + 0x00002cae, 0x00002eae, 0x000030ae, 0x000032ae, + 0x000034ae, 0x000036ae, 0x000038ae, 0x00003aae, + 0x00003cae, 0x00003eae, 0x000040ae, 0x000042ae, + 0x000044ae, 0x000046ae, 0x000048ae, 0x00004aae, + 0x00004cae, 0x00004eae, 0x000050ae, 0x000052ae, + 0x000054ae, 0x000056ae, 0x000058ae, 0x00005aae, + 0x00005cae, 0x00005eae, 0x000060ae, 0x000062ae, + 0x000064ae, 0x000066ae, 0x000068ae, 0x00006aae, + 0x00006cae, 0x00006eae, 0x000070ae, 0x000072ae, + 0x000074ae, 0x000076ae, 0x000078ae, 0x00007aae, + 0x00007cae, 0x00007eae, 0x000080ae, 0x000082ae, + 0x000084ae, 0x000086ae, 0x000088ae, 0x00008aae, + 0x00008cae, 0x00008eae, 0x000090ae, 0x000092ae, + 0x000094ae, 0x000096ae, 0x000098ae, 0x00009aae, + 0x00009cae, 0x00009eae, 0x0000a0ae, 0x0000a2ae, + 0x0000a4ae, 0x0000a6ae, 0x0000a8ae, 0x0000aaae, + 0x0000acae, 0x0000aeae, 0x0000b0ae, 0x0000b2ae, + 0x0000b4ae, 0x0000b6ae, 0x0000b8ae, 0x0000baae, + 0x0000bcae, 0x0000beae, 0x0000c0ae, 0x0000c2ae, + 0x0000c4ae, 0x0000c6ae, 0x0000c8ae, 0x0000caae, + 0x0000ccae, 0x0000ceae, 0x0000d0ae, 0x0000d2ae, + 0x0000d4ae, 0x0000d6ae, 0x0000d8ae, 0x0000daae, + 0x0000dcae, 0x0000deae, 0x0000e0ae, 0x0000e2ae, + 0x0000e4ae, 0x0000e6ae, 0x0000e8ae, 0x0000eaae, + 0x0000ecae, 0x0000eeae, 0x0000f0ae, 0x0000f2ae, 
+ 0x0000f4ae, 0x0000f6ae, 0x0000f8ae, 0x0000faae, + 0x0000fcae, 0x0000feae, 0x000100ae, 0x000102ae, + 0x000104ae, 0x000106ae, 0x000108ae, 0x00010aae, + 0x00010cae, 0x00010eae, 0x000110ae, 0x000112ae, + 0x000114ae, 0x000116ae, 0x000118ae, 0x00011aae, + 0x00011cae, 0x00011eae, 0x000120ae, 0x000122ae, + 0x000124ae, 0x000126ae, 0x000128ae, 0x00012aae, + 0x00012cae, 0x00012eae, 0x000130ae, 0x000132ae, + 0x000134ae, 0x000136ae, 0x000138ae, 0x00013aae, + 0x00013cae, 0x00013eae, 0x000140ae, 0x000142ae, + 0x000144ae, 0x000146ae, 0x000148ae, 0x00014aae, + 0x00014cae, 0x00014eae, 0x000150ae, 0x000152ae, + 0x000154ae, 0x000156ae, 0x000158ae, 0x00015aae, + 0x00015cae, 0x00015eae, 0x000160ae, 0x000162ae, + 0x000164ae, 0x000166ae, 0x000168ae, 0x00016aae, + 0x00016cae, 0x00016eae, 0x000170ae, 0x000172ae, + 0x000174ae, 0x000176ae, 0x000178ae, 0x00017aae, + 0x00017cae, 0x00017eae, 0x000180ae, 0x000182ae, + 0x000184ae, 0x000186ae, 0x000188ae, 0x00018aae, + 0x00018cae, 0x00018eae, 0x000190ae, 0x000192ae, + 0x000194ae, 0x000196ae, 0x000198ae, 0x00019aae, + 0x00019cae, 0x00019eae, 0x0001a0ae, 0x0001a2ae, + 0x0001a4ae, 0x0001a6ae, 0x0001a8ae, 0x0001aaae, + 0x0001acae, 0x0001aeae, 0x0001b0ae, 0x0001b2ae, + 0x0001b4ae, 0x0001b6ae, 0x0001b8ae, 0x0001baae, + 0x0001bcae, 0x0001beae, 0x0001c0ae, 0x0001c2ae, + 0x0001c4ae, 0x0001c6ae, 0x0001c8ae, 0x0001caae, + 0x0001ccae, 0x0001ceae, 0x0001d0ae, 0x0001d2ae, + 0x0001d4ae, 0x0001d6ae, 0x0001d8ae, 0x0001daae, + 0x0001dcae, 0x0001deae, 0x0001e0ae, 0x0001e2ae, + 0x0001e4ae, 0x0001e6ae, 0x0001e8ae, 0x0001eaae, + 0x0001ecae, 0x0001eeae, 0x0001f0ae, 0x0001f2ae, + 0x0001f4ae, 0x0001f6ae, 0x0001f8ae, 0x0001faae, + 0x0001fcae, 0x0001feae, 0x000200ae, 0x000202ae, + 0x000204ae, 0x000206ae, 0x000208ae, 0x00020aae, + 0x00020cae, 0x00020eae, 0x000210ae, 0x000212ae, + 0x000214ae, 0x000216ae, 0x000218ae, 0x00021aae, + 0x00021cae, 0x00021eae, 0x000220ae, 0x000222ae, + 0x000224ae, 0x000226ae, 0x000228ae, 0x00022aae, + 0x00022cae, 0x00022eae, 0x000230ae, 0x000232ae, 
+ 0x000234ae, 0x000236ae, 0x000238ae, 0x00023aae, + 0x00023cae, 0x00023eae, 0x000240ae, 0x000242ae, + 0x000244ae, 0x000246ae, 0x000248ae, 0x00024aae, + 0x00024cae, 0x00024eae, 0x000250ae, 0x000252ae, + 0x000254ae, 0x000256ae, 0x000258ae, 0x00025aae, + 0x00025cae, 0x00025eae, 0x000260ae, 0x000262ae, + 0x000264ae, 0x000266ae, 0x000268ae, 0x00026aae, + 0x00026cae, 0x00026eae, 0x000270ae, 0x000272ae, + 0x000274ae, 0x000276ae, 0x000278ae, 0x00027aae, + 0x00027cae, 0x00027eae, 0x000280ae, 0x000282ae, + 0x000284ae, 0x000286ae, 0x000288ae, 0x00028aae, + 0x00028cae, 0x00028eae, 0x000290ae, 0x000292ae, + 0x000294ae, 0x000296ae, 0x000298ae, 0x00029aae, + 0x00029cae, 0x00029eae, 0x0002a0ae, 0x0002a2ae, + 0x0002a4ae, 0x0002a6ae, 0x0002a8ae, 0x0002aaae, + 0x0002acae, 0x0002aeae, 0x0002b0ae, 0x0002b2ae, + 0x0002b4ae, 0x0002b6ae, 0x0002b8ae, 0x0002baae, + 0x0002bcae, 0x0002beae, 0x0002c0ae, 0x0002c2ae, + 0x0002c4ae, 0x0002c6ae, 0x0002c8ae, 0x0002caae, + 0x0002ccae, 0x0002ceae, 0x0002d0ae, 0x0002d2ae, + 0x0002d4ae, 0x0002d6ae, 0x0002d8ae, 0x0002daae, + 0x0002dcae, 0x0002deae, 0x0002e0ae, 0x0002e2ae, + 0x0002e4ae, 0x0002e6ae, 0x0002e8ae, 0x0002eaae, + 0x0002ecae, 0x0002eeae, 0x0002f0ae, 0x0002f2ae, + 0x0002f4ae, 0x0002f6ae, 0x0002f8ae, 0x0002faae, + 0x0002fcae, 0x0002feae, 0x000300ae, 0x000302ae, + 0x000304ae, 0x000306ae, 0x000308ae, 0x00030aae, + 0x00030cae, 0x00030eae, 0x000310ae, 0x000312ae, + 0x000314ae, 0x000316ae, 0x000318ae, 0x00031aae, + 0x00031cae, 0x00031eae, 0x000320ae, 0x000322ae, + 0x000324ae, 0x000326ae, 0x000328ae, 0x00032aae, + 0x00032cae, 0x00032eae, 0x000330ae, 0x000332ae, + 0x000334ae, 0x000336ae, 0x000338ae, 0x00033aae, + 0x00033cae, 0x00033eae, 0x000340ae, 0x000342ae, + 0x000344ae, 0x000346ae, 0x000348ae, 0x00034aae, + 0x00034cae, 0x00034eae, 0x000350ae, 0x000352ae, + 0x000354ae, 0x000356ae, 0x000358ae, 0x00035aae, + 0x00035cae, 0x00035eae, 0x000360ae, 0x000362ae, + 0x000364ae, 0x000366ae, 0x000368ae, 0x00036aae, + 0x00036cae, 0x00036eae, 0x000370ae, 0x000372ae, 
+ 0x000374ae, 0x000376ae, 0x000378ae, 0x00037aae, + 0x00037cae, 0x00037eae, 0x000380ae, 0x000382ae, + 0x000384ae, 0x000386ae, 0x000388ae, 0x00038aae, + 0x00038cae, 0x00038eae, 0x000390ae, 0x000392ae, + 0x000394ae, 0x000396ae, 0x000398ae, 0x00039aae, + 0x00039cae, 0x00039eae, 0x0003a0ae, 0x0003a2ae, + 0x0003a4ae, 0x0003a6ae, 0x0003a8ae, 0x0003aaae, + 0x0003acae, 0x0003aeae, 0x0003b0ae, 0x0003b2ae, + 0x0003b4ae, 0x0003b6ae, 0x0003b8ae, 0x0003baae, + 0x0003bcae, 0x0003beae, 0x0003c0ae, 0x0003c2ae, + 0x0003c4ae, 0x0003c6ae, 0x0003c8ae, 0x0003caae, + 0x0003ccae, 0x0003ceae, 0x0003d0ae, 0x0003d2ae, + 0x0003d4ae, 0x0003d6ae, 0x0003d8ae, 0x0003daae, + 0x0003dcae, 0x0003deae, 0x0003e0ae, 0x0003e2ae, + 0x0003e4ae, 0x0003e6ae, 0x0003e8ae, 0x0003eaae, + 0x0003ecae, 0x0003eeae, 0x0003f0ae, 0x0003f2ae, + 0x0003f4ae, 0x0003f6ae, 0x0003f8ae, 0x0003faae, + 0x0003fcae, 0x0003feae, 0x000400ae, 0x000402ae, + 0x000404ae, 0x000406ae, 0x000408ae, 0x00040aae, + 0x00040cae, 0x00040eae, 0x000410ae, 0x000412ae, + 0x000414ae, 0x000416ae, 0x000418ae, 0x00041aae, + 0x00041cae, 0x00041eae, 0x000420ae, 0x000422ae, + 0x000424ae, 0x000426ae, 0x000428ae, 0x00042aae, + 0x00042cae, 0x00042eae, 0x000430ae, 0x000432ae, + 0x000434ae, 0x000436ae, 0x000438ae, 0x00043aae, + 0x00043cae, 0x00043eae, 0x000440ae, 0x000442ae, + 0x000444ae, 0x000446ae, 0x000448ae, 0x00044aae, + 0x00044cae, 0x00044eae, 0x000450ae, 0x000452ae, + 0x000454ae, 0x000456ae, 0x000458ae, 0x00045aae, + 0x00045cae, 0x00045eae, 0x000460ae, 0x000462ae, + 0x000464ae, 0x000466ae, 0x000468ae, 0x00046aae, + 0x00046cae, 0x00046eae, 0x000470ae, 0x000472ae, + 0x000474ae, 0x000476ae, 0x000478ae, 0x00047aae, + 0x00047cae, 0x00047eae, 0x000480ae, 0x000482ae, + 0x000484ae, 0x000486ae, 0x000488ae, 0x00048aae, + 0x00048cae, 0x00048eae, 0x000490ae, 0x000492ae, + 0x000494ae, 0x000496ae, 0x000498ae, 0x00049aae, + 0x00049cae, 0x00049eae, 0x0004a0ae, 0x0004a2ae, + 0x0004a4ae, 0x0004a6ae, 0x0004a8ae, 0x0004aaae, + 0x0004acae, 0x0004aeae, 0x0004b0ae, 0x0004b2ae, 
+ 0x0004b4ae, 0x0004b6ae, 0x0004b8ae, 0x0004baae, + 0x0004bcae, 0x0004beae, 0x0004c0ae, 0x0004c2ae, + 0x0004c4ae, 0x0004c6ae, 0x0004c8ae, 0x0004caae, + 0x0004ccae, 0x0004ceae, 0x0004d0ae, 0x0004d2ae, + 0x0004d4ae, 0x0004d6ae, 0x0004d8ae, 0x0004daae, + 0x0004dcae, 0x0004deae, 0x0004e0ae, 0x0004e2ae, + 0x0004e4ae, 0x0004e6ae, 0x0004e8ae, 0x0004eaae, + 0x0004ecae, 0x0004eeae, 0x0004f0ae, 0x0004f2ae, + 0x0004f4ae, 0x0004f6ae, 0x0004f8ae, 0x0004faae, + 0x0004fcae, 0x0004feae, 0x000500ae, 0x000502ae, + 0x000504ae, 0x000506ae, 0x000508ae, 0x00050aae, + 0x00050cae, 0x00050eae, 0x000510ae, 0x000512ae, + 0x000514ae, 0x000516ae, 0x000518ae, 0x00051aae, + 0x00051cae, 0x00051eae, 0x000520ae, 0x000522ae, + 0x000524ae, 0x000526ae, 0x000528ae, 0x00052aae, + 0x00052cae, 0x00052eae, 0x000530ae, 0x000532ae, + 0x000534ae, 0x000536ae, 0x000538ae, 0x00053aae, + 0x00053cae, 0x00053eae, 0x000540ae, 0x000542ae, + 0x000544ae, 0x000546ae, 0x000548ae, 0x00054aae, + 0x00054cae, 0x00054eae, 0x000550ae, 0x000552ae, + 0x000554ae, 0x000556ae, 0x000558ae, 0x00055aae, + 0x00055cae, 0x00055eae, 0x000560ae, 0x000562ae, + 0x000564ae, 0x000566ae, 0x000568ae, 0x00056aae, + 0x00056cae, 0x00056eae, 0x000570ae, 0x000572ae, + 0x000574ae, 0x000576ae, 0x000578ae, 0x00057aae, + 0x00057cae, 0x00057eae, 0x000580ae, 0x000582ae, + 0x000584ae, 0x000586ae, 0x000588ae, 0x00058aae, + 0x00058cae, 0x00058eae, 0x000590ae, 0x000592ae, + 0x000594ae, 0x000596ae, 0x000598ae, 0x00059aae, + 0x00059cae, 0x00059eae, 0x0005a0ae, 0x0005a2ae, + 0x0005a4ae, 0x0005a6ae, 0x0005a8ae, 0x0005aaae, + 0x0005acae, 0x0005aeae, 0x0005b0ae, 0x0005b2ae, + 0x0005b4ae, 0x0005b6ae, 0x0005b8ae, 0x0005baae, + 0x0005bcae, 0x0005beae, 0x0005c0ae, 0x0005c2ae, + 0x0005c4ae, 0x0005c6ae, 0x0005c8ae, 0x0005caae, + 0x0005ccae, 0x0005ceae, 0x0005d0ae, 0x0005d2ae, + 0x0005d4ae, 0x0005d6ae, 0x0005d8ae, 0x0005daae, + 0x0005dcae, 0x0005deae, 0x0005e0ae, 0x0005e2ae, + 0x0005e4ae, 0x0005e6ae, 0x0005e8ae, 0x0005eaae, + 0x0005ecae, 0x0005eeae, 0x0005f0ae, 0x0005f2ae, 
+ 0x0005f4ae, 0x0005f6ae, 0x0005f8ae, 0x0005faae, + 0x0005fcae, 0x0005feae, 0x000600ae, 0x000602ae, + 0x000604ae, 0x000606ae, 0x000608ae, 0x00060aae, + 0x00060cae, 0x00060eae, 0x000610ae, 0x000612ae, + 0x000614ae, 0x000616ae, 0x000618ae, 0x00061aae, + 0x00061cae, 0x00061eae, 0x000620ae, 0x000622ae, + 0x000624ae, 0x000626ae, 0x000628ae, 0x00062aae, + 0x00062cae, 0x00062eae, 0x000630ae, 0x000632ae, + 0x000634ae, 0x000636ae, 0x000638ae, 0x00063aae, + 0x00063cae, 0x00063eae, 0x000640ae, 0x000642ae, + 0x000644ae, 0x000646ae, 0x000648ae, 0x00064aae, + 0x00064cae, 0x00064eae, 0x000650ae, 0x000652ae, + 0x000654ae, 0x000656ae, 0x000658ae, 0x00065aae, + 0x00065cae, 0x00065eae, 0x000660ae, 0x000662ae, + 0x000664ae, 0x000666ae, 0x000668ae, 0x00066aae, + 0x00066cae, 0x00066eae, 0x000670ae, 0x000672ae, + 0x000674ae, 0x000676ae, 0x000678ae, 0x00067aae, + 0x00067cae, 0x00067eae, 0x000680ae, 0x000682ae, + 0x000684ae, 0x000686ae, 0x000688ae, 0x00068aae, + 0x00068cae, 0x00068eae, 0x000690ae, 0x000692ae, + 0x000694ae, 0x000696ae, 0x000698ae, 0x00069aae, + 0x00069cae, 0x00069eae, 0x0006a0ae, 0x0006a2ae, + 0x0006a4ae, 0x0006a6ae, 0x0006a8ae, 0x0006aaae, + 0x0006acae, 0x0006aeae, 0x0006b0ae, 0x0006b2ae, + 0x0006b4ae, 0x0006b6ae, 0x0006b8ae, 0x0006baae, + 0x0006bcae, 0x0006beae, 0x0006c0ae, 0x0006c2ae, + 0x0006c4ae, 0x0006c6ae, 0x0006c8ae, 0x0006caae, + 0x0006ccae, 0x0006ceae, 0x0006d0ae, 0x0006d2ae, + 0x0006d4ae, 0x0006d6ae, 0x0006d8ae, 0x0006daae, + 0x0006dcae, 0x0006deae, 0x0006e0ae, 0x0006e2ae, + 0x0006e4ae, 0x0006e6ae, 0x0006e8ae, 0x0006eaae, + 0x0006ecae, 0x0006eeae, 0x0006f0ae, 0x0006f2ae, + 0x0006f4ae, 0x0006f6ae, 0x0006f8ae, 0x0006faae, + 0x0006fcae, 0x0006feae, 0x000700ae, 0x000702ae, + 0x000704ae, 0x000706ae, 0x000708ae, 0x00070aae, + 0x00070cae, 0x00070eae, 0x000710ae, 0x000712ae, + 0x000714ae, 0x000716ae, 0x000718ae, 0x00071aae, + 0x00071cae, 0x00071eae, 0x000720ae, 0x000722ae, + 0x000724ae, 0x000726ae, 0x000728ae, 0x00072aae, + 0x00072cae, 0x00072eae, 0x000730ae, 0x000732ae, 
+ 0x000734ae, 0x000736ae, 0x000738ae, 0x00073aae, + 0x00073cae, 0x00073eae, 0x000740ae, 0x000742ae, + 0x000744ae, 0x000746ae, 0x000748ae, 0x00074aae, + 0x00074cae, 0x00074eae, 0x000750ae, 0x000752ae, + 0x000754ae, 0x000756ae, 0x000758ae, 0x00075aae, + 0x00075cae, 0x00075eae, 0x000760ae, 0x000762ae, + 0x000764ae, 0x000766ae, 0x000768ae, 0x00076aae, + 0x00076cae, 0x00076eae, 0x000770ae, 0x000772ae, + 0x000774ae, 0x000776ae, 0x000778ae, 0x00077aae, + 0x00077cae, 0x00077eae, 0x000780ae, 0x000782ae, + 0x000784ae, 0x000786ae, 0x000788ae, 0x00078aae, + 0x00078cae, 0x00078eae, 0x000790ae, 0x000792ae, + 0x000794ae, 0x000796ae, 0x000798ae, 0x00079aae, + 0x00079cae, 0x00079eae, 0x0007a0ae, 0x0007a2ae, + 0x0007a4ae, 0x0007a6ae, 0x0007a8ae, 0x0007aaae, + 0x0007acae, 0x0007aeae, 0x0007b0ae, 0x0007b2ae, + 0x0007b4ae, 0x0007b6ae, 0x0007b8ae, 0x0007baae, + 0x0007bcae, 0x0007beae, 0x0007c0ae, 0x0007c2ae, + 0x0007c4ae, 0x0007c6ae, 0x0007c8ae, 0x0007caae, + 0x0007ccae, 0x0007ceae, 0x0007d0ae, 0x0007d2ae, + 0x0007d4ae, 0x0007d6ae, 0x0007d8ae, 0x0007daae, + 0x0007dcae, 0x0007deae, 0x0007e0ae, 0x0007e2ae, + 0x0007e4ae, 0x0007e6ae, 0x0007e8ae, 0x0007eaae, + 0x0007ecae, 0x0007eeae, 0x0007f0ae, 0x0007f2ae, + 0x0007f4ae, 0x0007f6ae, 0x0007f8ae, 0x0007faae, + 0x0007fcae, 0x0007feae, 0x000001af, 0x000003af, + 0x000005af, 0x000007af, 0x000009af, 0x00000baf, + 0x00000daf, 0x00000faf, 0x000011af, 0x000013af, + 0x000015af, 0x000017af, 0x000019af, 0x00001baf, + 0x00001daf, 0x00001faf, 0x000021af, 0x000023af, + 0x000025af, 0x000027af, 0x000029af, 0x00002baf, + 0x00002daf, 0x00002faf, 0x000031af, 0x000033af, + 0x000035af, 0x000037af, 0x000039af, 0x00003baf, + 0x00003daf, 0x00003faf, 0x000041af, 0x000043af, + 0x000045af, 0x000047af, 0x000049af, 0x00004baf, + 0x00004daf, 0x00004faf, 0x000051af, 0x000053af, + 0x000055af, 0x000057af, 0x000059af, 0x00005baf, + 0x00005daf, 0x00005faf, 0x000061af, 0x000063af, + 0x000065af, 0x000067af, 0x000069af, 0x00006baf, + 0x00006daf, 0x00006faf, 0x000071af, 0x000073af, 
+ 0x000075af, 0x000077af, 0x000079af, 0x00007baf, + 0x00007daf, 0x00007faf, 0x000081af, 0x000083af, + 0x000085af, 0x000087af, 0x000089af, 0x00008baf, + 0x00008daf, 0x00008faf, 0x000091af, 0x000093af, + 0x000095af, 0x000097af, 0x000099af, 0x00009baf, + 0x00009daf, 0x00009faf, 0x0000a1af, 0x0000a3af, + 0x0000a5af, 0x0000a7af, 0x0000a9af, 0x0000abaf, + 0x0000adaf, 0x0000afaf, 0x0000b1af, 0x0000b3af, + 0x0000b5af, 0x0000b7af, 0x0000b9af, 0x0000bbaf, + 0x0000bdaf, 0x0000bfaf, 0x0000c1af, 0x0000c3af, + 0x0000c5af, 0x0000c7af, 0x0000c9af, 0x0000cbaf, + 0x0000cdaf, 0x0000cfaf, 0x0000d1af, 0x0000d3af, + 0x0000d5af, 0x0000d7af, 0x0000d9af, 0x0000dbaf, + 0x0000ddaf, 0x0000dfaf, 0x0000e1af, 0x0000e3af, + 0x0000e5af, 0x0000e7af, 0x0000e9af, 0x0000ebaf, + 0x0000edaf, 0x0000efaf, 0x0000f1af, 0x0000f3af, + 0x0000f5af, 0x0000f7af, 0x0000f9af, 0x0000fbaf, + 0x0000fdaf, 0x0000ffaf, 0x000101af, 0x000103af, + 0x000105af, 0x000107af, 0x000109af, 0x00010baf, + 0x00010daf, 0x00010faf, 0x000111af, 0x000113af, + 0x000115af, 0x000117af, 0x000119af, 0x00011baf, + 0x00011daf, 0x00011faf, 0x000121af, 0x000123af, + 0x000125af, 0x000127af, 0x000129af, 0x00012baf, + 0x00012daf, 0x00012faf, 0x000131af, 0x000133af, + 0x000135af, 0x000137af, 0x000139af, 0x00013baf, + 0x00013daf, 0x00013faf, 0x000141af, 0x000143af, + 0x000145af, 0x000147af, 0x000149af, 0x00014baf, + 0x00014daf, 0x00014faf, 0x000151af, 0x000153af, + 0x000155af, 0x000157af, 0x000159af, 0x00015baf, + 0x00015daf, 0x00015faf, 0x000161af, 0x000163af, + 0x000165af, 0x000167af, 0x000169af, 0x00016baf, + 0x00016daf, 0x00016faf, 0x000171af, 0x000173af, + 0x000175af, 0x000177af, 0x000179af, 0x00017baf, + 0x00017daf, 0x00017faf, 0x000181af, 0x000183af, + 0x000185af, 0x000187af, 0x000189af, 0x00018baf, + 0x00018daf, 0x00018faf, 0x000191af, 0x000193af, + 0x000195af, 0x000197af, 0x000199af, 0x00019baf, + 0x00019daf, 0x00019faf, 0x0001a1af, 0x0001a3af, + 0x0001a5af, 0x0001a7af, 0x0001a9af, 0x0001abaf, + 0x0001adaf, 0x0001afaf, 0x0001b1af, 0x0001b3af, 
+ 0x0001b5af, 0x0001b7af, 0x0001b9af, 0x0001bbaf, + 0x0001bdaf, 0x0001bfaf, 0x0001c1af, 0x0001c3af, + 0x0001c5af, 0x0001c7af, 0x0001c9af, 0x0001cbaf, + 0x0001cdaf, 0x0001cfaf, 0x0001d1af, 0x0001d3af, + 0x0001d5af, 0x0001d7af, 0x0001d9af, 0x0001dbaf, + 0x0001ddaf, 0x0001dfaf, 0x0001e1af, 0x0001e3af, + 0x0001e5af, 0x0001e7af, 0x0001e9af, 0x0001ebaf, + 0x0001edaf, 0x0001efaf, 0x0001f1af, 0x0001f3af, + 0x0001f5af, 0x0001f7af, 0x0001f9af, 0x0001fbaf, + 0x0001fdaf, 0x0001ffaf, 0x000201af, 0x000203af, + 0x000205af, 0x000207af, 0x000209af, 0x00020baf, + 0x00020daf, 0x00020faf, 0x000211af, 0x000213af, + 0x000215af, 0x000217af, 0x000219af, 0x00021baf, + 0x00021daf, 0x00021faf, 0x000221af, 0x000223af, + 0x000225af, 0x000227af, 0x000229af, 0x00022baf, + 0x00022daf, 0x00022faf, 0x000231af, 0x000233af, + 0x000235af, 0x000237af, 0x000239af, 0x00023baf, + 0x00023daf, 0x00023faf, 0x000241af, 0x000243af, + 0x000245af, 0x000247af, 0x000249af, 0x00024baf, + 0x00024daf, 0x00024faf, 0x000251af, 0x000253af, + 0x000255af, 0x000257af, 0x000259af, 0x00025baf, + 0x00025daf, 0x00025faf, 0x000261af, 0x000263af, + 0x000265af, 0x000267af, 0x000269af, 0x00026baf, + 0x00026daf, 0x00026faf, 0x000271af, 0x000273af, + 0x000275af, 0x000277af, 0x000279af, 0x00027baf, + 0x00027daf, 0x00027faf, 0x000281af, 0x000283af, + 0x000285af, 0x000287af, 0x000289af, 0x00028baf, + 0x00028daf, 0x00028faf, 0x000291af, 0x000293af, + 0x000295af, 0x000297af, 0x000299af, 0x00029baf, + 0x00029daf, 0x00029faf, 0x0002a1af, 0x0002a3af, + 0x0002a5af, 0x0002a7af, 0x0002a9af, 0x0002abaf, + 0x0002adaf, 0x0002afaf, 0x0002b1af, 0x0002b3af, + 0x0002b5af, 0x0002b7af, 0x0002b9af, 0x0002bbaf, + 0x0002bdaf, 0x0002bfaf, 0x0002c1af, 0x0002c3af, + 0x0002c5af, 0x0002c7af, 0x0002c9af, 0x0002cbaf, + 0x0002cdaf, 0x0002cfaf, 0x0002d1af, 0x0002d3af, + 0x0002d5af, 0x0002d7af, 0x0002d9af, 0x0002dbaf, + 0x0002ddaf, 0x0002dfaf, 0x0002e1af, 0x0002e3af, + 0x0002e5af, 0x0002e7af, 0x0002e9af, 0x0002ebaf, + 0x0002edaf, 0x0002efaf, 0x0002f1af, 0x0002f3af, 
+ 0x0002f5af, 0x0002f7af, 0x0002f9af, 0x0002fbaf, + 0x0002fdaf, 0x0002ffaf, 0x000301af, 0x000303af, + 0x000305af, 0x000307af, 0x000309af, 0x00030baf, + 0x00030daf, 0x00030faf, 0x000311af, 0x000313af, + 0x000315af, 0x000317af, 0x000319af, 0x00031baf, + 0x00031daf, 0x00031faf, 0x000321af, 0x000323af, + 0x000325af, 0x000327af, 0x000329af, 0x00032baf, + 0x00032daf, 0x00032faf, 0x000331af, 0x000333af, + 0x000335af, 0x000337af, 0x000339af, 0x00033baf, + 0x00033daf, 0x00033faf, 0x000341af, 0x000343af, + 0x000345af, 0x000347af, 0x000349af, 0x00034baf, + 0x00034daf, 0x00034faf, 0x000351af, 0x000353af, + 0x000355af, 0x000357af, 0x000359af, 0x00035baf, + 0x00035daf, 0x00035faf, 0x000361af, 0x000363af, + 0x000365af, 0x000367af, 0x000369af, 0x00036baf, + 0x00036daf, 0x00036faf, 0x000371af, 0x000373af, + 0x000375af, 0x000377af, 0x000379af, 0x00037baf, + 0x00037daf, 0x00037faf, 0x000381af, 0x000383af, + 0x000385af, 0x000387af, 0x000389af, 0x00038baf, + 0x00038daf, 0x00038faf, 0x000391af, 0x000393af, + 0x000395af, 0x000397af, 0x000399af, 0x00039baf, + 0x00039daf, 0x00039faf, 0x0003a1af, 0x0003a3af, + 0x0003a5af, 0x0003a7af, 0x0003a9af, 0x0003abaf, + 0x0003adaf, 0x0003afaf, 0x0003b1af, 0x0003b3af, + 0x0003b5af, 0x0003b7af, 0x0003b9af, 0x0003bbaf, + 0x0003bdaf, 0x0003bfaf, 0x0003c1af, 0x0003c3af, + 0x0003c5af, 0x0003c7af, 0x0003c9af, 0x0003cbaf, + 0x0003cdaf, 0x0003cfaf, 0x0003d1af, 0x0003d3af, + 0x0003d5af, 0x0003d7af, 0x0003d9af, 0x0003dbaf, + 0x0003ddaf, 0x0003dfaf, 0x0003e1af, 0x0003e3af, + 0x0003e5af, 0x0003e7af, 0x0003e9af, 0x0003ebaf, + 0x0003edaf, 0x0003efaf, 0x0003f1af, 0x0003f3af, + 0x0003f5af, 0x0003f7af, 0x0003f9af, 0x0003fbaf, + 0x0003fdaf, 0x0003ffaf, 0x000401af, 0x000403af, + 0x000405af, 0x000407af, 0x000409af, 0x00040baf, + 0x00040daf, 0x00040faf, 0x000411af, 0x000413af, + 0x000415af, 0x000417af, 0x000419af, 0x00041baf, + 0x00041daf, 0x00041faf, 0x000421af, 0x000423af, + 0x000425af, 0x000427af, 0x000429af, 0x00042baf, + 0x00042daf, 0x00042faf, 0x000431af, 0x000433af, 
+ 0x000435af, 0x000437af, 0x000439af, 0x00043baf, + 0x00043daf, 0x00043faf, 0x000441af, 0x000443af, + 0x000445af, 0x000447af, 0x000449af, 0x00044baf, + 0x00044daf, 0x00044faf, 0x000451af, 0x000453af, + 0x000455af, 0x000457af, 0x000459af, 0x00045baf, + 0x00045daf, 0x00045faf, 0x000461af, 0x000463af, + 0x000465af, 0x000467af, 0x000469af, 0x00046baf, + 0x00046daf, 0x00046faf, 0x000471af, 0x000473af, + 0x000475af, 0x000477af, 0x000479af, 0x00047baf, + 0x00047daf, 0x00047faf, 0x000481af, 0x000483af, + 0x000485af, 0x000487af, 0x000489af, 0x00048baf, + 0x00048daf, 0x00048faf, 0x000491af, 0x000493af, + 0x000495af, 0x000497af, 0x000499af, 0x00049baf, + 0x00049daf, 0x00049faf, 0x0004a1af, 0x0004a3af, + 0x0004a5af, 0x0004a7af, 0x0004a9af, 0x0004abaf, + 0x0004adaf, 0x0004afaf, 0x0004b1af, 0x0004b3af, + 0x0004b5af, 0x0004b7af, 0x0004b9af, 0x0004bbaf, + 0x0004bdaf, 0x0004bfaf, 0x0004c1af, 0x0004c3af, + 0x0004c5af, 0x0004c7af, 0x0004c9af, 0x0004cbaf, + 0x0004cdaf, 0x0004cfaf, 0x0004d1af, 0x0004d3af, + 0x0004d5af, 0x0004d7af, 0x0004d9af, 0x0004dbaf, + 0x0004ddaf, 0x0004dfaf, 0x0004e1af, 0x0004e3af, + 0x0004e5af, 0x0004e7af, 0x0004e9af, 0x0004ebaf, + 0x0004edaf, 0x0004efaf, 0x0004f1af, 0x0004f3af, + 0x0004f5af, 0x0004f7af, 0x0004f9af, 0x0004fbaf, + 0x0004fdaf, 0x0004ffaf, 0x000501af, 0x000503af, + 0x000505af, 0x000507af, 0x000509af, 0x00050baf, + 0x00050daf, 0x00050faf, 0x000511af, 0x000513af, + 0x000515af, 0x000517af, 0x000519af, 0x00051baf, + 0x00051daf, 0x00051faf, 0x000521af, 0x000523af, + 0x000525af, 0x000527af, 0x000529af, 0x00052baf, + 0x00052daf, 0x00052faf, 0x000531af, 0x000533af, + 0x000535af, 0x000537af, 0x000539af, 0x00053baf, + 0x00053daf, 0x00053faf, 0x000541af, 0x000543af, + 0x000545af, 0x000547af, 0x000549af, 0x00054baf, + 0x00054daf, 0x00054faf, 0x000551af, 0x000553af, + 0x000555af, 0x000557af, 0x000559af, 0x00055baf, + 0x00055daf, 0x00055faf, 0x000561af, 0x000563af, + 0x000565af, 0x000567af, 0x000569af, 0x00056baf, + 0x00056daf, 0x00056faf, 0x000571af, 0x000573af, 
+ 0x000575af, 0x000577af, 0x000579af, 0x00057baf, + 0x00057daf, 0x00057faf, 0x000581af, 0x000583af, + 0x000585af, 0x000587af, 0x000589af, 0x00058baf, + 0x00058daf, 0x00058faf, 0x000591af, 0x000593af, + 0x000595af, 0x000597af, 0x000599af, 0x00059baf, + 0x00059daf, 0x00059faf, 0x0005a1af, 0x0005a3af, + 0x0005a5af, 0x0005a7af, 0x0005a9af, 0x0005abaf, + 0x0005adaf, 0x0005afaf, 0x0005b1af, 0x0005b3af, + 0x0005b5af, 0x0005b7af, 0x0005b9af, 0x0005bbaf, + 0x0005bdaf, 0x0005bfaf, 0x0005c1af, 0x0005c3af, + 0x0005c5af, 0x0005c7af, 0x0005c9af, 0x0005cbaf, + 0x0005cdaf, 0x0005cfaf, 0x0005d1af, 0x0005d3af, + 0x0005d5af, 0x0005d7af, 0x0005d9af, 0x0005dbaf, + 0x0005ddaf, 0x0005dfaf, 0x0005e1af, 0x0005e3af, + 0x0005e5af, 0x0005e7af, 0x0005e9af, 0x0005ebaf, + 0x0005edaf, 0x0005efaf, 0x0005f1af, 0x0005f3af, + 0x0005f5af, 0x0005f7af, 0x0005f9af, 0x0005fbaf, + 0x0005fdaf, 0x0005ffaf, 0x000601af, 0x000603af, + 0x000605af, 0x000607af, 0x000609af, 0x00060baf, + 0x00060daf, 0x00060faf, 0x000611af, 0x000613af, + 0x000615af, 0x000617af, 0x000619af, 0x00061baf, + 0x00061daf, 0x00061faf, 0x000621af, 0x000623af, + 0x000625af, 0x000627af, 0x000629af, 0x00062baf, + 0x00062daf, 0x00062faf, 0x000631af, 0x000633af, + 0x000635af, 0x000637af, 0x000639af, 0x00063baf, + 0x00063daf, 0x00063faf, 0x000641af, 0x000643af, + 0x000645af, 0x000647af, 0x000649af, 0x00064baf, + 0x00064daf, 0x00064faf, 0x000651af, 0x000653af, + 0x000655af, 0x000657af, 0x000659af, 0x00065baf, + 0x00065daf, 0x00065faf, 0x000661af, 0x000663af, + 0x000665af, 0x000667af, 0x000669af, 0x00066baf, + 0x00066daf, 0x00066faf, 0x000671af, 0x000673af, + 0x000675af, 0x000677af, 0x000679af, 0x00067baf, + 0x00067daf, 0x00067faf, 0x000681af, 0x000683af, + 0x000685af, 0x000687af, 0x000689af, 0x00068baf, + 0x00068daf, 0x00068faf, 0x000691af, 0x000693af, + 0x000695af, 0x000697af, 0x000699af, 0x00069baf, + 0x00069daf, 0x00069faf, 0x0006a1af, 0x0006a3af, + 0x0006a5af, 0x0006a7af, 0x0006a9af, 0x0006abaf, + 0x0006adaf, 0x0006afaf, 0x0006b1af, 0x0006b3af, 
+ 0x0006b5af, 0x0006b7af, 0x0006b9af, 0x0006bbaf, + 0x0006bdaf, 0x0006bfaf, 0x0006c1af, 0x0006c3af, + 0x0006c5af, 0x0006c7af, 0x0006c9af, 0x0006cbaf, + 0x0006cdaf, 0x0006cfaf, 0x0006d1af, 0x0006d3af, + 0x0006d5af, 0x0006d7af, 0x0006d9af, 0x0006dbaf, + 0x0006ddaf, 0x0006dfaf, 0x0006e1af, 0x0006e3af, + 0x0006e5af, 0x0006e7af, 0x0006e9af, 0x0006ebaf, + 0x0006edaf, 0x0006efaf, 0x0006f1af, 0x0006f3af, + 0x0006f5af, 0x0006f7af, 0x0006f9af, 0x0006fbaf, + 0x0006fdaf, 0x0006ffaf, 0x000701af, 0x000703af, + 0x000705af, 0x000707af, 0x000709af, 0x00070baf, + 0x00070daf, 0x00070faf, 0x000711af, 0x000713af, + 0x000715af, 0x000717af, 0x000719af, 0x00071baf, + 0x00071daf, 0x00071faf, 0x000721af, 0x000723af, + 0x000725af, 0x000727af, 0x000729af, 0x00072baf, + 0x00072daf, 0x00072faf, 0x000731af, 0x000733af, + 0x000735af, 0x000737af, 0x000739af, 0x00073baf, + 0x00073daf, 0x00073faf, 0x000741af, 0x000743af, + 0x000745af, 0x000747af, 0x000749af, 0x00074baf, + 0x00074daf, 0x00074faf, 0x000751af, 0x000753af, + 0x000755af, 0x000757af, 0x000759af, 0x00075baf, + 0x00075daf, 0x00075faf, 0x000761af, 0x000763af, + 0x000765af, 0x000767af, 0x000769af, 0x00076baf, + 0x00076daf, 0x00076faf, 0x000771af, 0x000773af, + 0x000775af, 0x000777af, 0x000779af, 0x00077baf, + 0x00077daf, 0x00077faf, 0x000781af, 0x000783af, + 0x000785af, 0x000787af, 0x000789af, 0x00078baf, + 0x00078daf, 0x00078faf, 0x000791af, 0x000793af, + 0x000795af, 0x000797af, 0x000799af, 0x00079baf, + 0x00079daf, 0x00079faf, 0x0007a1af, 0x0007a3af, + 0x0007a5af, 0x0007a7af, 0x0007a9af, 0x0007abaf, + 0x0007adaf, 0x0007afaf, 0x0007b1af, 0x0007b3af, + 0x0007b5af, 0x0007b7af, 0x0007b9af, 0x0007bbaf, + 0x0007bdaf, 0x0007bfaf, 0x0007c1af, 0x0007c3af, + 0x0007c5af, 0x0007c7af, 0x0007c9af, 0x0007cbaf, + 0x0007cdaf, 0x0007cfaf, 0x0007d1af, 0x0007d3af, + 0x0007d5af, 0x0007d7af, 0x0007d9af, 0x0007dbaf, + 0x0007ddaf, 0x0007dfaf, 0x0007e1af, 0x0007e3af, + 0x0007e5af, 0x0007e7af, 0x0007e9af, 0x0007ebaf, + 0x0007edaf, 0x0007efaf, 0x0007f1af, 0x0007f3af, 
+ 0x0007f5af, 0x0007f7af, 0x0007f9af, 0x0007fbaf, + 0x0007fdaf, 0x0007ffaf, 0x000801af, 0x000803af, + 0x000805af, 0x000807af, 0x000809af, 0x00080baf, + 0x00080daf, 0x00080faf, 0x000811af, 0x000813af, + 0x000815af, 0x000817af, 0x000819af, 0x00081baf, + 0x00081daf, 0x00081faf, 0x000821af, 0x000823af, + 0x000825af, 0x000827af, 0x000829af, 0x00082baf, + 0x00082daf, 0x00082faf, 0x000831af, 0x000833af, + 0x000835af, 0x000837af, 0x000839af, 0x00083baf, + 0x00083daf, 0x00083faf, 0x000841af, 0x000843af, + 0x000845af, 0x000847af, 0x000849af, 0x00084baf, + 0x00084daf, 0x00084faf, 0x000851af, 0x000853af, + 0x000855af, 0x000857af, 0x000859af, 0x00085baf, + 0x00085daf, 0x00085faf, 0x000861af, 0x000863af, + 0x000865af, 0x000867af, 0x000869af, 0x00086baf, + 0x00086daf, 0x00086faf, 0x000871af, 0x000873af, + 0x000875af, 0x000877af, 0x000879af, 0x00087baf, + 0x00087daf, 0x00087faf, 0x000881af, 0x000883af, + 0x000885af, 0x000887af, 0x000889af, 0x00088baf, + 0x00088daf, 0x00088faf, 0x000891af, 0x000893af, + 0x000895af, 0x000897af, 0x000899af, 0x00089baf, + 0x00089daf, 0x00089faf, 0x0008a1af, 0x0008a3af, + 0x0008a5af, 0x0008a7af, 0x0008a9af, 0x0008abaf, + 0x0008adaf, 0x0008afaf, 0x0008b1af, 0x0008b3af, + 0x0008b5af, 0x0008b7af, 0x0008b9af, 0x0008bbaf, + 0x0008bdaf, 0x0008bfaf, 0x0008c1af, 0x0008c3af, + 0x0008c5af, 0x0008c7af, 0x0008c9af, 0x0008cbaf, + 0x0008cdaf, 0x0008cfaf, 0x0008d1af, 0x0008d3af, + 0x0008d5af, 0x0008d7af, 0x0008d9af, 0x0008dbaf, + 0x0008ddaf, 0x0008dfaf, 0x0008e1af, 0x0008e3af, + 0x0008e5af, 0x0008e7af, 0x0008e9af, 0x0008ebaf, + 0x0008edaf, 0x0008efaf, 0x0008f1af, 0x0008f3af, + 0x0008f5af, 0x0008f7af, 0x0008f9af, 0x0008fbaf, + 0x0008fdaf, 0x0008ffaf, 0x000901af, 0x000903af, + 0x000905af, 0x000907af, 0x000909af, 0x00090baf, + 0x00090daf, 0x00090faf, 0x000911af, 0x000913af, + 0x000915af, 0x000917af, 0x000919af, 0x00091baf, + 0x00091daf, 0x00091faf, 0x000921af, 0x000923af, + 0x000925af, 0x000927af, 0x000929af, 0x00092baf, + 0x00092daf, 0x00092faf, 0x000931af, 0x000933af, 
+ 0x000935af, 0x000937af, 0x000939af, 0x00093baf, + 0x00093daf, 0x00093faf, 0x000941af, 0x000943af, + 0x000945af, 0x000947af, 0x000949af, 0x00094baf, + 0x00094daf, 0x00094faf, 0x000951af, 0x000953af, + 0x000955af, 0x000957af, 0x000959af, 0x00095baf, + 0x00095daf, 0x00095faf, 0x000961af, 0x000963af, + 0x000965af, 0x000967af, 0x000969af, 0x00096baf, + 0x00096daf, 0x00096faf, 0x000971af, 0x000973af, + 0x000975af, 0x000977af, 0x000979af, 0x00097baf, + 0x00097daf, 0x00097faf, 0x000981af, 0x000983af, + 0x000985af, 0x000987af, 0x000989af, 0x00098baf, + 0x00098daf, 0x00098faf, 0x000991af, 0x000993af, + 0x000995af, 0x000997af, 0x000999af, 0x00099baf, + 0x00099daf, 0x00099faf, 0x0009a1af, 0x0009a3af, + 0x0009a5af, 0x0009a7af, 0x0009a9af, 0x0009abaf, + 0x0009adaf, 0x0009afaf, 0x0009b1af, 0x0009b3af, + 0x0009b5af, 0x0009b7af, 0x0009b9af, 0x0009bbaf, + 0x0009bdaf, 0x0009bfaf, 0x0009c1af, 0x0009c3af, + 0x0009c5af, 0x0009c7af, 0x0009c9af, 0x0009cbaf, + 0x0009cdaf, 0x0009cfaf, 0x0009d1af, 0x0009d3af, + 0x0009d5af, 0x0009d7af, 0x0009d9af, 0x0009dbaf, + 0x0009ddaf, 0x0009dfaf, 0x0009e1af, 0x0009e3af, + 0x0009e5af, 0x0009e7af, 0x0009e9af, 0x0009ebaf, + 0x0009edaf, 0x0009efaf, 0x0009f1af, 0x0009f3af, + 0x0009f5af, 0x0009f7af, 0x0009f9af, 0x0009fbaf, + 0x0009fdaf, 0x0009ffaf, 0x000a01af, 0x000a03af, + 0x000a05af, 0x000a07af, 0x000a09af, 0x000a0baf, + 0x000a0daf, 0x000a0faf, 0x000a11af, 0x000a13af, + 0x000a15af, 0x000a17af, 0x000a19af, 0x000a1baf, + 0x000a1daf, 0x000a1faf, 0x000a21af, 0x000a23af, + 0x000a25af, 0x000a27af, 0x000a29af, 0x000a2baf, + 0x000a2daf, 0x000a2faf, 0x000a31af, 0x000a33af, + 0x000a35af, 0x000a37af, 0x000a39af, 0x000a3baf, + 0x000a3daf, 0x000a3faf, 0x000a41af, 0x000a43af, + 0x000a45af, 0x000a47af, 0x000a49af, 0x000a4baf, + 0x000a4daf, 0x000a4faf, 0x000a51af, 0x000a53af, + 0x000a55af, 0x000a57af, 0x000a59af, 0x000a5baf, + 0x000a5daf, 0x000a5faf, 0x000a61af, 0x000a63af, + 0x000a65af, 0x000a67af, 0x000a69af, 0x000a6baf, + 0x000a6daf, 0x000a6faf, 0x000a71af, 0x000a73af, 
+ 0x000a75af, 0x000a77af, 0x000a79af, 0x000a7baf, + 0x000a7daf, 0x000a7faf, 0x000a81af, 0x000a83af, + 0x000a85af, 0x000a87af, 0x000a89af, 0x000a8baf, + 0x000a8daf, 0x000a8faf, 0x000a91af, 0x000a93af, + 0x000a95af, 0x000a97af, 0x000a99af, 0x000a9baf, + 0x000a9daf, 0x000a9faf, 0x000aa1af, 0x000aa3af, + 0x000aa5af, 0x000aa7af, 0x000aa9af, 0x000aabaf, + 0x000aadaf, 0x000aafaf, 0x000ab1af, 0x000ab3af, + 0x000ab5af, 0x000ab7af, 0x000ab9af, 0x000abbaf, + 0x000abdaf, 0x000abfaf, 0x000ac1af, 0x000ac3af, + 0x000ac5af, 0x000ac7af, 0x000ac9af, 0x000acbaf, + 0x000acdaf, 0x000acfaf, 0x000ad1af, 0x000ad3af, + 0x000ad5af, 0x000ad7af, 0x000ad9af, 0x000adbaf, + 0x000addaf, 0x000adfaf, 0x000ae1af, 0x000ae3af, + 0x000ae5af, 0x000ae7af, 0x000ae9af, 0x000aebaf, + 0x000aedaf, 0x000aefaf, 0x000af1af, 0x000af3af, + 0x000af5af, 0x000af7af, 0x000af9af, 0x000afbaf, + 0x000afdaf, 0x000affaf, 0x000b01af, 0x000b03af, + 0x000b05af, 0x000b07af, 0x000b09af, 0x000b0baf, + 0x000b0daf, 0x000b0faf, 0x000b11af, 0x000b13af, + 0x000b15af, 0x000b17af, 0x000b19af, 0x000b1baf, + 0x000b1daf, 0x000b1faf, 0x000b21af, 0x000b23af, + 0x000b25af, 0x000b27af, 0x000b29af, 0x000b2baf, + 0x000b2daf, 0x000b2faf, 0x000b31af, 0x000b33af, + 0x000b35af, 0x000b37af, 0x000b39af, 0x000b3baf, + 0x000b3daf, 0x000b3faf, 0x000b41af, 0x000b43af, + 0x000b45af, 0x000b47af, 0x000b49af, 0x000b4baf, + 0x000b4daf, 0x000b4faf, 0x000b51af, 0x000b53af, + 0x000b55af, 0x000b57af, 0x000b59af, 0x000b5baf, + 0x000b5daf, 0x000b5faf, 0x000b61af, 0x000b63af, + 0x000b65af, 0x000b67af, 0x000b69af, 0x000b6baf, + 0x000b6daf, 0x000b6faf, 0x000b71af, 0x000b73af, + 0x000b75af, 0x000b77af, 0x000b79af, 0x000b7baf, + 0x000b7daf, 0x000b7faf, 0x000b81af, 0x000b83af, + 0x000b85af, 0x000b87af, 0x000b89af, 0x000b8baf, + 0x000b8daf, 0x000b8faf, 0x000b91af, 0x000b93af, + 0x000b95af, 0x000b97af, 0x000b99af, 0x000b9baf, + 0x000b9daf, 0x000b9faf, 0x000ba1af, 0x000ba3af, + 0x000ba5af, 0x000ba7af, 0x000ba9af, 0x000babaf, + 0x000badaf, 0x000bafaf, 0x000bb1af, 0x000bb3af, 
+ 0x000bb5af, 0x000bb7af, 0x000bb9af, 0x000bbbaf, + 0x000bbdaf, 0x000bbfaf, 0x000bc1af, 0x000bc3af, + 0x000bc5af, 0x000bc7af, 0x000bc9af, 0x000bcbaf, + 0x000bcdaf, 0x000bcfaf, 0x000bd1af, 0x000bd3af, + 0x000bd5af, 0x000bd7af, 0x000bd9af, 0x000bdbaf, + 0x000bddaf, 0x000bdfaf, 0x000be1af, 0x000be3af, + 0x000be5af, 0x000be7af, 0x000be9af, 0x000bebaf, + 0x000bedaf, 0x000befaf, 0x000bf1af, 0x000bf3af, + 0x000bf5af, 0x000bf7af, 0x000bf9af, 0x000bfbaf, + 0x000bfdaf, 0x000bffaf, 0x000c01af, 0x000c03af, + 0x000c05af, 0x000c07af, 0x000c09af, 0x000c0baf, + 0x000c0daf, 0x000c0faf, 0x000c11af, 0x000c13af, + 0x000c15af, 0x000c17af, 0x000c19af, 0x000c1baf, + 0x000c1daf, 0x000c1faf, 0x000c21af, 0x000c23af, + 0x000c25af, 0x000c27af, 0x000c29af, 0x000c2baf, + 0x000c2daf, 0x000c2faf, 0x000c31af, 0x000c33af, + 0x000c35af, 0x000c37af, 0x000c39af, 0x000c3baf, + 0x000c3daf, 0x000c3faf, 0x000c41af, 0x000c43af, + 0x000c45af, 0x000c47af, 0x000c49af, 0x000c4baf, + 0x000c4daf, 0x000c4faf, 0x000c51af, 0x000c53af, + 0x000c55af, 0x000c57af, 0x000c59af, 0x000c5baf, + 0x000c5daf, 0x000c5faf, 0x000c61af, 0x000c63af, + 0x000c65af, 0x000c67af, 0x000c69af, 0x000c6baf, + 0x000c6daf, 0x000c6faf, 0x000c71af, 0x000c73af, + 0x000c75af, 0x000c77af, 0x000c79af, 0x000c7baf, + 0x000c7daf, 0x000c7faf, 0x000c81af, 0x000c83af, + 0x000c85af, 0x000c87af, 0x000c89af, 0x000c8baf, + 0x000c8daf, 0x000c8faf, 0x000c91af, 0x000c93af, + 0x000c95af, 0x000c97af, 0x000c99af, 0x000c9baf, + 0x000c9daf, 0x000c9faf, 0x000ca1af, 0x000ca3af, + 0x000ca5af, 0x000ca7af, 0x000ca9af, 0x000cabaf, + 0x000cadaf, 0x000cafaf, 0x000cb1af, 0x000cb3af, + 0x000cb5af, 0x000cb7af, 0x000cb9af, 0x000cbbaf, + 0x000cbdaf, 0x000cbfaf, 0x000cc1af, 0x000cc3af, + 0x000cc5af, 0x000cc7af, 0x000cc9af, 0x000ccbaf, + 0x000ccdaf, 0x000ccfaf, 0x000cd1af, 0x000cd3af, + 0x000cd5af, 0x000cd7af, 0x000cd9af, 0x000cdbaf, + 0x000cddaf, 0x000cdfaf, 0x000ce1af, 0x000ce3af, + 0x000ce5af, 0x000ce7af, 0x000ce9af, 0x000cebaf, + 0x000cedaf, 0x000cefaf, 0x000cf1af, 0x000cf3af, 
+ 0x000cf5af, 0x000cf7af, 0x000cf9af, 0x000cfbaf, + 0x000cfdaf, 0x000cffaf, 0x000d01af, 0x000d03af, + 0x000d05af, 0x000d07af, 0x000d09af, 0x000d0baf, + 0x000d0daf, 0x000d0faf, 0x000d11af, 0x000d13af, + 0x000d15af, 0x000d17af, 0x000d19af, 0x000d1baf, + 0x000d1daf, 0x000d1faf, 0x000d21af, 0x000d23af, + 0x000d25af, 0x000d27af, 0x000d29af, 0x000d2baf, + 0x000d2daf, 0x000d2faf, 0x000d31af, 0x000d33af, + 0x000d35af, 0x000d37af, 0x000d39af, 0x000d3baf, + 0x000d3daf, 0x000d3faf, 0x000d41af, 0x000d43af, + 0x000d45af, 0x000d47af, 0x000d49af, 0x000d4baf, + 0x000d4daf, 0x000d4faf, 0x000d51af, 0x000d53af, + 0x000d55af, 0x000d57af, 0x000d59af, 0x000d5baf, + 0x000d5daf, 0x000d5faf, 0x000d61af, 0x000d63af, + 0x000d65af, 0x000d67af, 0x000d69af, 0x000d6baf, + 0x000d6daf, 0x000d6faf, 0x000d71af, 0x000d73af, + 0x000d75af, 0x000d77af, 0x000d79af, 0x000d7baf, + 0x000d7daf, 0x000d7faf, 0x000d81af, 0x000d83af, + 0x000d85af, 0x000d87af, 0x000d89af, 0x000d8baf, + 0x000d8daf, 0x000d8faf, 0x000d91af, 0x000d93af, + 0x000d95af, 0x000d97af, 0x000d99af, 0x000d9baf, + 0x000d9daf, 0x000d9faf, 0x000da1af, 0x000da3af, + 0x000da5af, 0x000da7af, 0x000da9af, 0x000dabaf, + 0x000dadaf, 0x000dafaf, 0x000db1af, 0x000db3af, + 0x000db5af, 0x000db7af, 0x000db9af, 0x000dbbaf, + 0x000dbdaf, 0x000dbfaf, 0x000dc1af, 0x000dc3af, + 0x000dc5af, 0x000dc7af, 0x000dc9af, 0x000dcbaf, + 0x000dcdaf, 0x000dcfaf, 0x000dd1af, 0x000dd3af, + 0x000dd5af, 0x000dd7af, 0x000dd9af, 0x000ddbaf, + 0x000dddaf, 0x000ddfaf, 0x000de1af, 0x000de3af, + 0x000de5af, 0x000de7af, 0x000de9af, 0x000debaf, + 0x000dedaf, 0x000defaf, 0x000df1af, 0x000df3af, + 0x000df5af, 0x000df7af, 0x000df9af, 0x000dfbaf, + 0x000dfdaf, 0x000dffaf, 0x000e01af, 0x000e03af, + 0x000e05af, 0x000e07af, 0x000e09af, 0x000e0baf, + 0x000e0daf, 0x000e0faf, 0x000e11af, 0x000e13af, + 0x000e15af, 0x000e17af, 0x000e19af, 0x000e1baf, + 0x000e1daf, 0x000e1faf, 0x000e21af, 0x000e23af, + 0x000e25af, 0x000e27af, 0x000e29af, 0x000e2baf, + 0x000e2daf, 0x000e2faf, 0x000e31af, 0x000e33af, 
+ 0x000e35af, 0x000e37af, 0x000e39af, 0x000e3baf, + 0x000e3daf, 0x000e3faf, 0x000e41af, 0x000e43af, + 0x000e45af, 0x000e47af, 0x000e49af, 0x000e4baf, + 0x000e4daf, 0x000e4faf, 0x000e51af, 0x000e53af, + 0x000e55af, 0x000e57af, 0x000e59af, 0x000e5baf, + 0x000e5daf, 0x000e5faf, 0x000e61af, 0x000e63af, + 0x000e65af, 0x000e67af, 0x000e69af, 0x000e6baf, + 0x000e6daf, 0x000e6faf, 0x000e71af, 0x000e73af, + 0x000e75af, 0x000e77af, 0x000e79af, 0x000e7baf, + 0x000e7daf, 0x000e7faf, 0x000e81af, 0x000e83af, + 0x000e85af, 0x000e87af, 0x000e89af, 0x000e8baf, + 0x000e8daf, 0x000e8faf, 0x000e91af, 0x000e93af, + 0x000e95af, 0x000e97af, 0x000e99af, 0x000e9baf, + 0x000e9daf, 0x000e9faf, 0x000ea1af, 0x000ea3af, + 0x000ea5af, 0x000ea7af, 0x000ea9af, 0x000eabaf, + 0x000eadaf, 0x000eafaf, 0x000eb1af, 0x000eb3af, + 0x000eb5af, 0x000eb7af, 0x000eb9af, 0x000ebbaf, + 0x000ebdaf, 0x000ebfaf, 0x000ec1af, 0x000ec3af, + 0x000ec5af, 0x000ec7af, 0x000ec9af, 0x000ecbaf, + 0x000ecdaf, 0x000ecfaf, 0x000ed1af, 0x000ed3af, + 0x000ed5af, 0x000ed7af, 0x000ed9af, 0x000edbaf, + 0x000eddaf, 0x000edfaf, 0x000ee1af, 0x000ee3af, + 0x000ee5af, 0x000ee7af, 0x000ee9af, 0x000eebaf, + 0x000eedaf, 0x000eefaf, 0x000ef1af, 0x000ef3af, + 0x000ef5af, 0x000ef7af, 0x000ef9af, 0x000efbaf, + 0x000efdaf, 0x000effaf, 0x000f01af, 0x000f03af, + 0x000f05af, 0x000f07af, 0x000f09af, 0x000f0baf, + 0x000f0daf, 0x000f0faf, 0x000f11af, 0x000f13af, + 0x000f15af, 0x000f17af, 0x000f19af, 0x000f1baf, + 0x000f1daf, 0x000f1faf, 0x000f21af, 0x000f23af, + 0x000f25af, 0x000f27af, 0x000f29af, 0x000f2baf, + 0x000f2daf, 0x000f2faf, 0x000f31af, 0x000f33af, + 0x000f35af, 0x000f37af, 0x000f39af, 0x000f3baf, + 0x000f3daf, 0x000f3faf, 0x000f41af, 0x000f43af, + 0x000f45af, 0x000f47af, 0x000f49af, 0x000f4baf, + 0x000f4daf, 0x000f4faf, 0x000f51af, 0x000f53af, + 0x000f55af, 0x000f57af, 0x000f59af, 0x000f5baf, + 0x000f5daf, 0x000f5faf, 0x000f61af, 0x000f63af, + 0x000f65af, 0x000f67af, 0x000f69af, 0x000f6baf, + 0x000f6daf, 0x000f6faf, 0x000f71af, 0x000f73af, 
+ 0x000f75af, 0x000f77af, 0x000f79af, 0x000f7baf, + 0x000f7daf, 0x000f7faf, 0x000f81af, 0x000f83af, + 0x000f85af, 0x000f87af, 0x000f89af, 0x000f8baf, + 0x000f8daf, 0x000f8faf, 0x000f91af, 0x000f93af, + 0x000f95af, 0x000f97af, 0x000f99af, 0x000f9baf, + 0x000f9daf, 0x000f9faf, 0x000fa1af, 0x000fa3af, + 0x000fa5af, 0x000fa7af, 0x000fa9af, 0x000fabaf, + 0x000fadaf, 0x000fafaf, 0x000fb1af, 0x000fb3af, + 0x000fb5af, 0x000fb7af, 0x000fb9af, 0x000fbbaf, + 0x000fbdaf, 0x000fbfaf, 0x000fc1af, 0x000fc3af, + 0x000fc5af, 0x000fc7af, 0x000fc9af, 0x000fcbaf, + 0x000fcdaf, 0x000fcfaf, 0x000fd1af, 0x000fd3af, + 0x000fd5af, 0x000fd7af, 0x000fd9af, 0x000fdbaf, + 0x000fddaf, 0x000fdfaf, 0x000fe1af, 0x000fe3af, + 0x000fe5af, 0x000fe7af, 0x000fe9af, 0x000febaf, + 0x000fedaf, 0x000fefaf, 0x000ff1af, 0x000ff3af, + 0x000ff5af, 0x000ff7af, 0x000ff9af, 0x000ffbaf, + 0x000ffdaf, 0x000fffaf, 0x0000006f, 0x0000026f, + 0x0000046f, 0x0000066f, 0x0000086f, 0x00000a6f, + 0x00000c6f, 0x00000e6f, 0x0000106f, 0x0000126f, + 0x0000146f, 0x0000166f, 0x0000186f, 0x00001a6f, + 0x00001c6f, 0x00001e6f, 0x0000206f, 0x0000226f, + 0x0000246f, 0x0000266f, 0x0000286f, 0x00002a6f, + 0x00002c6f, 0x00002e6f, 0x0000306f, 0x0000326f, + 0x0000346f, 0x0000366f, 0x0000386f, 0x00003a6f, + 0x00003c6f, 0x00003e6f, 0x0000406f, 0x0000426f, + 0x0000446f, 0x0000466f, 0x0000486f, 0x00004a6f, + 0x00004c6f, 0x00004e6f, 0x0000506f, 0x0000526f, + 0x0000546f, 0x0000566f, 0x0000586f, 0x00005a6f, + 0x00005c6f, 0x00005e6f, 0x0000606f, 0x0000626f, + 0x0000646f, 0x0000666f, 0x0000686f, 0x00006a6f, + 0x00006c6f, 0x00006e6f, 0x0000706f, 0x0000726f, + 0x0000746f, 0x0000766f, 0x0000786f, 0x00007a6f, + 0x00007c6f, 0x00007e6f, 0x0000806f, 0x0000826f, + 0x0000846f, 0x0000866f, 0x0000886f, 0x00008a6f, + 0x00008c6f, 0x00008e6f, 0x0000906f, 0x0000926f, + 0x0000946f, 0x0000966f, 0x0000986f, 0x00009a6f, + 0x00009c6f, 0x00009e6f, 0x0000a06f, 0x0000a26f, + 0x0000a46f, 0x0000a66f, 0x0000a86f, 0x0000aa6f, + 0x0000ac6f, 0x0000ae6f, 0x0000b06f, 0x0000b26f, 
+ 0x0000b46f, 0x0000b66f, 0x0000b86f, 0x0000ba6f, + 0x0000bc6f, 0x0000be6f, 0x0000c06f, 0x0000c26f, + 0x0000c46f, 0x0000c66f, 0x0000c86f, 0x0000ca6f, + 0x0000cc6f, 0x0000ce6f, 0x0000d06f, 0x0000d26f, + 0x0000d46f, 0x0000d66f, 0x0000d86f, 0x0000da6f, + 0x0000dc6f, 0x0000de6f, 0x0000e06f, 0x0000e26f, + 0x0000e46f, 0x0000e66f, 0x0000e86f, 0x0000ea6f, + 0x0000ec6f, 0x0000ee6f, 0x0000f06f, 0x0000f26f, + 0x0000f46f, 0x0000f66f, 0x0000f86f, 0x0000fa6f, + 0x0000fc6f, 0x0000fe6f, 0x0001006f, 0x0001026f, + 0x0001046f, 0x0001066f, 0x0001086f, 0x00010a6f, + 0x00010c6f, 0x00010e6f, 0x0001106f, 0x0001126f, + 0x0001146f, 0x0001166f, 0x0001186f, 0x00011a6f, + 0x00011c6f, 0x00011e6f, 0x0001206f, 0x0001226f, + 0x0001246f, 0x0001266f, 0x0001286f, 0x00012a6f, + 0x00012c6f, 0x00012e6f, 0x0001306f, 0x0001326f, + 0x0001346f, 0x0001366f, 0x0001386f, 0x00013a6f, + 0x00013c6f, 0x00013e6f, 0x0001406f, 0x0001426f, + 0x0001446f, 0x0001466f, 0x0001486f, 0x00014a6f, + 0x00014c6f, 0x00014e6f, 0x0001506f, 0x0001526f, + 0x0001546f, 0x0001566f, 0x0001586f, 0x00015a6f, + 0x00015c6f, 0x00015e6f, 0x0001606f, 0x0001626f, + 0x0001646f, 0x0001666f, 0x0001686f, 0x00016a6f, + 0x00016c6f, 0x00016e6f, 0x0001706f, 0x0001726f, + 0x0001746f, 0x0001766f, 0x0001786f, 0x00017a6f, + 0x00017c6f, 0x00017e6f, 0x0001806f, 0x0001826f, + 0x0001846f, 0x0001866f, 0x0001886f, 0x00018a6f, + 0x00018c6f, 0x00018e6f, 0x0001906f, 0x0001926f, + 0x0001946f, 0x0001966f, 0x0001986f, 0x00019a6f, + 0x00019c6f, 0x00019e6f, 0x0001a06f, 0x0001a26f, + 0x0001a46f, 0x0001a66f, 0x0001a86f, 0x0001aa6f, + 0x0001ac6f, 0x0001ae6f, 0x0001b06f, 0x0001b26f, + 0x0001b46f, 0x0001b66f, 0x0001b86f, 0x0001ba6f, + 0x0001bc6f, 0x0001be6f, 0x0001c06f, 0x0001c26f, + 0x0001c46f, 0x0001c66f, 0x0001c86f, 0x0001ca6f, + 0x0001cc6f, 0x0001ce6f, 0x0001d06f, 0x0001d26f, + 0x0001d46f, 0x0001d66f, 0x0001d86f, 0x0001da6f, + 0x0001dc6f, 0x0001de6f, 0x0001e06f, 0x0001e26f, + 0x0001e46f, 0x0001e66f, 0x0001e86f, 0x0001ea6f, + 0x0001ec6f, 0x0001ee6f, 0x0001f06f, 0x0001f26f, 
+ 0x0001f46f, 0x0001f66f, 0x0001f86f, 0x0001fa6f, + 0x0001fc6f, 0x0001fe6f, 0x0002006f, 0x0002026f, + 0x0002046f, 0x0002066f, 0x0002086f, 0x00020a6f, + 0x00020c6f, 0x00020e6f, 0x0002106f, 0x0002126f, + 0x0002146f, 0x0002166f, 0x0002186f, 0x00021a6f, + 0x00021c6f, 0x00021e6f, 0x0002206f, 0x0002226f, + 0x0002246f, 0x0002266f, 0x0002286f, 0x00022a6f, + 0x00022c6f, 0x00022e6f, 0x0002306f, 0x0002326f, + 0x0002346f, 0x0002366f, 0x0002386f, 0x00023a6f, + 0x00023c6f, 0x00023e6f, 0x0002406f, 0x0002426f, + 0x0002446f, 0x0002466f, 0x0002486f, 0x00024a6f, + 0x00024c6f, 0x00024e6f, 0x0002506f, 0x0002526f, + 0x0002546f, 0x0002566f, 0x0002586f, 0x00025a6f, + 0x00025c6f, 0x00025e6f, 0x0002606f, 0x0002626f, + 0x0002646f, 0x0002666f, 0x0002686f, 0x00026a6f, + 0x00026c6f, 0x00026e6f, 0x0002706f, 0x0002726f, + 0x0002746f, 0x0002766f, 0x0002786f, 0x00027a6f, + 0x00027c6f, 0x00027e6f, 0x0002806f, 0x0002826f, + 0x0002846f, 0x0002866f, 0x0002886f, 0x00028a6f, + 0x00028c6f, 0x00028e6f, 0x0002906f, 0x0002926f, + 0x0002946f, 0x0002966f, 0x0002986f, 0x00029a6f, + 0x00029c6f, 0x00029e6f, 0x0002a06f, 0x0002a26f, + 0x0002a46f, 0x0002a66f, 0x0002a86f, 0x0002aa6f, + 0x0002ac6f, 0x0002ae6f, 0x0002b06f, 0x0002b26f, + 0x0002b46f, 0x0002b66f, 0x0002b86f, 0x0002ba6f, + 0x0002bc6f, 0x0002be6f, 0x0002c06f, 0x0002c26f, + 0x0002c46f, 0x0002c66f, 0x0002c86f, 0x0002ca6f, + 0x0002cc6f, 0x0002ce6f, 0x0002d06f, 0x0002d26f, + 0x0002d46f, 0x0002d66f, 0x0002d86f, 0x0002da6f, + 0x0002dc6f, 0x0002de6f, 0x0002e06f, 0x0002e26f, + 0x0002e46f, 0x0002e66f, 0x0002e86f, 0x0002ea6f, + 0x0002ec6f, 0x0002ee6f, 0x0002f06f, 0x0002f26f, + 0x0002f46f, 0x0002f66f, 0x0002f86f, 0x0002fa6f, + 0x0002fc6f, 0x0002fe6f, 0x0003006f, 0x0003026f, + 0x0003046f, 0x0003066f, 0x0003086f, 0x00030a6f, + 0x00030c6f, 0x00030e6f, 0x0003106f, 0x0003126f, + 0x0003146f, 0x0003166f, 0x0003186f, 0x00031a6f, + 0x00031c6f, 0x00031e6f, 0x0003206f, 0x0003226f, + 0x0003246f, 0x0003266f, 0x0003286f, 0x00032a6f, + 0x00032c6f, 0x00032e6f, 0x0003306f, 0x0003326f, 
+ 0x0003346f, 0x0003366f, 0x0003386f, 0x00033a6f, + 0x00033c6f, 0x00033e6f, 0x0003406f, 0x0003426f, + 0x0003446f, 0x0003466f, 0x0003486f, 0x00034a6f, + 0x00034c6f, 0x00034e6f, 0x0003506f, 0x0003526f, + 0x0003546f, 0x0003566f, 0x0003586f, 0x00035a6f, + 0x00035c6f, 0x00035e6f, 0x0003606f, 0x0003626f, + 0x0003646f, 0x0003666f, 0x0003686f, 0x00036a6f, + 0x00036c6f, 0x00036e6f, 0x0003706f, 0x0003726f, + 0x0003746f, 0x0003766f, 0x0003786f, 0x00037a6f, + 0x00037c6f, 0x00037e6f, 0x0003806f, 0x0003826f, + 0x0003846f, 0x0003866f, 0x0003886f, 0x00038a6f, + 0x00038c6f, 0x00038e6f, 0x0003906f, 0x0003926f, + 0x0003946f, 0x0003966f, 0x0003986f, 0x00039a6f, + 0x00039c6f, 0x00039e6f, 0x0003a06f, 0x0003a26f, + 0x0003a46f, 0x0003a66f, 0x0003a86f, 0x0003aa6f, + 0x0003ac6f, 0x0003ae6f, 0x0003b06f, 0x0003b26f, + 0x0003b46f, 0x0003b66f, 0x0003b86f, 0x0003ba6f, + 0x0003bc6f, 0x0003be6f, 0x0003c06f, 0x0003c26f, + 0x0003c46f, 0x0003c66f, 0x0003c86f, 0x0003ca6f, + 0x0003cc6f, 0x0003ce6f, 0x0003d06f, 0x0003d26f, + 0x0003d46f, 0x0003d66f, 0x0003d86f, 0x0003da6f, + 0x0003dc6f, 0x0003de6f, 0x0003e06f, 0x0003e26f, + 0x0003e46f, 0x0003e66f, 0x0003e86f, 0x0003ea6f, + 0x0003ec6f, 0x0003ee6f, 0x0003f06f, 0x0003f26f, + 0x0003f46f, 0x0003f66f, 0x0003f86f, 0x0003fa6f, + 0x0003fc6f, 0x0003fe6f, 0x0004006f, 0x0004026f, + 0x0004046f, 0x0004066f, 0x0004086f, 0x00040a6f, + 0x00040c6f, 0x00040e6f, 0x0004106f, 0x0004126f, + 0x0004146f, 0x0004166f, 0x0004186f, 0x00041a6f, + 0x00041c6f, 0x00041e6f, 0x0004206f, 0x0004226f, + 0x0004246f, 0x0004266f, 0x0004286f, 0x00042a6f, + 0x00042c6f, 0x00042e6f, 0x0004306f, 0x0004326f, + 0x0004346f, 0x0004366f, 0x0004386f, 0x00043a6f, + 0x00043c6f, 0x00043e6f, 0x0004406f, 0x0004426f, + 0x0004446f, 0x0004466f, 0x0004486f, 0x00044a6f, + 0x00044c6f, 0x00044e6f, 0x0004506f, 0x0004526f, + 0x0004546f, 0x0004566f, 0x0004586f, 0x00045a6f, + 0x00045c6f, 0x00045e6f, 0x0004606f, 0x0004626f, + 0x0004646f, 0x0004666f, 0x0004686f, 0x00046a6f, + 0x00046c6f, 0x00046e6f, 0x0004706f, 0x0004726f, 
+ 0x0004746f, 0x0004766f, 0x0004786f, 0x00047a6f, + 0x00047c6f, 0x00047e6f, 0x0004806f, 0x0004826f, + 0x0004846f, 0x0004866f, 0x0004886f, 0x00048a6f, + 0x00048c6f, 0x00048e6f, 0x0004906f, 0x0004926f, + 0x0004946f, 0x0004966f, 0x0004986f, 0x00049a6f, + 0x00049c6f, 0x00049e6f, 0x0004a06f, 0x0004a26f, + 0x0004a46f, 0x0004a66f, 0x0004a86f, 0x0004aa6f, + 0x0004ac6f, 0x0004ae6f, 0x0004b06f, 0x0004b26f, + 0x0004b46f, 0x0004b66f, 0x0004b86f, 0x0004ba6f, + 0x0004bc6f, 0x0004be6f, 0x0004c06f, 0x0004c26f, + 0x0004c46f, 0x0004c66f, 0x0004c86f, 0x0004ca6f, + 0x0004cc6f, 0x0004ce6f, 0x0004d06f, 0x0004d26f, + 0x0004d46f, 0x0004d66f, 0x0004d86f, 0x0004da6f, + 0x0004dc6f, 0x0004de6f, 0x0004e06f, 0x0004e26f, + 0x0004e46f, 0x0004e66f, 0x0004e86f, 0x0004ea6f, + 0x0004ec6f, 0x0004ee6f, 0x0004f06f, 0x0004f26f, + 0x0004f46f, 0x0004f66f, 0x0004f86f, 0x0004fa6f, + 0x0004fc6f, 0x0004fe6f, 0x0005006f, 0x0005026f, + 0x0005046f, 0x0005066f, 0x0005086f, 0x00050a6f, + 0x00050c6f, 0x00050e6f, 0x0005106f, 0x0005126f, + 0x0005146f, 0x0005166f, 0x0005186f, 0x00051a6f, + 0x00051c6f, 0x00051e6f, 0x0005206f, 0x0005226f, + 0x0005246f, 0x0005266f, 0x0005286f, 0x00052a6f, + 0x00052c6f, 0x00052e6f, 0x0005306f, 0x0005326f, + 0x0005346f, 0x0005366f, 0x0005386f, 0x00053a6f, + 0x00053c6f, 0x00053e6f, 0x0005406f, 0x0005426f, + 0x0005446f, 0x0005466f, 0x0005486f, 0x00054a6f, + 0x00054c6f, 0x00054e6f, 0x0005506f, 0x0005526f, + 0x0005546f, 0x0005566f, 0x0005586f, 0x00055a6f, + 0x00055c6f, 0x00055e6f, 0x0005606f, 0x0005626f, + 0x0005646f, 0x0005666f, 0x0005686f, 0x00056a6f, + 0x00056c6f, 0x00056e6f, 0x0005706f, 0x0005726f, + 0x0005746f, 0x0005766f, 0x0005786f, 0x00057a6f, + 0x00057c6f, 0x00057e6f, 0x0005806f, 0x0005826f, + 0x0005846f, 0x0005866f, 0x0005886f, 0x00058a6f, + 0x00058c6f, 0x00058e6f, 0x0005906f, 0x0005926f, + 0x0005946f, 0x0005966f, 0x0005986f, 0x00059a6f, + 0x00059c6f, 0x00059e6f, 0x0005a06f, 0x0005a26f, + 0x0005a46f, 0x0005a66f, 0x0005a86f, 0x0005aa6f, + 0x0005ac6f, 0x0005ae6f, 0x0005b06f, 0x0005b26f, 
+ 0x0005b46f, 0x0005b66f, 0x0005b86f, 0x0005ba6f, + 0x0005bc6f, 0x0005be6f, 0x0005c06f, 0x0005c26f, + 0x0005c46f, 0x0005c66f, 0x0005c86f, 0x0005ca6f, + 0x0005cc6f, 0x0005ce6f, 0x0005d06f, 0x0005d26f, + 0x0005d46f, 0x0005d66f, 0x0005d86f, 0x0005da6f, + 0x0005dc6f, 0x0005de6f, 0x0005e06f, 0x0005e26f, + 0x0005e46f, 0x0005e66f, 0x0005e86f, 0x0005ea6f, + 0x0005ec6f, 0x0005ee6f, 0x0005f06f, 0x0005f26f, + 0x0005f46f, 0x0005f66f, 0x0005f86f, 0x0005fa6f, + 0x0005fc6f, 0x0005fe6f, 0x0006006f, 0x0006026f, + 0x0006046f, 0x0006066f, 0x0006086f, 0x00060a6f, + 0x00060c6f, 0x00060e6f, 0x0006106f, 0x0006126f, + 0x0006146f, 0x0006166f, 0x0006186f, 0x00061a6f, + 0x00061c6f, 0x00061e6f, 0x0006206f, 0x0006226f, + 0x0006246f, 0x0006266f, 0x0006286f, 0x00062a6f, + 0x00062c6f, 0x00062e6f, 0x0006306f, 0x0006326f, + 0x0006346f, 0x0006366f, 0x0006386f, 0x00063a6f, + 0x00063c6f, 0x00063e6f, 0x0006406f, 0x0006426f, + 0x0006446f, 0x0006466f, 0x0006486f, 0x00064a6f, + 0x00064c6f, 0x00064e6f, 0x0006506f, 0x0006526f, + 0x0006546f, 0x0006566f, 0x0006586f, 0x00065a6f, + 0x00065c6f, 0x00065e6f, 0x0006606f, 0x0006626f, + 0x0006646f, 0x0006666f, 0x0006686f, 0x00066a6f, + 0x00066c6f, 0x00066e6f, 0x0006706f, 0x0006726f, + 0x0006746f, 0x0006766f, 0x0006786f, 0x00067a6f, + 0x00067c6f, 0x00067e6f, 0x0006806f, 0x0006826f, + 0x0006846f, 0x0006866f, 0x0006886f, 0x00068a6f, + 0x00068c6f, 0x00068e6f, 0x0006906f, 0x0006926f, + 0x0006946f, 0x0006966f, 0x0006986f, 0x00069a6f, + 0x00069c6f, 0x00069e6f, 0x0006a06f, 0x0006a26f, + 0x0006a46f, 0x0006a66f, 0x0006a86f, 0x0006aa6f, + 0x0006ac6f, 0x0006ae6f, 0x0006b06f, 0x0006b26f, + 0x0006b46f, 0x0006b66f, 0x0006b86f, 0x0006ba6f, + 0x0006bc6f, 0x0006be6f, 0x0006c06f, 0x0006c26f, + 0x0006c46f, 0x0006c66f, 0x0006c86f, 0x0006ca6f, + 0x0006cc6f, 0x0006ce6f, 0x0006d06f, 0x0006d26f, + 0x0006d46f, 0x0006d66f, 0x0006d86f, 0x0006da6f, + 0x0006dc6f, 0x0006de6f, 0x0006e06f, 0x0006e26f, + 0x0006e46f, 0x0006e66f, 0x0006e86f, 0x0006ea6f, + 0x0006ec6f, 0x0006ee6f, 0x0006f06f, 0x0006f26f, 
+ 0x0006f46f, 0x0006f66f, 0x0006f86f, 0x0006fa6f, + 0x0006fc6f, 0x0006fe6f, 0x0007006f, 0x0007026f, + 0x0007046f, 0x0007066f, 0x0007086f, 0x00070a6f, + 0x00070c6f, 0x00070e6f, 0x0007106f, 0x0007126f, + 0x0007146f, 0x0007166f, 0x0007186f, 0x00071a6f, + 0x00071c6f, 0x00071e6f, 0x0007206f, 0x0007226f, + 0x0007246f, 0x0007266f, 0x0007286f, 0x00072a6f, + 0x00072c6f, 0x00072e6f, 0x0007306f, 0x0007326f, + 0x0007346f, 0x0007366f, 0x0007386f, 0x00073a6f, + 0x00073c6f, 0x00073e6f, 0x0007406f, 0x0007426f, + 0x0007446f, 0x0007466f, 0x0007486f, 0x00074a6f, + 0x00074c6f, 0x00074e6f, 0x0007506f, 0x0007526f, + 0x0007546f, 0x0007566f, 0x0007586f, 0x00075a6f, + 0x00075c6f, 0x00075e6f, 0x0007606f, 0x0007626f, + 0x0007646f, 0x0007666f, 0x0007686f, 0x00076a6f, + 0x00076c6f, 0x00076e6f, 0x0007706f, 0x0007726f, + 0x0007746f, 0x0007766f, 0x0007786f, 0x00077a6f, + 0x00077c6f, 0x00077e6f, 0x0007806f, 0x0007826f, + 0x0007846f, 0x0007866f, 0x0007886f, 0x00078a6f, + 0x00078c6f, 0x00078e6f, 0x0007906f, 0x0007926f, + 0x0007946f, 0x0007966f, 0x0007986f, 0x00079a6f, + 0x00079c6f, 0x00079e6f, 0x0007a06f, 0x0007a26f, + 0x0007a46f, 0x0007a66f, 0x0007a86f, 0x0007aa6f, + 0x0007ac6f, 0x0007ae6f, 0x0007b06f, 0x0007b26f, + 0x0007b46f, 0x0007b66f, 0x0007b86f, 0x0007ba6f, + 0x0007bc6f, 0x0007be6f, 0x0007c06f, 0x0007c26f, + 0x0007c46f, 0x0007c66f, 0x0007c86f, 0x0007ca6f, + 0x0007cc6f, 0x0007ce6f, 0x0007d06f, 0x0007d26f, + 0x0007d46f, 0x0007d66f, 0x0007d86f, 0x0007da6f, + 0x0007dc6f, 0x0007de6f, 0x0007e06f, 0x0007e26f, + 0x0007e46f, 0x0007e66f, 0x0007e86f, 0x0007ea6f, + 0x0007ec6f, 0x0007ee6f, 0x0007f06f, 0x0007f26f, + 0x0007f46f, 0x0007f66f, 0x0007f86f, 0x0007fa6f, + 0x0007fc6f, 0x0007fe6f, 0x0008006f, 0x0008026f, + 0x0008046f, 0x0008066f, 0x0008086f, 0x00080a6f, + 0x00080c6f, 0x00080e6f, 0x0008106f, 0x0008126f, + 0x0008146f, 0x0008166f, 0x0008186f, 0x00081a6f, + 0x00081c6f, 0x00081e6f, 0x0008206f, 0x0008226f, + 0x0008246f, 0x0008266f, 0x0008286f, 0x00082a6f, + 0x00082c6f, 0x00082e6f, 0x0008306f, 0x0008326f, 
+ 0x0008346f, 0x0008366f, 0x0008386f, 0x00083a6f, + 0x00083c6f, 0x00083e6f, 0x0008406f, 0x0008426f, + 0x0008446f, 0x0008466f, 0x0008486f, 0x00084a6f, + 0x00084c6f, 0x00084e6f, 0x0008506f, 0x0008526f, + 0x0008546f, 0x0008566f, 0x0008586f, 0x00085a6f, + 0x00085c6f, 0x00085e6f, 0x0008606f, 0x0008626f, + 0x0008646f, 0x0008666f, 0x0008686f, 0x00086a6f, + 0x00086c6f, 0x00086e6f, 0x0008706f, 0x0008726f, + 0x0008746f, 0x0008766f, 0x0008786f, 0x00087a6f, + 0x00087c6f, 0x00087e6f, 0x0008806f, 0x0008826f, + 0x0008846f, 0x0008866f, 0x0008886f, 0x00088a6f, + 0x00088c6f, 0x00088e6f, 0x0008906f, 0x0008926f, + 0x0008946f, 0x0008966f, 0x0008986f, 0x00089a6f, + 0x00089c6f, 0x00089e6f, 0x0008a06f, 0x0008a26f, + 0x0008a46f, 0x0008a66f, 0x0008a86f, 0x0008aa6f, + 0x0008ac6f, 0x0008ae6f, 0x0008b06f, 0x0008b26f, + 0x0008b46f, 0x0008b66f, 0x0008b86f, 0x0008ba6f, + 0x0008bc6f, 0x0008be6f, 0x0008c06f, 0x0008c26f, + 0x0008c46f, 0x0008c66f, 0x0008c86f, 0x0008ca6f, + 0x0008cc6f, 0x0008ce6f, 0x0008d06f, 0x0008d26f, + 0x0008d46f, 0x0008d66f, 0x0008d86f, 0x0008da6f, + 0x0008dc6f, 0x0008de6f, 0x0008e06f, 0x0008e26f, + 0x0008e46f, 0x0008e66f, 0x0008e86f, 0x0008ea6f, + 0x0008ec6f, 0x0008ee6f, 0x0008f06f, 0x0008f26f, + 0x0008f46f, 0x0008f66f, 0x0008f86f, 0x0008fa6f, + 0x0008fc6f, 0x0008fe6f, 0x0009006f, 0x0009026f, + 0x0009046f, 0x0009066f, 0x0009086f, 0x00090a6f, + 0x00090c6f, 0x00090e6f, 0x0009106f, 0x0009126f, + 0x0009146f, 0x0009166f, 0x0009186f, 0x00091a6f, + 0x00091c6f, 0x00091e6f, 0x0009206f, 0x0009226f, + 0x0009246f, 0x0009266f, 0x0009286f, 0x00092a6f, + 0x00092c6f, 0x00092e6f, 0x0009306f, 0x0009326f, + 0x0009346f, 0x0009366f, 0x0009386f, 0x00093a6f, + 0x00093c6f, 0x00093e6f, 0x0009406f, 0x0009426f, + 0x0009446f, 0x0009466f, 0x0009486f, 0x00094a6f, + 0x00094c6f, 0x00094e6f, 0x0009506f, 0x0009526f, + 0x0009546f, 0x0009566f, 0x0009586f, 0x00095a6f, + 0x00095c6f, 0x00095e6f, 0x0009606f, 0x0009626f, + 0x0009646f, 0x0009666f, 0x0009686f, 0x00096a6f, + 0x00096c6f, 0x00096e6f, 0x0009706f, 0x0009726f, 
+ 0x0009746f, 0x0009766f, 0x0009786f, 0x00097a6f, + 0x00097c6f, 0x00097e6f, 0x0009806f, 0x0009826f, + 0x0009846f, 0x0009866f, 0x0009886f, 0x00098a6f, + 0x00098c6f, 0x00098e6f, 0x0009906f, 0x0009926f, + 0x0009946f, 0x0009966f, 0x0009986f, 0x00099a6f, + 0x00099c6f, 0x00099e6f, 0x0009a06f, 0x0009a26f, + 0x0009a46f, 0x0009a66f, 0x0009a86f, 0x0009aa6f, + 0x0009ac6f, 0x0009ae6f, 0x0009b06f, 0x0009b26f, + 0x0009b46f, 0x0009b66f, 0x0009b86f, 0x0009ba6f, + 0x0009bc6f, 0x0009be6f, 0x0009c06f, 0x0009c26f, + 0x0009c46f, 0x0009c66f, 0x0009c86f, 0x0009ca6f, + 0x0009cc6f, 0x0009ce6f, 0x0009d06f, 0x0009d26f, + 0x0009d46f, 0x0009d66f, 0x0009d86f, 0x0009da6f, + 0x0009dc6f, 0x0009de6f, 0x0009e06f, 0x0009e26f, + 0x0009e46f, 0x0009e66f, 0x0009e86f, 0x0009ea6f, + 0x0009ec6f, 0x0009ee6f, 0x0009f06f, 0x0009f26f, + 0x0009f46f, 0x0009f66f, 0x0009f86f, 0x0009fa6f, + 0x0009fc6f, 0x0009fe6f, 0x000a006f, 0x000a026f, + 0x000a046f, 0x000a066f, 0x000a086f, 0x000a0a6f, + 0x000a0c6f, 0x000a0e6f, 0x000a106f, 0x000a126f, + 0x000a146f, 0x000a166f, 0x000a186f, 0x000a1a6f, + 0x000a1c6f, 0x000a1e6f, 0x000a206f, 0x000a226f, + 0x000a246f, 0x000a266f, 0x000a286f, 0x000a2a6f, + 0x000a2c6f, 0x000a2e6f, 0x000a306f, 0x000a326f, + 0x000a346f, 0x000a366f, 0x000a386f, 0x000a3a6f, + 0x000a3c6f, 0x000a3e6f, 0x000a406f, 0x000a426f, + 0x000a446f, 0x000a466f, 0x000a486f, 0x000a4a6f, + 0x000a4c6f, 0x000a4e6f, 0x000a506f, 0x000a526f, + 0x000a546f, 0x000a566f, 0x000a586f, 0x000a5a6f, + 0x000a5c6f, 0x000a5e6f, 0x000a606f, 0x000a626f, + 0x000a646f, 0x000a666f, 0x000a686f, 0x000a6a6f, + 0x000a6c6f, 0x000a6e6f, 0x000a706f, 0x000a726f, + 0x000a746f, 0x000a766f, 0x000a786f, 0x000a7a6f, + 0x000a7c6f, 0x000a7e6f, 0x000a806f, 0x000a826f, + 0x000a846f, 0x000a866f, 0x000a886f, 0x000a8a6f, + 0x000a8c6f, 0x000a8e6f, 0x000a906f, 0x000a926f, + 0x000a946f, 0x000a966f, 0x000a986f, 0x000a9a6f, + 0x000a9c6f, 0x000a9e6f, 0x000aa06f, 0x000aa26f, + 0x000aa46f, 0x000aa66f, 0x000aa86f, 0x000aaa6f, + 0x000aac6f, 0x000aae6f, 0x000ab06f, 0x000ab26f, 
+ 0x000ab46f, 0x000ab66f, 0x000ab86f, 0x000aba6f, + 0x000abc6f, 0x000abe6f, 0x000ac06f, 0x000ac26f, + 0x000ac46f, 0x000ac66f, 0x000ac86f, 0x000aca6f, + 0x000acc6f, 0x000ace6f, 0x000ad06f, 0x000ad26f, + 0x000ad46f, 0x000ad66f, 0x000ad86f, 0x000ada6f, + 0x000adc6f, 0x000ade6f, 0x000ae06f, 0x000ae26f, + 0x000ae46f, 0x000ae66f, 0x000ae86f, 0x000aea6f, + 0x000aec6f, 0x000aee6f, 0x000af06f, 0x000af26f, + 0x000af46f, 0x000af66f, 0x000af86f, 0x000afa6f, + 0x000afc6f, 0x000afe6f, 0x000b006f, 0x000b026f, + 0x000b046f, 0x000b066f, 0x000b086f, 0x000b0a6f, + 0x000b0c6f, 0x000b0e6f, 0x000b106f, 0x000b126f, + 0x000b146f, 0x000b166f, 0x000b186f, 0x000b1a6f, + 0x000b1c6f, 0x000b1e6f, 0x000b206f, 0x000b226f, + 0x000b246f, 0x000b266f, 0x000b286f, 0x000b2a6f, + 0x000b2c6f, 0x000b2e6f, 0x000b306f, 0x000b326f, + 0x000b346f, 0x000b366f, 0x000b386f, 0x000b3a6f, + 0x000b3c6f, 0x000b3e6f, 0x000b406f, 0x000b426f, + 0x000b446f, 0x000b466f, 0x000b486f, 0x000b4a6f, + 0x000b4c6f, 0x000b4e6f, 0x000b506f, 0x000b526f, + 0x000b546f, 0x000b566f, 0x000b586f, 0x000b5a6f, + 0x000b5c6f, 0x000b5e6f, 0x000b606f, 0x000b626f, + 0x000b646f, 0x000b666f, 0x000b686f, 0x000b6a6f, + 0x000b6c6f, 0x000b6e6f, 0x000b706f, 0x000b726f, + 0x000b746f, 0x000b766f, 0x000b786f, 0x000b7a6f, + 0x000b7c6f, 0x000b7e6f, 0x000b806f, 0x000b826f, + 0x000b846f, 0x000b866f, 0x000b886f, 0x000b8a6f, + 0x000b8c6f, 0x000b8e6f, 0x000b906f, 0x000b926f, + 0x000b946f, 0x000b966f, 0x000b986f, 0x000b9a6f, + 0x000b9c6f, 0x000b9e6f, 0x000ba06f, 0x000ba26f, + 0x000ba46f, 0x000ba66f, 0x000ba86f, 0x000baa6f, + 0x000bac6f, 0x000bae6f, 0x000bb06f, 0x000bb26f, + 0x000bb46f, 0x000bb66f, 0x000bb86f, 0x000bba6f, + 0x000bbc6f, 0x000bbe6f, 0x000bc06f, 0x000bc26f, + 0x000bc46f, 0x000bc66f, 0x000bc86f, 0x000bca6f, + 0x000bcc6f, 0x000bce6f, 0x000bd06f, 0x000bd26f, + 0x000bd46f, 0x000bd66f, 0x000bd86f, 0x000bda6f, + 0x000bdc6f, 0x000bde6f, 0x000be06f, 0x000be26f, + 0x000be46f, 0x000be66f, 0x000be86f, 0x000bea6f, + 0x000bec6f, 0x000bee6f, 0x000bf06f, 0x000bf26f, 
+ 0x000bf46f, 0x000bf66f, 0x000bf86f, 0x000bfa6f, + 0x000bfc6f, 0x000bfe6f, 0x000c006f, 0x000c026f, + 0x000c046f, 0x000c066f, 0x000c086f, 0x000c0a6f, + 0x000c0c6f, 0x000c0e6f, 0x000c106f, 0x000c126f, + 0x000c146f, 0x000c166f, 0x000c186f, 0x000c1a6f, + 0x000c1c6f, 0x000c1e6f, 0x000c206f, 0x000c226f, + 0x000c246f, 0x000c266f, 0x000c286f, 0x000c2a6f, + 0x000c2c6f, 0x000c2e6f, 0x000c306f, 0x000c326f, + 0x000c346f, 0x000c366f, 0x000c386f, 0x000c3a6f, + 0x000c3c6f, 0x000c3e6f, 0x000c406f, 0x000c426f, + 0x000c446f, 0x000c466f, 0x000c486f, 0x000c4a6f, + 0x000c4c6f, 0x000c4e6f, 0x000c506f, 0x000c526f, + 0x000c546f, 0x000c566f, 0x000c586f, 0x000c5a6f, + 0x000c5c6f, 0x000c5e6f, 0x000c606f, 0x000c626f, + 0x000c646f, 0x000c666f, 0x000c686f, 0x000c6a6f, + 0x000c6c6f, 0x000c6e6f, 0x000c706f, 0x000c726f, + 0x000c746f, 0x000c766f, 0x000c786f, 0x000c7a6f, + 0x000c7c6f, 0x000c7e6f, 0x000c806f, 0x000c826f, + 0x000c846f, 0x000c866f, 0x000c886f, 0x000c8a6f, + 0x000c8c6f, 0x000c8e6f, 0x000c906f, 0x000c926f, + 0x000c946f, 0x000c966f, 0x000c986f, 0x000c9a6f, + 0x000c9c6f, 0x000c9e6f, 0x000ca06f, 0x000ca26f, + 0x000ca46f, 0x000ca66f, 0x000ca86f, 0x000caa6f, + 0x000cac6f, 0x000cae6f, 0x000cb06f, 0x000cb26f, + 0x000cb46f, 0x000cb66f, 0x000cb86f, 0x000cba6f, + 0x000cbc6f, 0x000cbe6f, 0x000cc06f, 0x000cc26f, + 0x000cc46f, 0x000cc66f, 0x000cc86f, 0x000cca6f, + 0x000ccc6f, 0x000cce6f, 0x000cd06f, 0x000cd26f, + 0x000cd46f, 0x000cd66f, 0x000cd86f, 0x000cda6f, + 0x000cdc6f, 0x000cde6f, 0x000ce06f, 0x000ce26f, + 0x000ce46f, 0x000ce66f, 0x000ce86f, 0x000cea6f, + 0x000cec6f, 0x000cee6f, 0x000cf06f, 0x000cf26f, + 0x000cf46f, 0x000cf66f, 0x000cf86f, 0x000cfa6f, + 0x000cfc6f, 0x000cfe6f, 0x000d006f, 0x000d026f, + 0x000d046f, 0x000d066f, 0x000d086f, 0x000d0a6f, + 0x000d0c6f, 0x000d0e6f, 0x000d106f, 0x000d126f, + 0x000d146f, 0x000d166f, 0x000d186f, 0x000d1a6f, + 0x000d1c6f, 0x000d1e6f, 0x000d206f, 0x000d226f, + 0x000d246f, 0x000d266f, 0x000d286f, 0x000d2a6f, + 0x000d2c6f, 0x000d2e6f, 0x000d306f, 0x000d326f, 
+ 0x000d346f, 0x000d366f, 0x000d386f, 0x000d3a6f, + 0x000d3c6f, 0x000d3e6f, 0x000d406f, 0x000d426f, + 0x000d446f, 0x000d466f, 0x000d486f, 0x000d4a6f, + 0x000d4c6f, 0x000d4e6f, 0x000d506f, 0x000d526f, + 0x000d546f, 0x000d566f, 0x000d586f, 0x000d5a6f, + 0x000d5c6f, 0x000d5e6f, 0x000d606f, 0x000d626f, + 0x000d646f, 0x000d666f, 0x000d686f, 0x000d6a6f, + 0x000d6c6f, 0x000d6e6f, 0x000d706f, 0x000d726f, + 0x000d746f, 0x000d766f, 0x000d786f, 0x000d7a6f, + 0x000d7c6f, 0x000d7e6f, 0x000d806f, 0x000d826f, + 0x000d846f, 0x000d866f, 0x000d886f, 0x000d8a6f, + 0x000d8c6f, 0x000d8e6f, 0x000d906f, 0x000d926f, + 0x000d946f, 0x000d966f, 0x000d986f, 0x000d9a6f, + 0x000d9c6f, 0x000d9e6f, 0x000da06f, 0x000da26f, + 0x000da46f, 0x000da66f, 0x000da86f, 0x000daa6f, + 0x000dac6f, 0x000dae6f, 0x000db06f, 0x000db26f, + 0x000db46f, 0x000db66f, 0x000db86f, 0x000dba6f, + 0x000dbc6f, 0x000dbe6f, 0x000dc06f, 0x000dc26f, + 0x000dc46f, 0x000dc66f, 0x000dc86f, 0x000dca6f, + 0x000dcc6f, 0x000dce6f, 0x000dd06f, 0x000dd26f, + 0x000dd46f, 0x000dd66f, 0x000dd86f, 0x000dda6f, + 0x000ddc6f, 0x000dde6f, 0x000de06f, 0x000de26f, + 0x000de46f, 0x000de66f, 0x000de86f, 0x000dea6f, + 0x000dec6f, 0x000dee6f, 0x000df06f, 0x000df26f, + 0x000df46f, 0x000df66f, 0x000df86f, 0x000dfa6f, + 0x000dfc6f, 0x000dfe6f, 0x000e006f, 0x000e026f, + 0x000e046f, 0x000e066f, 0x000e086f, 0x000e0a6f, + 0x000e0c6f, 0x000e0e6f, 0x000e106f, 0x000e126f, + 0x000e146f, 0x000e166f, 0x000e186f, 0x000e1a6f, + 0x000e1c6f, 0x000e1e6f, 0x000e206f, 0x000e226f, + 0x000e246f, 0x000e266f, 0x000e286f, 0x000e2a6f, + 0x000e2c6f, 0x000e2e6f, 0x000e306f, 0x000e326f, + 0x000e346f, 0x000e366f, 0x000e386f, 0x000e3a6f, + 0x000e3c6f, 0x000e3e6f, 0x000e406f, 0x000e426f, + 0x000e446f, 0x000e466f, 0x000e486f, 0x000e4a6f, + 0x000e4c6f, 0x000e4e6f, 0x000e506f, 0x000e526f, + 0x000e546f, 0x000e566f, 0x000e586f, 0x000e5a6f, + 0x000e5c6f, 0x000e5e6f, 0x000e606f, 0x000e626f, + 0x000e646f, 0x000e666f, 0x000e686f, 0x000e6a6f, + 0x000e6c6f, 0x000e6e6f, 0x000e706f, 0x000e726f, 
+ 0x000e746f, 0x000e766f, 0x000e786f, 0x000e7a6f, + 0x000e7c6f, 0x000e7e6f, 0x000e806f, 0x000e826f, + 0x000e846f, 0x000e866f, 0x000e886f, 0x000e8a6f, + 0x000e8c6f, 0x000e8e6f, 0x000e906f, 0x000e926f, + 0x000e946f, 0x000e966f, 0x000e986f, 0x000e9a6f, + 0x000e9c6f, 0x000e9e6f, 0x000ea06f, 0x000ea26f, + 0x000ea46f, 0x000ea66f, 0x000ea86f, 0x000eaa6f, + 0x000eac6f, 0x000eae6f, 0x000eb06f, 0x000eb26f, + 0x000eb46f, 0x000eb66f, 0x000eb86f, 0x000eba6f, + 0x000ebc6f, 0x000ebe6f, 0x000ec06f, 0x000ec26f, + 0x000ec46f, 0x000ec66f, 0x000ec86f, 0x000eca6f, + 0x000ecc6f, 0x000ece6f, 0x000ed06f, 0x000ed26f, + 0x000ed46f, 0x000ed66f, 0x000ed86f, 0x000eda6f, + 0x000edc6f, 0x000ede6f, 0x000ee06f, 0x000ee26f, + 0x000ee46f, 0x000ee66f, 0x000ee86f, 0x000eea6f, + 0x000eec6f, 0x000eee6f, 0x000ef06f, 0x000ef26f, + 0x000ef46f, 0x000ef66f, 0x000ef86f, 0x000efa6f, + 0x000efc6f, 0x000efe6f, 0x000f006f, 0x000f026f, + 0x000f046f, 0x000f066f, 0x000f086f, 0x000f0a6f, + 0x000f0c6f, 0x000f0e6f, 0x000f106f, 0x000f126f, + 0x000f146f, 0x000f166f, 0x000f186f, 0x000f1a6f, + 0x000f1c6f, 0x000f1e6f, 0x000f206f, 0x000f226f, + 0x000f246f, 0x000f266f, 0x000f286f, 0x000f2a6f, + 0x000f2c6f, 0x000f2e6f, 0x000f306f, 0x000f326f, + 0x000f346f, 0x000f366f, 0x000f386f, 0x000f3a6f, + 0x000f3c6f, 0x000f3e6f, 0x000f406f, 0x000f426f, + 0x000f446f, 0x000f466f, 0x000f486f, 0x000f4a6f, + 0x000f4c6f, 0x000f4e6f, 0x000f506f, 0x000f526f, + 0x000f546f, 0x000f566f, 0x000f586f, 0x000f5a6f, + 0x000f5c6f, 0x000f5e6f, 0x000f606f, 0x000f626f, + 0x000f646f, 0x000f666f, 0x000f686f, 0x000f6a6f, + 0x000f6c6f, 0x000f6e6f, 0x000f706f, 0x000f726f, + 0x000f746f, 0x000f766f, 0x000f786f, 0x000f7a6f, + 0x000f7c6f, 0x000f7e6f, 0x000f806f, 0x000f826f, + 0x000f846f, 0x000f866f, 0x000f886f, 0x000f8a6f, + 0x000f8c6f, 0x000f8e6f, 0x000f906f, 0x000f926f, + 0x000f946f, 0x000f966f, 0x000f986f, 0x000f9a6f, + 0x000f9c6f, 0x000f9e6f, 0x000fa06f, 0x000fa26f, + 0x000fa46f, 0x000fa66f, 0x000fa86f, 0x000faa6f, + 0x000fac6f, 0x000fae6f, 0x000fb06f, 0x000fb26f, 
+ 0x000fb46f, 0x000fb66f, 0x000fb86f, 0x000fba6f, + 0x000fbc6f, 0x000fbe6f, 0x000fc06f, 0x000fc26f, + 0x000fc46f, 0x000fc66f, 0x000fc86f, 0x000fca6f, + 0x000fcc6f, 0x000fce6f, 0x000fd06f, 0x000fd26f, + 0x000fd46f, 0x000fd66f, 0x000fd86f, 0x000fda6f, + 0x000fdc6f, 0x000fde6f, 0x000fe06f, 0x000fe26f, + 0x000fe46f, 0x000fe66f, 0x000fe86f, 0x000fea6f, + 0x000fec6f, 0x000fee6f, 0x000ff06f, 0x000ff26f, + 0x000ff46f, 0x000ff66f, 0x000ff86f, 0x000ffa6f, + 0x000ffc6f, 0x000ffe6f +#endif /* LONGER_HUFFTABLE */ + }, + + .len_table = { + 0x000bffef, 0x00000003, 0x00000084, 0x00000145, + 0x00000345, 0x00000626, 0x000002a7, 0x00000aa7, + 0x000000c6, 0x000004c6, 0x00001469, 0x00003469, + 0x00000c69, 0x00002c69, 0x00001c69, 0x00003c69, + 0x0000026a, 0x0000226a, 0x0000426a, 0x0000626a, + 0x000008eb, 0x000048eb, 0x000088eb, 0x0000c8eb, + 0x000029ec, 0x0000a9ec, 0x000129ec, 0x0001a9ec, + 0x000069ec, 0x0000e9ec, 0x000169ec, 0x0001e9ec, + 0x000019ed, 0x000099ed, 0x000119ed, 0x000199ed, + 0x000219ed, 0x000299ed, 0x000319ed, 0x000399ed, + 0x000059ed, 0x0000d9ed, 0x000159ed, 0x0001d9ed, + 0x000259ed, 0x0002d9ed, 0x000359ed, 0x0003d9ed, + 0x000039ed, 0x0000b9ed, 0x000139ed, 0x0001b9ed, + 0x000239ed, 0x0002b9ed, 0x000339ed, 0x0003b9ed, + 0x000079ed, 0x0000f9ed, 0x000179ed, 0x0001f9ed, + 0x000279ed, 0x0002f9ed, 0x000379ed, 0x0003f9ed, + 0x00003fef, 0x00013fef, 0x00023fef, 0x00033fef, + 0x00043fef, 0x00053fef, 0x00063fef, 0x00073fef, + 0x00083fef, 0x00093fef, 0x000a3fef, 0x000b3fef, + 0x000c3fef, 0x000d3fef, 0x000e3fef, 0x000f3fef, + 0x00007ff0, 0x00027ff0, 0x00047ff0, 0x00067ff0, + 0x00087ff0, 0x000a7ff0, 0x000c7ff0, 0x000e7ff0, + 0x00107ff0, 0x00127ff0, 0x00147ff0, 0x00167ff0, + 0x00187ff0, 0x001a7ff0, 0x001c7ff0, 0x001e7ff0, + 0x0000fff1, 0x0004fff1, 0x0008fff1, 0x000cfff1, + 0x0010fff1, 0x0014fff1, 0x0018fff1, 0x001cfff1, + 0x0020fff1, 0x0024fff1, 0x0028fff1, 0x002cfff1, + 0x0030fff1, 0x0034fff1, 0x0038fff1, 0x003cfff1, + 0x0002fff1, 0x0006fff1, 0x000afff1, 0x000efff1, + 0x0012fff1, 
0x0016fff1, 0x001afff1, 0x001efff1, + 0x0022fff1, 0x0026fff1, 0x002afff1, 0x002efff1, + 0x0032fff1, 0x0036fff1, 0x003afff1, 0x003efff1, + 0x00017ff1, 0x00037ff1, 0x00057ff1, 0x00077ff1, + 0x00097ff1, 0x000b7ff1, 0x000d7ff1, 0x000f7ff1, + 0x00117ff1, 0x00137ff1, 0x00157ff1, 0x00177ff1, + 0x00197ff1, 0x001b7ff1, 0x001d7ff1, 0x001f7ff1, + 0x00217ff1, 0x00237ff1, 0x00257ff1, 0x00277ff1, + 0x00297ff1, 0x002b7ff1, 0x002d7ff1, 0x002f7ff1, + 0x00317ff1, 0x00337ff1, 0x00357ff1, 0x00377ff1, + 0x00397ff1, 0x003b7ff1, 0x003d7ff1, 0x003f7ff1, + 0x0001fff2, 0x0005fff2, 0x0009fff2, 0x000dfff2, + 0x0011fff2, 0x0015fff2, 0x0019fff2, 0x001dfff2, + 0x0021fff2, 0x0025fff2, 0x0029fff2, 0x002dfff2, + 0x0031fff2, 0x0035fff2, 0x0039fff2, 0x003dfff2, + 0x0041fff2, 0x0045fff2, 0x0049fff2, 0x004dfff2, + 0x0051fff2, 0x0055fff2, 0x0059fff2, 0x005dfff2, + 0x0061fff2, 0x0065fff2, 0x0069fff2, 0x006dfff2, + 0x0071fff2, 0x0075fff2, 0x0079fff2, 0x007dfff2, + 0x0007fff4, 0x0017fff4, 0x0027fff4, 0x0037fff4, + 0x0047fff4, 0x0057fff4, 0x0067fff4, 0x0077fff4, + 0x0087fff4, 0x0097fff4, 0x00a7fff4, 0x00b7fff4, + 0x00c7fff4, 0x00d7fff4, 0x00e7fff4, 0x00f7fff4, + 0x0107fff4, 0x0117fff4, 0x0127fff4, 0x0137fff4, + 0x0147fff4, 0x0157fff4, 0x0167fff4, 0x0177fff4, + 0x0187fff4, 0x0197fff4, 0x01a7fff4, 0x01b7fff4, + 0x01c7fff4, 0x01d7fff4, 0x01e7fff4, 0x01f7fff4, + 0x000ffff4, 0x001ffff4, 0x002ffff4, 0x003ffff4, + 0x004ffff4, 0x005ffff4, 0x006ffff4, 0x007ffff4, + 0x008ffff4, 0x009ffff4, 0x00affff4, 0x00bffff4, + 0x00cffff4, 0x00dffff4, 0x00effff4, 0x00fffff4, + 0x010ffff4, 0x011ffff4, 0x012ffff4, 0x013ffff4, + 0x014ffff4, 0x015ffff4, 0x016ffff4, 0x017ffff4, + 0x018ffff4, 0x019ffff4, 0x01affff4, 0x01bffff4, + 0x01cffff4, 0x01dffff4, 0x01effff4, 0x0000bfeb}, + + .lit_table = { + 0x000c, 0x0035, 0x0093, 0x00b5, 0x0075, 0x00f5, 0x0193, 0x0053, + 0x0153, 0x000d, 0x0009, 0x00d3, 0x01d3, 0x008d, 0x0033, 0x0133, + 0x00b3, 0x0147, 0x0347, 0x00c7, 0x02c7, 0x01c7, 0x03c7, 0x0027, + 0x0227, 0x002f, 0x042f, 0x022f, 0x0127, 
0x062f, 0x01b3, 0x0073, + 0x001c, 0x0327, 0x0173, 0x00a7, 0x00f3, 0x02a7, 0x01a7, 0x01f3, + 0x004d, 0x000b, 0x03a7, 0x0067, 0x0049, 0x00cd, 0x0029, 0x0267, + 0x002d, 0x00ad, 0x006d, 0x00ed, 0x001d, 0x009d, 0x010b, 0x008b, + 0x005d, 0x018b, 0x004b, 0x014b, 0x00cb, 0x0167, 0x01cb, 0x002b, + 0x00dd, 0x003d, 0x00bd, 0x007d, 0x012b, 0x00ab, 0x01ab, 0x006b, + 0x016b, 0x00fd, 0x00eb, 0x0367, 0x01eb, 0x001b, 0x011b, 0x009b, + 0x0003, 0x00e7, 0x019b, 0x0083, 0x005b, 0x015b, 0x02e7, 0x00db, + 0x01e7, 0x03e7, 0x0017, 0x0217, 0x0117, 0x0317, 0x0097, 0x0297, + 0x01db, 0x0002, 0x0069, 0x0019, 0x0016, 0x0012, 0x0059, 0x0039, + 0x0079, 0x0036, 0x003b, 0x0043, 0x000e, 0x0005, 0x002e, 0x001e, + 0x0045, 0x0197, 0x003e, 0x0001, 0x0021, 0x0011, 0x00c3, 0x0025, + 0x013b, 0x0065, 0x00bb, 0x012f, 0x0397, 0x0057, 0x0257, 0x0157, + 0x01bb, 0x052f, 0x032f, 0x0357, 0x00d7, 0x072f, 0x00af, 0x02d7, + 0x01d7, 0x04af, 0x02af, 0x03d7, 0x06af, 0x01af, 0x05af, 0x0037, + 0x0237, 0x03af, 0x07af, 0x006f, 0x046f, 0x026f, 0x066f, 0x016f, + 0x056f, 0x036f, 0x076f, 0x00ef, 0x04ef, 0x02ef, 0x06ef, 0x01ef, + 0x0137, 0x05ef, 0x03ef, 0x07ef, 0x0337, 0x001f, 0x00b7, 0x041f, + 0x02b7, 0x021f, 0x061f, 0x011f, 0x051f, 0x031f, 0x071f, 0x009f, + 0x01b7, 0x049f, 0x029f, 0x069f, 0x03b7, 0x019f, 0x059f, 0x039f, + 0x079f, 0x005f, 0x045f, 0x025f, 0x065f, 0x0077, 0x015f, 0x0277, + 0x007b, 0x0177, 0x017b, 0x00fb, 0x055f, 0x035f, 0x075f, 0x0377, + 0x00f7, 0x00df, 0x04df, 0x02df, 0x06df, 0x01df, 0x05df, 0x02f7, + 0x01f7, 0x03df, 0x07df, 0x003f, 0x043f, 0x023f, 0x063f, 0x013f, + 0x053f, 0x033f, 0x073f, 0x00bf, 0x04bf, 0x02bf, 0x06bf, 0x01bf, + 0x01fb, 0x03f7, 0x05bf, 0x000f, 0x020f, 0x03bf, 0x07bf, 0x010f, + 0x030f, 0x007f, 0x047f, 0x027f, 0x067f, 0x017f, 0x057f, 0x008f, + 0x0007, 0x028f, 0x037f, 0x018f, 0x038f, 0x077f, 0x00ff, 0x04ff, + 0x0107, 0x004f, 0x02ff, 0x06ff, 0x0087, 0x024f, 0x0187, 0x0023, + 0x1fff}, + + .lit_table_sizes = { + 0x05, 0x08, 0x09, 0x08, 0x08, 0x08, 0x09, 0x09, + 0x09, 0x08, 0x07, 0x09, 0x09, 0x08, 
0x09, 0x09, + 0x09, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x09, 0x09, + 0x05, 0x0a, 0x09, 0x0a, 0x09, 0x0a, 0x0a, 0x09, + 0x08, 0x09, 0x0a, 0x0a, 0x07, 0x08, 0x07, 0x0a, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, + 0x08, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09, + 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x08, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, + 0x08, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x09, + 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, + 0x09, 0x05, 0x07, 0x07, 0x06, 0x05, 0x07, 0x07, + 0x07, 0x06, 0x09, 0x08, 0x06, 0x07, 0x06, 0x06, + 0x07, 0x0a, 0x06, 0x06, 0x06, 0x06, 0x08, 0x07, + 0x09, 0x07, 0x09, 0x0b, 0x0a, 0x0a, 0x0a, 0x0a, + 0x09, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a, + 0x09, 0x0a, 0x09, 0x09, 0x0b, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x09, 0x0a, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, + 0x09, 0x0a, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0b, + 0x09, 0x0a, 0x0b, 0x0b, 0x09, 0x0a, 0x09, 0x08, + 0x0f}, + +#ifndef LONGER_HUFFTABLE + .dcodes = { + 0x007f, 0x01ff, 0x017f, 0x03ff, 0x00ff, 0x003f, 0x00bf, 0x000f, + 0x002f, 0x001f, 0x000b, 0x001b, 0x0004, 0x0007, 0x000c, 0x0002, + 0x000a, 0x0006, 0x000e, 0x0001, 0x0009, 0x0017, 0x0000, 0x0005, + 0x000d, 0x0003, 0x0000, 0x0000, 0x0000, 0x0000}, + + .dcodes_sizes = { + 0x09, 0x0a, 0x09, 0x0a, 0x09, 0x08, 0x08, 0x06, + 0x06, 0x06, 0x05, 0x05, 0x04, 0x05, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x03, 0x04, + 0x04, 0x04, 
0x00, 0x00, 0x00, 0x00} +#else + .dcodes = { + 0x0000, 0x0000, 0x0000, 0x0000}, + + .dcodes_sizes = { + 0x00, 0x00, 0x00, 0x00} +#endif +}; + +#else // LARGE_WINDOW + +const uint8_t gzip_hdr[] = { + 0x1f, 0x8b, 0x08, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff +}; + +const uint32_t gzip_hdr_bytes = 10; +const uint32_t gzip_trl_bytes = 8; + +const uint8_t zlib_hdr[] = { 0x78, 0x01 }; + +const uint32_t zlib_hdr_bytes = 2; +const uint32_t zlib_trl_bytes = 4; + +struct isal_hufftables hufftables_default = { + + .deflate_hdr = { + 0xed, 0xfd, 0x09, 0x80, 0x1c, 0x45, 0xf9, 0xbf, + 0x81, 0xf7, 0x66, 0x37, 0xd7, 0x24, 0x9b, 0x04, + 0x40, 0x45, 0x45, 0x52, 0x04, 0x20, 0x09, 0xcc, + 0x2e, 0xbb, 0x9b, 0x3b, 0x81, 0x24, 0xbb, 0xb9, + 0x21, 0x17, 0x49, 0xb8, 0x04, 0x85, 0xde, 0x99, + 0xde, 0x9d, 0x26, 0x33, 0xd3, 0x43, 0xf7, 0x4c, + 0x36, 0x8b, 0x08, 0xf1, 0x56, 0x51, 0xc1, 0xfb, + 0x56, 0x54, 0xc4, 0x5b, 0x51, 0xf1, 0x16, 0x0d, + 0x89, 0x8a, 0x37, 0x78, 0xdf, 0x1a, 0x45, 0x05, + 0xef, 0x20, 0xaa, 0xa0, 0x90, 0xfd, 0xff, 0x9f, + 0xf7, 0xad, 0x9e, 0xa9, 0xdd, 0xdd, 0x70, 0xfa, + 0xfd, 0xfd, 0xbe, 0xbf, 0xff, 0xdf, 0xcd, 0x66, + 0x67, 0xba, 0xbb, 0xaa, 0xea, 0x7d, 0xdf, 0x7a, + 0xeb, 0xad, 0x4f, 0xbd, 0xf5, 0x56, 0x35}, + + .deflate_hdr_count = 110, + .deflate_hdr_extra_bits = 6, + + .dist_table = { + 0x000007e8, 0x00001fe9, +#ifdef LONGER_HUFFTABLE + 0x000017e8, 0x00003fe9, 0x00000fe9, 0x00002fe9, + 0x000003e8, 0x000013e8, 0x00000be9, 0x00001be9, + 0x00002be9, 0x00003be9, 0x000002e8, 0x00000ae8, + 0x000012e8, 0x00001ae8, 0x000006e9, 0x00000ee9, + 0x000016e9, 0x00001ee9, 0x000026e9, 0x00002ee9, + 0x000036e9, 0x00003ee9, 0x000001e9, 0x000009e9, + 0x000011e9, 0x000019e9, 0x000021e9, 0x000029e9, + 0x000031e9, 0x000039e9, 0x00000129, 0x00000529, + 0x00000929, 0x00000d29, 0x00001129, 0x00001529, + 0x00001929, 0x00001d29, 0x00002129, 0x00002529, + 0x00002929, 0x00002d29, 0x00003129, 0x00003529, + 0x00003929, 0x00003d29, 0x00000329, 0x00000729, + 0x00000b29, 0x00000f29, 0x00001329, 
0x00001729, + 0x00001b29, 0x00001f29, 0x00002329, 0x00002729, + 0x00002b29, 0x00002f29, 0x00003329, 0x00003729, + 0x00003b29, 0x00003f29, 0x000000aa, 0x000004aa, + 0x000008aa, 0x00000caa, 0x000010aa, 0x000014aa, + 0x000018aa, 0x00001caa, 0x000020aa, 0x000024aa, + 0x000028aa, 0x00002caa, 0x000030aa, 0x000034aa, + 0x000038aa, 0x00003caa, 0x000040aa, 0x000044aa, + 0x000048aa, 0x00004caa, 0x000050aa, 0x000054aa, + 0x000058aa, 0x00005caa, 0x000060aa, 0x000064aa, + 0x000068aa, 0x00006caa, 0x000070aa, 0x000074aa, + 0x000078aa, 0x00007caa, 0x000002aa, 0x000006aa, + 0x00000aaa, 0x00000eaa, 0x000012aa, 0x000016aa, + 0x00001aaa, 0x00001eaa, 0x000022aa, 0x000026aa, + 0x00002aaa, 0x00002eaa, 0x000032aa, 0x000036aa, + 0x00003aaa, 0x00003eaa, 0x000042aa, 0x000046aa, + 0x00004aaa, 0x00004eaa, 0x000052aa, 0x000056aa, + 0x00005aaa, 0x00005eaa, 0x000062aa, 0x000066aa, + 0x00006aaa, 0x00006eaa, 0x000072aa, 0x000076aa, + 0x00007aaa, 0x00007eaa, 0x0000008a, 0x0000028a, + 0x0000048a, 0x0000068a, 0x0000088a, 0x00000a8a, + 0x00000c8a, 0x00000e8a, 0x0000108a, 0x0000128a, + 0x0000148a, 0x0000168a, 0x0000188a, 0x00001a8a, + 0x00001c8a, 0x00001e8a, 0x0000208a, 0x0000228a, + 0x0000248a, 0x0000268a, 0x0000288a, 0x00002a8a, + 0x00002c8a, 0x00002e8a, 0x0000308a, 0x0000328a, + 0x0000348a, 0x0000368a, 0x0000388a, 0x00003a8a, + 0x00003c8a, 0x00003e8a, 0x0000408a, 0x0000428a, + 0x0000448a, 0x0000468a, 0x0000488a, 0x00004a8a, + 0x00004c8a, 0x00004e8a, 0x0000508a, 0x0000528a, + 0x0000548a, 0x0000568a, 0x0000588a, 0x00005a8a, + 0x00005c8a, 0x00005e8a, 0x0000608a, 0x0000628a, + 0x0000648a, 0x0000668a, 0x0000688a, 0x00006a8a, + 0x00006c8a, 0x00006e8a, 0x0000708a, 0x0000728a, + 0x0000748a, 0x0000768a, 0x0000788a, 0x00007a8a, + 0x00007c8a, 0x00007e8a, 0x0000018a, 0x0000038a, + 0x0000058a, 0x0000078a, 0x0000098a, 0x00000b8a, + 0x00000d8a, 0x00000f8a, 0x0000118a, 0x0000138a, + 0x0000158a, 0x0000178a, 0x0000198a, 0x00001b8a, + 0x00001d8a, 0x00001f8a, 0x0000218a, 0x0000238a, + 0x0000258a, 0x0000278a, 0x0000298a, 
0x00002b8a, + 0x00002d8a, 0x00002f8a, 0x0000318a, 0x0000338a, + 0x0000358a, 0x0000378a, 0x0000398a, 0x00003b8a, + 0x00003d8a, 0x00003f8a, 0x0000418a, 0x0000438a, + 0x0000458a, 0x0000478a, 0x0000498a, 0x00004b8a, + 0x00004d8a, 0x00004f8a, 0x0000518a, 0x0000538a, + 0x0000558a, 0x0000578a, 0x0000598a, 0x00005b8a, + 0x00005d8a, 0x00005f8a, 0x0000618a, 0x0000638a, + 0x0000658a, 0x0000678a, 0x0000698a, 0x00006b8a, + 0x00006d8a, 0x00006f8a, 0x0000718a, 0x0000738a, + 0x0000758a, 0x0000778a, 0x0000798a, 0x00007b8a, + 0x00007d8a, 0x00007f8a, 0x0000004b, 0x0000024b, + 0x0000044b, 0x0000064b, 0x0000084b, 0x00000a4b, + 0x00000c4b, 0x00000e4b, 0x0000104b, 0x0000124b, + 0x0000144b, 0x0000164b, 0x0000184b, 0x00001a4b, + 0x00001c4b, 0x00001e4b, 0x0000204b, 0x0000224b, + 0x0000244b, 0x0000264b, 0x0000284b, 0x00002a4b, + 0x00002c4b, 0x00002e4b, 0x0000304b, 0x0000324b, + 0x0000344b, 0x0000364b, 0x0000384b, 0x00003a4b, + 0x00003c4b, 0x00003e4b, 0x0000404b, 0x0000424b, + 0x0000444b, 0x0000464b, 0x0000484b, 0x00004a4b, + 0x00004c4b, 0x00004e4b, 0x0000504b, 0x0000524b, + 0x0000544b, 0x0000564b, 0x0000584b, 0x00005a4b, + 0x00005c4b, 0x00005e4b, 0x0000604b, 0x0000624b, + 0x0000644b, 0x0000664b, 0x0000684b, 0x00006a4b, + 0x00006c4b, 0x00006e4b, 0x0000704b, 0x0000724b, + 0x0000744b, 0x0000764b, 0x0000784b, 0x00007a4b, + 0x00007c4b, 0x00007e4b, 0x0000804b, 0x0000824b, + 0x0000844b, 0x0000864b, 0x0000884b, 0x00008a4b, + 0x00008c4b, 0x00008e4b, 0x0000904b, 0x0000924b, + 0x0000944b, 0x0000964b, 0x0000984b, 0x00009a4b, + 0x00009c4b, 0x00009e4b, 0x0000a04b, 0x0000a24b, + 0x0000a44b, 0x0000a64b, 0x0000a84b, 0x0000aa4b, + 0x0000ac4b, 0x0000ae4b, 0x0000b04b, 0x0000b24b, + 0x0000b44b, 0x0000b64b, 0x0000b84b, 0x0000ba4b, + 0x0000bc4b, 0x0000be4b, 0x0000c04b, 0x0000c24b, + 0x0000c44b, 0x0000c64b, 0x0000c84b, 0x0000ca4b, + 0x0000cc4b, 0x0000ce4b, 0x0000d04b, 0x0000d24b, + 0x0000d44b, 0x0000d64b, 0x0000d84b, 0x0000da4b, + 0x0000dc4b, 0x0000de4b, 0x0000e04b, 0x0000e24b, + 0x0000e44b, 0x0000e64b, 0x0000e84b, 
0x0000ea4b, + 0x0000ec4b, 0x0000ee4b, 0x0000f04b, 0x0000f24b, + 0x0000f44b, 0x0000f64b, 0x0000f84b, 0x0000fa4b, + 0x0000fc4b, 0x0000fe4b, 0x000001ac, 0x000005ac, + 0x000009ac, 0x00000dac, 0x000011ac, 0x000015ac, + 0x000019ac, 0x00001dac, 0x000021ac, 0x000025ac, + 0x000029ac, 0x00002dac, 0x000031ac, 0x000035ac, + 0x000039ac, 0x00003dac, 0x000041ac, 0x000045ac, + 0x000049ac, 0x00004dac, 0x000051ac, 0x000055ac, + 0x000059ac, 0x00005dac, 0x000061ac, 0x000065ac, + 0x000069ac, 0x00006dac, 0x000071ac, 0x000075ac, + 0x000079ac, 0x00007dac, 0x000081ac, 0x000085ac, + 0x000089ac, 0x00008dac, 0x000091ac, 0x000095ac, + 0x000099ac, 0x00009dac, 0x0000a1ac, 0x0000a5ac, + 0x0000a9ac, 0x0000adac, 0x0000b1ac, 0x0000b5ac, + 0x0000b9ac, 0x0000bdac, 0x0000c1ac, 0x0000c5ac, + 0x0000c9ac, 0x0000cdac, 0x0000d1ac, 0x0000d5ac, + 0x0000d9ac, 0x0000ddac, 0x0000e1ac, 0x0000e5ac, + 0x0000e9ac, 0x0000edac, 0x0000f1ac, 0x0000f5ac, + 0x0000f9ac, 0x0000fdac, 0x000101ac, 0x000105ac, + 0x000109ac, 0x00010dac, 0x000111ac, 0x000115ac, + 0x000119ac, 0x00011dac, 0x000121ac, 0x000125ac, + 0x000129ac, 0x00012dac, 0x000131ac, 0x000135ac, + 0x000139ac, 0x00013dac, 0x000141ac, 0x000145ac, + 0x000149ac, 0x00014dac, 0x000151ac, 0x000155ac, + 0x000159ac, 0x00015dac, 0x000161ac, 0x000165ac, + 0x000169ac, 0x00016dac, 0x000171ac, 0x000175ac, + 0x000179ac, 0x00017dac, 0x000181ac, 0x000185ac, + 0x000189ac, 0x00018dac, 0x000191ac, 0x000195ac, + 0x000199ac, 0x00019dac, 0x0001a1ac, 0x0001a5ac, + 0x0001a9ac, 0x0001adac, 0x0001b1ac, 0x0001b5ac, + 0x0001b9ac, 0x0001bdac, 0x0001c1ac, 0x0001c5ac, + 0x0001c9ac, 0x0001cdac, 0x0001d1ac, 0x0001d5ac, + 0x0001d9ac, 0x0001ddac, 0x0001e1ac, 0x0001e5ac, + 0x0001e9ac, 0x0001edac, 0x0001f1ac, 0x0001f5ac, + 0x0001f9ac, 0x0001fdac, 0x0000014c, 0x0000034c, + 0x0000054c, 0x0000074c, 0x0000094c, 0x00000b4c, + 0x00000d4c, 0x00000f4c, 0x0000114c, 0x0000134c, + 0x0000154c, 0x0000174c, 0x0000194c, 0x00001b4c, + 0x00001d4c, 0x00001f4c, 0x0000214c, 0x0000234c, + 0x0000254c, 0x0000274c, 0x0000294c, 
0x00002b4c, + 0x00002d4c, 0x00002f4c, 0x0000314c, 0x0000334c, + 0x0000354c, 0x0000374c, 0x0000394c, 0x00003b4c, + 0x00003d4c, 0x00003f4c, 0x0000414c, 0x0000434c, + 0x0000454c, 0x0000474c, 0x0000494c, 0x00004b4c, + 0x00004d4c, 0x00004f4c, 0x0000514c, 0x0000534c, + 0x0000554c, 0x0000574c, 0x0000594c, 0x00005b4c, + 0x00005d4c, 0x00005f4c, 0x0000614c, 0x0000634c, + 0x0000654c, 0x0000674c, 0x0000694c, 0x00006b4c, + 0x00006d4c, 0x00006f4c, 0x0000714c, 0x0000734c, + 0x0000754c, 0x0000774c, 0x0000794c, 0x00007b4c, + 0x00007d4c, 0x00007f4c, 0x0000814c, 0x0000834c, + 0x0000854c, 0x0000874c, 0x0000894c, 0x00008b4c, + 0x00008d4c, 0x00008f4c, 0x0000914c, 0x0000934c, + 0x0000954c, 0x0000974c, 0x0000994c, 0x00009b4c, + 0x00009d4c, 0x00009f4c, 0x0000a14c, 0x0000a34c, + 0x0000a54c, 0x0000a74c, 0x0000a94c, 0x0000ab4c, + 0x0000ad4c, 0x0000af4c, 0x0000b14c, 0x0000b34c, + 0x0000b54c, 0x0000b74c, 0x0000b94c, 0x0000bb4c, + 0x0000bd4c, 0x0000bf4c, 0x0000c14c, 0x0000c34c, + 0x0000c54c, 0x0000c74c, 0x0000c94c, 0x0000cb4c, + 0x0000cd4c, 0x0000cf4c, 0x0000d14c, 0x0000d34c, + 0x0000d54c, 0x0000d74c, 0x0000d94c, 0x0000db4c, + 0x0000dd4c, 0x0000df4c, 0x0000e14c, 0x0000e34c, + 0x0000e54c, 0x0000e74c, 0x0000e94c, 0x0000eb4c, + 0x0000ed4c, 0x0000ef4c, 0x0000f14c, 0x0000f34c, + 0x0000f54c, 0x0000f74c, 0x0000f94c, 0x0000fb4c, + 0x0000fd4c, 0x0000ff4c, 0x0001014c, 0x0001034c, + 0x0001054c, 0x0001074c, 0x0001094c, 0x00010b4c, + 0x00010d4c, 0x00010f4c, 0x0001114c, 0x0001134c, + 0x0001154c, 0x0001174c, 0x0001194c, 0x00011b4c, + 0x00011d4c, 0x00011f4c, 0x0001214c, 0x0001234c, + 0x0001254c, 0x0001274c, 0x0001294c, 0x00012b4c, + 0x00012d4c, 0x00012f4c, 0x0001314c, 0x0001334c, + 0x0001354c, 0x0001374c, 0x0001394c, 0x00013b4c, + 0x00013d4c, 0x00013f4c, 0x0001414c, 0x0001434c, + 0x0001454c, 0x0001474c, 0x0001494c, 0x00014b4c, + 0x00014d4c, 0x00014f4c, 0x0001514c, 0x0001534c, + 0x0001554c, 0x0001574c, 0x0001594c, 0x00015b4c, + 0x00015d4c, 0x00015f4c, 0x0001614c, 0x0001634c, + 0x0001654c, 0x0001674c, 0x0001694c, 
0x00016b4c, + 0x00016d4c, 0x00016f4c, 0x0001714c, 0x0001734c, + 0x0001754c, 0x0001774c, 0x0001794c, 0x00017b4c, + 0x00017d4c, 0x00017f4c, 0x0001814c, 0x0001834c, + 0x0001854c, 0x0001874c, 0x0001894c, 0x00018b4c, + 0x00018d4c, 0x00018f4c, 0x0001914c, 0x0001934c, + 0x0001954c, 0x0001974c, 0x0001994c, 0x00019b4c, + 0x00019d4c, 0x00019f4c, 0x0001a14c, 0x0001a34c, + 0x0001a54c, 0x0001a74c, 0x0001a94c, 0x0001ab4c, + 0x0001ad4c, 0x0001af4c, 0x0001b14c, 0x0001b34c, + 0x0001b54c, 0x0001b74c, 0x0001b94c, 0x0001bb4c, + 0x0001bd4c, 0x0001bf4c, 0x0001c14c, 0x0001c34c, + 0x0001c54c, 0x0001c74c, 0x0001c94c, 0x0001cb4c, + 0x0001cd4c, 0x0001cf4c, 0x0001d14c, 0x0001d34c, + 0x0001d54c, 0x0001d74c, 0x0001d94c, 0x0001db4c, + 0x0001dd4c, 0x0001df4c, 0x0001e14c, 0x0001e34c, + 0x0001e54c, 0x0001e74c, 0x0001e94c, 0x0001eb4c, + 0x0001ed4c, 0x0001ef4c, 0x0001f14c, 0x0001f34c, + 0x0001f54c, 0x0001f74c, 0x0001f94c, 0x0001fb4c, + 0x0001fd4c, 0x0001ff4c, 0x000003ad, 0x000007ad, + 0x00000bad, 0x00000fad, 0x000013ad, 0x000017ad, + 0x00001bad, 0x00001fad, 0x000023ad, 0x000027ad, + 0x00002bad, 0x00002fad, 0x000033ad, 0x000037ad, + 0x00003bad, 0x00003fad, 0x000043ad, 0x000047ad, + 0x00004bad, 0x00004fad, 0x000053ad, 0x000057ad, + 0x00005bad, 0x00005fad, 0x000063ad, 0x000067ad, + 0x00006bad, 0x00006fad, 0x000073ad, 0x000077ad, + 0x00007bad, 0x00007fad, 0x000083ad, 0x000087ad, + 0x00008bad, 0x00008fad, 0x000093ad, 0x000097ad, + 0x00009bad, 0x00009fad, 0x0000a3ad, 0x0000a7ad, + 0x0000abad, 0x0000afad, 0x0000b3ad, 0x0000b7ad, + 0x0000bbad, 0x0000bfad, 0x0000c3ad, 0x0000c7ad, + 0x0000cbad, 0x0000cfad, 0x0000d3ad, 0x0000d7ad, + 0x0000dbad, 0x0000dfad, 0x0000e3ad, 0x0000e7ad, + 0x0000ebad, 0x0000efad, 0x0000f3ad, 0x0000f7ad, + 0x0000fbad, 0x0000ffad, 0x000103ad, 0x000107ad, + 0x00010bad, 0x00010fad, 0x000113ad, 0x000117ad, + 0x00011bad, 0x00011fad, 0x000123ad, 0x000127ad, + 0x00012bad, 0x00012fad, 0x000133ad, 0x000137ad, + 0x00013bad, 0x00013fad, 0x000143ad, 0x000147ad, + 0x00014bad, 0x00014fad, 0x000153ad, 
0x000157ad, + 0x00015bad, 0x00015fad, 0x000163ad, 0x000167ad, + 0x00016bad, 0x00016fad, 0x000173ad, 0x000177ad, + 0x00017bad, 0x00017fad, 0x000183ad, 0x000187ad, + 0x00018bad, 0x00018fad, 0x000193ad, 0x000197ad, + 0x00019bad, 0x00019fad, 0x0001a3ad, 0x0001a7ad, + 0x0001abad, 0x0001afad, 0x0001b3ad, 0x0001b7ad, + 0x0001bbad, 0x0001bfad, 0x0001c3ad, 0x0001c7ad, + 0x0001cbad, 0x0001cfad, 0x0001d3ad, 0x0001d7ad, + 0x0001dbad, 0x0001dfad, 0x0001e3ad, 0x0001e7ad, + 0x0001ebad, 0x0001efad, 0x0001f3ad, 0x0001f7ad, + 0x0001fbad, 0x0001ffad, 0x000203ad, 0x000207ad, + 0x00020bad, 0x00020fad, 0x000213ad, 0x000217ad, + 0x00021bad, 0x00021fad, 0x000223ad, 0x000227ad, + 0x00022bad, 0x00022fad, 0x000233ad, 0x000237ad, + 0x00023bad, 0x00023fad, 0x000243ad, 0x000247ad, + 0x00024bad, 0x00024fad, 0x000253ad, 0x000257ad, + 0x00025bad, 0x00025fad, 0x000263ad, 0x000267ad, + 0x00026bad, 0x00026fad, 0x000273ad, 0x000277ad, + 0x00027bad, 0x00027fad, 0x000283ad, 0x000287ad, + 0x00028bad, 0x00028fad, 0x000293ad, 0x000297ad, + 0x00029bad, 0x00029fad, 0x0002a3ad, 0x0002a7ad, + 0x0002abad, 0x0002afad, 0x0002b3ad, 0x0002b7ad, + 0x0002bbad, 0x0002bfad, 0x0002c3ad, 0x0002c7ad, + 0x0002cbad, 0x0002cfad, 0x0002d3ad, 0x0002d7ad, + 0x0002dbad, 0x0002dfad, 0x0002e3ad, 0x0002e7ad, + 0x0002ebad, 0x0002efad, 0x0002f3ad, 0x0002f7ad, + 0x0002fbad, 0x0002ffad, 0x000303ad, 0x000307ad, + 0x00030bad, 0x00030fad, 0x000313ad, 0x000317ad, + 0x00031bad, 0x00031fad, 0x000323ad, 0x000327ad, + 0x00032bad, 0x00032fad, 0x000333ad, 0x000337ad, + 0x00033bad, 0x00033fad, 0x000343ad, 0x000347ad, + 0x00034bad, 0x00034fad, 0x000353ad, 0x000357ad, + 0x00035bad, 0x00035fad, 0x000363ad, 0x000367ad, + 0x00036bad, 0x00036fad, 0x000373ad, 0x000377ad, + 0x00037bad, 0x00037fad, 0x000383ad, 0x000387ad, + 0x00038bad, 0x00038fad, 0x000393ad, 0x000397ad, + 0x00039bad, 0x00039fad, 0x0003a3ad, 0x0003a7ad, + 0x0003abad, 0x0003afad, 0x0003b3ad, 0x0003b7ad, + 0x0003bbad, 0x0003bfad, 0x0003c3ad, 0x0003c7ad, + 0x0003cbad, 0x0003cfad, 0x0003d3ad, 
0x0003d7ad, + 0x0003dbad, 0x0003dfad, 0x0003e3ad, 0x0003e7ad, + 0x0003ebad, 0x0003efad, 0x0003f3ad, 0x0003f7ad, + 0x0003fbad, 0x0003ffad, 0x000000cd, 0x000002cd, + 0x000004cd, 0x000006cd, 0x000008cd, 0x00000acd, + 0x00000ccd, 0x00000ecd, 0x000010cd, 0x000012cd, + 0x000014cd, 0x000016cd, 0x000018cd, 0x00001acd, + 0x00001ccd, 0x00001ecd, 0x000020cd, 0x000022cd, + 0x000024cd, 0x000026cd, 0x000028cd, 0x00002acd, + 0x00002ccd, 0x00002ecd, 0x000030cd, 0x000032cd, + 0x000034cd, 0x000036cd, 0x000038cd, 0x00003acd, + 0x00003ccd, 0x00003ecd, 0x000040cd, 0x000042cd, + 0x000044cd, 0x000046cd, 0x000048cd, 0x00004acd, + 0x00004ccd, 0x00004ecd, 0x000050cd, 0x000052cd, + 0x000054cd, 0x000056cd, 0x000058cd, 0x00005acd, + 0x00005ccd, 0x00005ecd, 0x000060cd, 0x000062cd, + 0x000064cd, 0x000066cd, 0x000068cd, 0x00006acd, + 0x00006ccd, 0x00006ecd, 0x000070cd, 0x000072cd, + 0x000074cd, 0x000076cd, 0x000078cd, 0x00007acd, + 0x00007ccd, 0x00007ecd, 0x000080cd, 0x000082cd, + 0x000084cd, 0x000086cd, 0x000088cd, 0x00008acd, + 0x00008ccd, 0x00008ecd, 0x000090cd, 0x000092cd, + 0x000094cd, 0x000096cd, 0x000098cd, 0x00009acd, + 0x00009ccd, 0x00009ecd, 0x0000a0cd, 0x0000a2cd, + 0x0000a4cd, 0x0000a6cd, 0x0000a8cd, 0x0000aacd, + 0x0000accd, 0x0000aecd, 0x0000b0cd, 0x0000b2cd, + 0x0000b4cd, 0x0000b6cd, 0x0000b8cd, 0x0000bacd, + 0x0000bccd, 0x0000becd, 0x0000c0cd, 0x0000c2cd, + 0x0000c4cd, 0x0000c6cd, 0x0000c8cd, 0x0000cacd, + 0x0000cccd, 0x0000cecd, 0x0000d0cd, 0x0000d2cd, + 0x0000d4cd, 0x0000d6cd, 0x0000d8cd, 0x0000dacd, + 0x0000dccd, 0x0000decd, 0x0000e0cd, 0x0000e2cd, + 0x0000e4cd, 0x0000e6cd, 0x0000e8cd, 0x0000eacd, + 0x0000eccd, 0x0000eecd, 0x0000f0cd, 0x0000f2cd, + 0x0000f4cd, 0x0000f6cd, 0x0000f8cd, 0x0000facd, + 0x0000fccd, 0x0000fecd, 0x000100cd, 0x000102cd, + 0x000104cd, 0x000106cd, 0x000108cd, 0x00010acd, + 0x00010ccd, 0x00010ecd, 0x000110cd, 0x000112cd, + 0x000114cd, 0x000116cd, 0x000118cd, 0x00011acd, + 0x00011ccd, 0x00011ecd, 0x000120cd, 0x000122cd, + 0x000124cd, 0x000126cd, 0x000128cd, 
0x00012acd, + 0x00012ccd, 0x00012ecd, 0x000130cd, 0x000132cd, + 0x000134cd, 0x000136cd, 0x000138cd, 0x00013acd, + 0x00013ccd, 0x00013ecd, 0x000140cd, 0x000142cd, + 0x000144cd, 0x000146cd, 0x000148cd, 0x00014acd, + 0x00014ccd, 0x00014ecd, 0x000150cd, 0x000152cd, + 0x000154cd, 0x000156cd, 0x000158cd, 0x00015acd, + 0x00015ccd, 0x00015ecd, 0x000160cd, 0x000162cd, + 0x000164cd, 0x000166cd, 0x000168cd, 0x00016acd, + 0x00016ccd, 0x00016ecd, 0x000170cd, 0x000172cd, + 0x000174cd, 0x000176cd, 0x000178cd, 0x00017acd, + 0x00017ccd, 0x00017ecd, 0x000180cd, 0x000182cd, + 0x000184cd, 0x000186cd, 0x000188cd, 0x00018acd, + 0x00018ccd, 0x00018ecd, 0x000190cd, 0x000192cd, + 0x000194cd, 0x000196cd, 0x000198cd, 0x00019acd, + 0x00019ccd, 0x00019ecd, 0x0001a0cd, 0x0001a2cd, + 0x0001a4cd, 0x0001a6cd, 0x0001a8cd, 0x0001aacd, + 0x0001accd, 0x0001aecd, 0x0001b0cd, 0x0001b2cd, + 0x0001b4cd, 0x0001b6cd, 0x0001b8cd, 0x0001bacd, + 0x0001bccd, 0x0001becd, 0x0001c0cd, 0x0001c2cd, + 0x0001c4cd, 0x0001c6cd, 0x0001c8cd, 0x0001cacd, + 0x0001cccd, 0x0001cecd, 0x0001d0cd, 0x0001d2cd, + 0x0001d4cd, 0x0001d6cd, 0x0001d8cd, 0x0001dacd, + 0x0001dccd, 0x0001decd, 0x0001e0cd, 0x0001e2cd, + 0x0001e4cd, 0x0001e6cd, 0x0001e8cd, 0x0001eacd, + 0x0001eccd, 0x0001eecd, 0x0001f0cd, 0x0001f2cd, + 0x0001f4cd, 0x0001f6cd, 0x0001f8cd, 0x0001facd, + 0x0001fccd, 0x0001fecd, 0x000200cd, 0x000202cd, + 0x000204cd, 0x000206cd, 0x000208cd, 0x00020acd, + 0x00020ccd, 0x00020ecd, 0x000210cd, 0x000212cd, + 0x000214cd, 0x000216cd, 0x000218cd, 0x00021acd, + 0x00021ccd, 0x00021ecd, 0x000220cd, 0x000222cd, + 0x000224cd, 0x000226cd, 0x000228cd, 0x00022acd, + 0x00022ccd, 0x00022ecd, 0x000230cd, 0x000232cd, + 0x000234cd, 0x000236cd, 0x000238cd, 0x00023acd, + 0x00023ccd, 0x00023ecd, 0x000240cd, 0x000242cd, + 0x000244cd, 0x000246cd, 0x000248cd, 0x00024acd, + 0x00024ccd, 0x00024ecd, 0x000250cd, 0x000252cd, + 0x000254cd, 0x000256cd, 0x000258cd, 0x00025acd, + 0x00025ccd, 0x00025ecd, 0x000260cd, 0x000262cd, + 0x000264cd, 0x000266cd, 0x000268cd, 
0x00026acd, + 0x00026ccd, 0x00026ecd, 0x000270cd, 0x000272cd, + 0x000274cd, 0x000276cd, 0x000278cd, 0x00027acd, + 0x00027ccd, 0x00027ecd, 0x000280cd, 0x000282cd, + 0x000284cd, 0x000286cd, 0x000288cd, 0x00028acd, + 0x00028ccd, 0x00028ecd, 0x000290cd, 0x000292cd, + 0x000294cd, 0x000296cd, 0x000298cd, 0x00029acd, + 0x00029ccd, 0x00029ecd, 0x0002a0cd, 0x0002a2cd, + 0x0002a4cd, 0x0002a6cd, 0x0002a8cd, 0x0002aacd, + 0x0002accd, 0x0002aecd, 0x0002b0cd, 0x0002b2cd, + 0x0002b4cd, 0x0002b6cd, 0x0002b8cd, 0x0002bacd, + 0x0002bccd, 0x0002becd, 0x0002c0cd, 0x0002c2cd, + 0x0002c4cd, 0x0002c6cd, 0x0002c8cd, 0x0002cacd, + 0x0002cccd, 0x0002cecd, 0x0002d0cd, 0x0002d2cd, + 0x0002d4cd, 0x0002d6cd, 0x0002d8cd, 0x0002dacd, + 0x0002dccd, 0x0002decd, 0x0002e0cd, 0x0002e2cd, + 0x0002e4cd, 0x0002e6cd, 0x0002e8cd, 0x0002eacd, + 0x0002eccd, 0x0002eecd, 0x0002f0cd, 0x0002f2cd, + 0x0002f4cd, 0x0002f6cd, 0x0002f8cd, 0x0002facd, + 0x0002fccd, 0x0002fecd, 0x000300cd, 0x000302cd, + 0x000304cd, 0x000306cd, 0x000308cd, 0x00030acd, + 0x00030ccd, 0x00030ecd, 0x000310cd, 0x000312cd, + 0x000314cd, 0x000316cd, 0x000318cd, 0x00031acd, + 0x00031ccd, 0x00031ecd, 0x000320cd, 0x000322cd, + 0x000324cd, 0x000326cd, 0x000328cd, 0x00032acd, + 0x00032ccd, 0x00032ecd, 0x000330cd, 0x000332cd, + 0x000334cd, 0x000336cd, 0x000338cd, 0x00033acd, + 0x00033ccd, 0x00033ecd, 0x000340cd, 0x000342cd, + 0x000344cd, 0x000346cd, 0x000348cd, 0x00034acd, + 0x00034ccd, 0x00034ecd, 0x000350cd, 0x000352cd, + 0x000354cd, 0x000356cd, 0x000358cd, 0x00035acd, + 0x00035ccd, 0x00035ecd, 0x000360cd, 0x000362cd, + 0x000364cd, 0x000366cd, 0x000368cd, 0x00036acd, + 0x00036ccd, 0x00036ecd, 0x000370cd, 0x000372cd, + 0x000374cd, 0x000376cd, 0x000378cd, 0x00037acd, + 0x00037ccd, 0x00037ecd, 0x000380cd, 0x000382cd, + 0x000384cd, 0x000386cd, 0x000388cd, 0x00038acd, + 0x00038ccd, 0x00038ecd, 0x000390cd, 0x000392cd, + 0x000394cd, 0x000396cd, 0x000398cd, 0x00039acd, + 0x00039ccd, 0x00039ecd, 0x0003a0cd, 0x0003a2cd, + 0x0003a4cd, 0x0003a6cd, 0x0003a8cd, 
0x0003aacd, + 0x0003accd, 0x0003aecd, 0x0003b0cd, 0x0003b2cd, + 0x0003b4cd, 0x0003b6cd, 0x0003b8cd, 0x0003bacd, + 0x0003bccd, 0x0003becd, 0x0003c0cd, 0x0003c2cd, + 0x0003c4cd, 0x0003c6cd, 0x0003c8cd, 0x0003cacd, + 0x0003cccd, 0x0003cecd, 0x0003d0cd, 0x0003d2cd, + 0x0003d4cd, 0x0003d6cd, 0x0003d8cd, 0x0003dacd, + 0x0003dccd, 0x0003decd, 0x0003e0cd, 0x0003e2cd, + 0x0003e4cd, 0x0003e6cd, 0x0003e8cd, 0x0003eacd, + 0x0003eccd, 0x0003eecd, 0x0003f0cd, 0x0003f2cd, + 0x0003f4cd, 0x0003f6cd, 0x0003f8cd, 0x0003facd, + 0x0003fccd, 0x0003fecd, 0x0000006e, 0x0000046e, + 0x0000086e, 0x00000c6e, 0x0000106e, 0x0000146e, + 0x0000186e, 0x00001c6e, 0x0000206e, 0x0000246e, + 0x0000286e, 0x00002c6e, 0x0000306e, 0x0000346e, + 0x0000386e, 0x00003c6e, 0x0000406e, 0x0000446e, + 0x0000486e, 0x00004c6e, 0x0000506e, 0x0000546e, + 0x0000586e, 0x00005c6e, 0x0000606e, 0x0000646e, + 0x0000686e, 0x00006c6e, 0x0000706e, 0x0000746e, + 0x0000786e, 0x00007c6e, 0x0000806e, 0x0000846e, + 0x0000886e, 0x00008c6e, 0x0000906e, 0x0000946e, + 0x0000986e, 0x00009c6e, 0x0000a06e, 0x0000a46e, + 0x0000a86e, 0x0000ac6e, 0x0000b06e, 0x0000b46e, + 0x0000b86e, 0x0000bc6e, 0x0000c06e, 0x0000c46e, + 0x0000c86e, 0x0000cc6e, 0x0000d06e, 0x0000d46e, + 0x0000d86e, 0x0000dc6e, 0x0000e06e, 0x0000e46e, + 0x0000e86e, 0x0000ec6e, 0x0000f06e, 0x0000f46e, + 0x0000f86e, 0x0000fc6e, 0x0001006e, 0x0001046e, + 0x0001086e, 0x00010c6e, 0x0001106e, 0x0001146e, + 0x0001186e, 0x00011c6e, 0x0001206e, 0x0001246e, + 0x0001286e, 0x00012c6e, 0x0001306e, 0x0001346e, + 0x0001386e, 0x00013c6e, 0x0001406e, 0x0001446e, + 0x0001486e, 0x00014c6e, 0x0001506e, 0x0001546e, + 0x0001586e, 0x00015c6e, 0x0001606e, 0x0001646e, + 0x0001686e, 0x00016c6e, 0x0001706e, 0x0001746e, + 0x0001786e, 0x00017c6e, 0x0001806e, 0x0001846e, + 0x0001886e, 0x00018c6e, 0x0001906e, 0x0001946e, + 0x0001986e, 0x00019c6e, 0x0001a06e, 0x0001a46e, + 0x0001a86e, 0x0001ac6e, 0x0001b06e, 0x0001b46e, + 0x0001b86e, 0x0001bc6e, 0x0001c06e, 0x0001c46e, + 0x0001c86e, 0x0001cc6e, 0x0001d06e, 
0x0001d46e, + 0x0001d86e, 0x0001dc6e, 0x0001e06e, 0x0001e46e, + 0x0001e86e, 0x0001ec6e, 0x0001f06e, 0x0001f46e, + 0x0001f86e, 0x0001fc6e, 0x0002006e, 0x0002046e, + 0x0002086e, 0x00020c6e, 0x0002106e, 0x0002146e, + 0x0002186e, 0x00021c6e, 0x0002206e, 0x0002246e, + 0x0002286e, 0x00022c6e, 0x0002306e, 0x0002346e, + 0x0002386e, 0x00023c6e, 0x0002406e, 0x0002446e, + 0x0002486e, 0x00024c6e, 0x0002506e, 0x0002546e, + 0x0002586e, 0x00025c6e, 0x0002606e, 0x0002646e, + 0x0002686e, 0x00026c6e, 0x0002706e, 0x0002746e, + 0x0002786e, 0x00027c6e, 0x0002806e, 0x0002846e, + 0x0002886e, 0x00028c6e, 0x0002906e, 0x0002946e, + 0x0002986e, 0x00029c6e, 0x0002a06e, 0x0002a46e, + 0x0002a86e, 0x0002ac6e, 0x0002b06e, 0x0002b46e, + 0x0002b86e, 0x0002bc6e, 0x0002c06e, 0x0002c46e, + 0x0002c86e, 0x0002cc6e, 0x0002d06e, 0x0002d46e, + 0x0002d86e, 0x0002dc6e, 0x0002e06e, 0x0002e46e, + 0x0002e86e, 0x0002ec6e, 0x0002f06e, 0x0002f46e, + 0x0002f86e, 0x0002fc6e, 0x0003006e, 0x0003046e, + 0x0003086e, 0x00030c6e, 0x0003106e, 0x0003146e, + 0x0003186e, 0x00031c6e, 0x0003206e, 0x0003246e, + 0x0003286e, 0x00032c6e, 0x0003306e, 0x0003346e, + 0x0003386e, 0x00033c6e, 0x0003406e, 0x0003446e, + 0x0003486e, 0x00034c6e, 0x0003506e, 0x0003546e, + 0x0003586e, 0x00035c6e, 0x0003606e, 0x0003646e, + 0x0003686e, 0x00036c6e, 0x0003706e, 0x0003746e, + 0x0003786e, 0x00037c6e, 0x0003806e, 0x0003846e, + 0x0003886e, 0x00038c6e, 0x0003906e, 0x0003946e, + 0x0003986e, 0x00039c6e, 0x0003a06e, 0x0003a46e, + 0x0003a86e, 0x0003ac6e, 0x0003b06e, 0x0003b46e, + 0x0003b86e, 0x0003bc6e, 0x0003c06e, 0x0003c46e, + 0x0003c86e, 0x0003cc6e, 0x0003d06e, 0x0003d46e, + 0x0003d86e, 0x0003dc6e, 0x0003e06e, 0x0003e46e, + 0x0003e86e, 0x0003ec6e, 0x0003f06e, 0x0003f46e, + 0x0003f86e, 0x0003fc6e, 0x0004006e, 0x0004046e, + 0x0004086e, 0x00040c6e, 0x0004106e, 0x0004146e, + 0x0004186e, 0x00041c6e, 0x0004206e, 0x0004246e, + 0x0004286e, 0x00042c6e, 0x0004306e, 0x0004346e, + 0x0004386e, 0x00043c6e, 0x0004406e, 0x0004446e, + 0x0004486e, 0x00044c6e, 0x0004506e, 
0x0004546e, + 0x0004586e, 0x00045c6e, 0x0004606e, 0x0004646e, + 0x0004686e, 0x00046c6e, 0x0004706e, 0x0004746e, + 0x0004786e, 0x00047c6e, 0x0004806e, 0x0004846e, + 0x0004886e, 0x00048c6e, 0x0004906e, 0x0004946e, + 0x0004986e, 0x00049c6e, 0x0004a06e, 0x0004a46e, + 0x0004a86e, 0x0004ac6e, 0x0004b06e, 0x0004b46e, + 0x0004b86e, 0x0004bc6e, 0x0004c06e, 0x0004c46e, + 0x0004c86e, 0x0004cc6e, 0x0004d06e, 0x0004d46e, + 0x0004d86e, 0x0004dc6e, 0x0004e06e, 0x0004e46e, + 0x0004e86e, 0x0004ec6e, 0x0004f06e, 0x0004f46e, + 0x0004f86e, 0x0004fc6e, 0x0005006e, 0x0005046e, + 0x0005086e, 0x00050c6e, 0x0005106e, 0x0005146e, + 0x0005186e, 0x00051c6e, 0x0005206e, 0x0005246e, + 0x0005286e, 0x00052c6e, 0x0005306e, 0x0005346e, + 0x0005386e, 0x00053c6e, 0x0005406e, 0x0005446e, + 0x0005486e, 0x00054c6e, 0x0005506e, 0x0005546e, + 0x0005586e, 0x00055c6e, 0x0005606e, 0x0005646e, + 0x0005686e, 0x00056c6e, 0x0005706e, 0x0005746e, + 0x0005786e, 0x00057c6e, 0x0005806e, 0x0005846e, + 0x0005886e, 0x00058c6e, 0x0005906e, 0x0005946e, + 0x0005986e, 0x00059c6e, 0x0005a06e, 0x0005a46e, + 0x0005a86e, 0x0005ac6e, 0x0005b06e, 0x0005b46e, + 0x0005b86e, 0x0005bc6e, 0x0005c06e, 0x0005c46e, + 0x0005c86e, 0x0005cc6e, 0x0005d06e, 0x0005d46e, + 0x0005d86e, 0x0005dc6e, 0x0005e06e, 0x0005e46e, + 0x0005e86e, 0x0005ec6e, 0x0005f06e, 0x0005f46e, + 0x0005f86e, 0x0005fc6e, 0x0006006e, 0x0006046e, + 0x0006086e, 0x00060c6e, 0x0006106e, 0x0006146e, + 0x0006186e, 0x00061c6e, 0x0006206e, 0x0006246e, + 0x0006286e, 0x00062c6e, 0x0006306e, 0x0006346e, + 0x0006386e, 0x00063c6e, 0x0006406e, 0x0006446e, + 0x0006486e, 0x00064c6e, 0x0006506e, 0x0006546e, + 0x0006586e, 0x00065c6e, 0x0006606e, 0x0006646e, + 0x0006686e, 0x00066c6e, 0x0006706e, 0x0006746e, + 0x0006786e, 0x00067c6e, 0x0006806e, 0x0006846e, + 0x0006886e, 0x00068c6e, 0x0006906e, 0x0006946e, + 0x0006986e, 0x00069c6e, 0x0006a06e, 0x0006a46e, + 0x0006a86e, 0x0006ac6e, 0x0006b06e, 0x0006b46e, + 0x0006b86e, 0x0006bc6e, 0x0006c06e, 0x0006c46e, + 0x0006c86e, 0x0006cc6e, 0x0006d06e, 
0x0006d46e, + 0x0006d86e, 0x0006dc6e, 0x0006e06e, 0x0006e46e, + 0x0006e86e, 0x0006ec6e, 0x0006f06e, 0x0006f46e, + 0x0006f86e, 0x0006fc6e, 0x0007006e, 0x0007046e, + 0x0007086e, 0x00070c6e, 0x0007106e, 0x0007146e, + 0x0007186e, 0x00071c6e, 0x0007206e, 0x0007246e, + 0x0007286e, 0x00072c6e, 0x0007306e, 0x0007346e, + 0x0007386e, 0x00073c6e, 0x0007406e, 0x0007446e, + 0x0007486e, 0x00074c6e, 0x0007506e, 0x0007546e, + 0x0007586e, 0x00075c6e, 0x0007606e, 0x0007646e, + 0x0007686e, 0x00076c6e, 0x0007706e, 0x0007746e, + 0x0007786e, 0x00077c6e, 0x0007806e, 0x0007846e, + 0x0007886e, 0x00078c6e, 0x0007906e, 0x0007946e, + 0x0007986e, 0x00079c6e, 0x0007a06e, 0x0007a46e, + 0x0007a86e, 0x0007ac6e, 0x0007b06e, 0x0007b46e, + 0x0007b86e, 0x0007bc6e, 0x0007c06e, 0x0007c46e, + 0x0007c86e, 0x0007cc6e, 0x0007d06e, 0x0007d46e, + 0x0007d86e, 0x0007dc6e, 0x0007e06e, 0x0007e46e, + 0x0007e86e, 0x0007ec6e, 0x0007f06e, 0x0007f46e, + 0x0007f86e, 0x0007fc6e, 0x0000000d, 0x0000010d, + 0x0000020d, 0x0000030d, 0x0000040d, 0x0000050d, + 0x0000060d, 0x0000070d, 0x0000080d, 0x0000090d, + 0x00000a0d, 0x00000b0d, 0x00000c0d, 0x00000d0d, + 0x00000e0d, 0x00000f0d, 0x0000100d, 0x0000110d, + 0x0000120d, 0x0000130d, 0x0000140d, 0x0000150d, + 0x0000160d, 0x0000170d, 0x0000180d, 0x0000190d, + 0x00001a0d, 0x00001b0d, 0x00001c0d, 0x00001d0d, + 0x00001e0d, 0x00001f0d, 0x0000200d, 0x0000210d, + 0x0000220d, 0x0000230d, 0x0000240d, 0x0000250d, + 0x0000260d, 0x0000270d, 0x0000280d, 0x0000290d, + 0x00002a0d, 0x00002b0d, 0x00002c0d, 0x00002d0d, + 0x00002e0d, 0x00002f0d, 0x0000300d, 0x0000310d, + 0x0000320d, 0x0000330d, 0x0000340d, 0x0000350d, + 0x0000360d, 0x0000370d, 0x0000380d, 0x0000390d, + 0x00003a0d, 0x00003b0d, 0x00003c0d, 0x00003d0d, + 0x00003e0d, 0x00003f0d, 0x0000400d, 0x0000410d, + 0x0000420d, 0x0000430d, 0x0000440d, 0x0000450d, + 0x0000460d, 0x0000470d, 0x0000480d, 0x0000490d, + 0x00004a0d, 0x00004b0d, 0x00004c0d, 0x00004d0d, + 0x00004e0d, 0x00004f0d, 0x0000500d, 0x0000510d, + 0x0000520d, 0x0000530d, 0x0000540d, 
0x0000550d, + 0x0000560d, 0x0000570d, 0x0000580d, 0x0000590d, + 0x00005a0d, 0x00005b0d, 0x00005c0d, 0x00005d0d, + 0x00005e0d, 0x00005f0d, 0x0000600d, 0x0000610d, + 0x0000620d, 0x0000630d, 0x0000640d, 0x0000650d, + 0x0000660d, 0x0000670d, 0x0000680d, 0x0000690d, + 0x00006a0d, 0x00006b0d, 0x00006c0d, 0x00006d0d, + 0x00006e0d, 0x00006f0d, 0x0000700d, 0x0000710d, + 0x0000720d, 0x0000730d, 0x0000740d, 0x0000750d, + 0x0000760d, 0x0000770d, 0x0000780d, 0x0000790d, + 0x00007a0d, 0x00007b0d, 0x00007c0d, 0x00007d0d, + 0x00007e0d, 0x00007f0d, 0x0000800d, 0x0000810d, + 0x0000820d, 0x0000830d, 0x0000840d, 0x0000850d, + 0x0000860d, 0x0000870d, 0x0000880d, 0x0000890d, + 0x00008a0d, 0x00008b0d, 0x00008c0d, 0x00008d0d, + 0x00008e0d, 0x00008f0d, 0x0000900d, 0x0000910d, + 0x0000920d, 0x0000930d, 0x0000940d, 0x0000950d, + 0x0000960d, 0x0000970d, 0x0000980d, 0x0000990d, + 0x00009a0d, 0x00009b0d, 0x00009c0d, 0x00009d0d, + 0x00009e0d, 0x00009f0d, 0x0000a00d, 0x0000a10d, + 0x0000a20d, 0x0000a30d, 0x0000a40d, 0x0000a50d, + 0x0000a60d, 0x0000a70d, 0x0000a80d, 0x0000a90d, + 0x0000aa0d, 0x0000ab0d, 0x0000ac0d, 0x0000ad0d, + 0x0000ae0d, 0x0000af0d, 0x0000b00d, 0x0000b10d, + 0x0000b20d, 0x0000b30d, 0x0000b40d, 0x0000b50d, + 0x0000b60d, 0x0000b70d, 0x0000b80d, 0x0000b90d, + 0x0000ba0d, 0x0000bb0d, 0x0000bc0d, 0x0000bd0d, + 0x0000be0d, 0x0000bf0d, 0x0000c00d, 0x0000c10d, + 0x0000c20d, 0x0000c30d, 0x0000c40d, 0x0000c50d, + 0x0000c60d, 0x0000c70d, 0x0000c80d, 0x0000c90d, + 0x0000ca0d, 0x0000cb0d, 0x0000cc0d, 0x0000cd0d, + 0x0000ce0d, 0x0000cf0d, 0x0000d00d, 0x0000d10d, + 0x0000d20d, 0x0000d30d, 0x0000d40d, 0x0000d50d, + 0x0000d60d, 0x0000d70d, 0x0000d80d, 0x0000d90d, + 0x0000da0d, 0x0000db0d, 0x0000dc0d, 0x0000dd0d, + 0x0000de0d, 0x0000df0d, 0x0000e00d, 0x0000e10d, + 0x0000e20d, 0x0000e30d, 0x0000e40d, 0x0000e50d, + 0x0000e60d, 0x0000e70d, 0x0000e80d, 0x0000e90d, + 0x0000ea0d, 0x0000eb0d, 0x0000ec0d, 0x0000ed0d, + 0x0000ee0d, 0x0000ef0d, 0x0000f00d, 0x0000f10d, + 0x0000f20d, 0x0000f30d, 0x0000f40d, 
0x0000f50d, + 0x0000f60d, 0x0000f70d, 0x0000f80d, 0x0000f90d, + 0x0000fa0d, 0x0000fb0d, 0x0000fc0d, 0x0000fd0d, + 0x0000fe0d, 0x0000ff0d, 0x0001000d, 0x0001010d, + 0x0001020d, 0x0001030d, 0x0001040d, 0x0001050d, + 0x0001060d, 0x0001070d, 0x0001080d, 0x0001090d, + 0x00010a0d, 0x00010b0d, 0x00010c0d, 0x00010d0d, + 0x00010e0d, 0x00010f0d, 0x0001100d, 0x0001110d, + 0x0001120d, 0x0001130d, 0x0001140d, 0x0001150d, + 0x0001160d, 0x0001170d, 0x0001180d, 0x0001190d, + 0x00011a0d, 0x00011b0d, 0x00011c0d, 0x00011d0d, + 0x00011e0d, 0x00011f0d, 0x0001200d, 0x0001210d, + 0x0001220d, 0x0001230d, 0x0001240d, 0x0001250d, + 0x0001260d, 0x0001270d, 0x0001280d, 0x0001290d, + 0x00012a0d, 0x00012b0d, 0x00012c0d, 0x00012d0d, + 0x00012e0d, 0x00012f0d, 0x0001300d, 0x0001310d, + 0x0001320d, 0x0001330d, 0x0001340d, 0x0001350d, + 0x0001360d, 0x0001370d, 0x0001380d, 0x0001390d, + 0x00013a0d, 0x00013b0d, 0x00013c0d, 0x00013d0d, + 0x00013e0d, 0x00013f0d, 0x0001400d, 0x0001410d, + 0x0001420d, 0x0001430d, 0x0001440d, 0x0001450d, + 0x0001460d, 0x0001470d, 0x0001480d, 0x0001490d, + 0x00014a0d, 0x00014b0d, 0x00014c0d, 0x00014d0d, + 0x00014e0d, 0x00014f0d, 0x0001500d, 0x0001510d, + 0x0001520d, 0x0001530d, 0x0001540d, 0x0001550d, + 0x0001560d, 0x0001570d, 0x0001580d, 0x0001590d, + 0x00015a0d, 0x00015b0d, 0x00015c0d, 0x00015d0d, + 0x00015e0d, 0x00015f0d, 0x0001600d, 0x0001610d, + 0x0001620d, 0x0001630d, 0x0001640d, 0x0001650d, + 0x0001660d, 0x0001670d, 0x0001680d, 0x0001690d, + 0x00016a0d, 0x00016b0d, 0x00016c0d, 0x00016d0d, + 0x00016e0d, 0x00016f0d, 0x0001700d, 0x0001710d, + 0x0001720d, 0x0001730d, 0x0001740d, 0x0001750d, + 0x0001760d, 0x0001770d, 0x0001780d, 0x0001790d, + 0x00017a0d, 0x00017b0d, 0x00017c0d, 0x00017d0d, + 0x00017e0d, 0x00017f0d, 0x0001800d, 0x0001810d, + 0x0001820d, 0x0001830d, 0x0001840d, 0x0001850d, + 0x0001860d, 0x0001870d, 0x0001880d, 0x0001890d, + 0x00018a0d, 0x00018b0d, 0x00018c0d, 0x00018d0d, + 0x00018e0d, 0x00018f0d, 0x0001900d, 0x0001910d, + 0x0001920d, 0x0001930d, 0x0001940d, 
0x0001950d, + 0x0001960d, 0x0001970d, 0x0001980d, 0x0001990d, + 0x00019a0d, 0x00019b0d, 0x00019c0d, 0x00019d0d, + 0x00019e0d, 0x00019f0d, 0x0001a00d, 0x0001a10d, + 0x0001a20d, 0x0001a30d, 0x0001a40d, 0x0001a50d, + 0x0001a60d, 0x0001a70d, 0x0001a80d, 0x0001a90d, + 0x0001aa0d, 0x0001ab0d, 0x0001ac0d, 0x0001ad0d, + 0x0001ae0d, 0x0001af0d, 0x0001b00d, 0x0001b10d, + 0x0001b20d, 0x0001b30d, 0x0001b40d, 0x0001b50d, + 0x0001b60d, 0x0001b70d, 0x0001b80d, 0x0001b90d, + 0x0001ba0d, 0x0001bb0d, 0x0001bc0d, 0x0001bd0d, + 0x0001be0d, 0x0001bf0d, 0x0001c00d, 0x0001c10d, + 0x0001c20d, 0x0001c30d, 0x0001c40d, 0x0001c50d, + 0x0001c60d, 0x0001c70d, 0x0001c80d, 0x0001c90d, + 0x0001ca0d, 0x0001cb0d, 0x0001cc0d, 0x0001cd0d, + 0x0001ce0d, 0x0001cf0d, 0x0001d00d, 0x0001d10d, + 0x0001d20d, 0x0001d30d, 0x0001d40d, 0x0001d50d, + 0x0001d60d, 0x0001d70d, 0x0001d80d, 0x0001d90d, + 0x0001da0d, 0x0001db0d, 0x0001dc0d, 0x0001dd0d, + 0x0001de0d, 0x0001df0d, 0x0001e00d, 0x0001e10d, + 0x0001e20d, 0x0001e30d, 0x0001e40d, 0x0001e50d, + 0x0001e60d, 0x0001e70d, 0x0001e80d, 0x0001e90d, + 0x0001ea0d, 0x0001eb0d, 0x0001ec0d, 0x0001ed0d, + 0x0001ee0d, 0x0001ef0d, 0x0001f00d, 0x0001f10d, + 0x0001f20d, 0x0001f30d, 0x0001f40d, 0x0001f50d, + 0x0001f60d, 0x0001f70d, 0x0001f80d, 0x0001f90d, + 0x0001fa0d, 0x0001fb0d, 0x0001fc0d, 0x0001fd0d, + 0x0001fe0d, 0x0001ff0d, 0x0002000d, 0x0002010d, + 0x0002020d, 0x0002030d, 0x0002040d, 0x0002050d, + 0x0002060d, 0x0002070d, 0x0002080d, 0x0002090d, + 0x00020a0d, 0x00020b0d, 0x00020c0d, 0x00020d0d, + 0x00020e0d, 0x00020f0d, 0x0002100d, 0x0002110d, + 0x0002120d, 0x0002130d, 0x0002140d, 0x0002150d, + 0x0002160d, 0x0002170d, 0x0002180d, 0x0002190d, + 0x00021a0d, 0x00021b0d, 0x00021c0d, 0x00021d0d, + 0x00021e0d, 0x00021f0d, 0x0002200d, 0x0002210d, + 0x0002220d, 0x0002230d, 0x0002240d, 0x0002250d, + 0x0002260d, 0x0002270d, 0x0002280d, 0x0002290d, + 0x00022a0d, 0x00022b0d, 0x00022c0d, 0x00022d0d, + 0x00022e0d, 0x00022f0d, 0x0002300d, 0x0002310d, + 0x0002320d, 0x0002330d, 0x0002340d, 
0x0002350d, + 0x0002360d, 0x0002370d, 0x0002380d, 0x0002390d, + 0x00023a0d, 0x00023b0d, 0x00023c0d, 0x00023d0d, + 0x00023e0d, 0x00023f0d, 0x0002400d, 0x0002410d, + 0x0002420d, 0x0002430d, 0x0002440d, 0x0002450d, + 0x0002460d, 0x0002470d, 0x0002480d, 0x0002490d, + 0x00024a0d, 0x00024b0d, 0x00024c0d, 0x00024d0d, + 0x00024e0d, 0x00024f0d, 0x0002500d, 0x0002510d, + 0x0002520d, 0x0002530d, 0x0002540d, 0x0002550d, + 0x0002560d, 0x0002570d, 0x0002580d, 0x0002590d, + 0x00025a0d, 0x00025b0d, 0x00025c0d, 0x00025d0d, + 0x00025e0d, 0x00025f0d, 0x0002600d, 0x0002610d, + 0x0002620d, 0x0002630d, 0x0002640d, 0x0002650d, + 0x0002660d, 0x0002670d, 0x0002680d, 0x0002690d, + 0x00026a0d, 0x00026b0d, 0x00026c0d, 0x00026d0d, + 0x00026e0d, 0x00026f0d, 0x0002700d, 0x0002710d, + 0x0002720d, 0x0002730d, 0x0002740d, 0x0002750d, + 0x0002760d, 0x0002770d, 0x0002780d, 0x0002790d, + 0x00027a0d, 0x00027b0d, 0x00027c0d, 0x00027d0d, + 0x00027e0d, 0x00027f0d, 0x0002800d, 0x0002810d, + 0x0002820d, 0x0002830d, 0x0002840d, 0x0002850d, + 0x0002860d, 0x0002870d, 0x0002880d, 0x0002890d, + 0x00028a0d, 0x00028b0d, 0x00028c0d, 0x00028d0d, + 0x00028e0d, 0x00028f0d, 0x0002900d, 0x0002910d, + 0x0002920d, 0x0002930d, 0x0002940d, 0x0002950d, + 0x0002960d, 0x0002970d, 0x0002980d, 0x0002990d, + 0x00029a0d, 0x00029b0d, 0x00029c0d, 0x00029d0d, + 0x00029e0d, 0x00029f0d, 0x0002a00d, 0x0002a10d, + 0x0002a20d, 0x0002a30d, 0x0002a40d, 0x0002a50d, + 0x0002a60d, 0x0002a70d, 0x0002a80d, 0x0002a90d, + 0x0002aa0d, 0x0002ab0d, 0x0002ac0d, 0x0002ad0d, + 0x0002ae0d, 0x0002af0d, 0x0002b00d, 0x0002b10d, + 0x0002b20d, 0x0002b30d, 0x0002b40d, 0x0002b50d, + 0x0002b60d, 0x0002b70d, 0x0002b80d, 0x0002b90d, + 0x0002ba0d, 0x0002bb0d, 0x0002bc0d, 0x0002bd0d, + 0x0002be0d, 0x0002bf0d, 0x0002c00d, 0x0002c10d, + 0x0002c20d, 0x0002c30d, 0x0002c40d, 0x0002c50d, + 0x0002c60d, 0x0002c70d, 0x0002c80d, 0x0002c90d, + 0x0002ca0d, 0x0002cb0d, 0x0002cc0d, 0x0002cd0d, + 0x0002ce0d, 0x0002cf0d, 0x0002d00d, 0x0002d10d, + 0x0002d20d, 0x0002d30d, 0x0002d40d, 
0x0002d50d, + 0x0002d60d, 0x0002d70d, 0x0002d80d, 0x0002d90d, + 0x0002da0d, 0x0002db0d, 0x0002dc0d, 0x0002dd0d, + 0x0002de0d, 0x0002df0d, 0x0002e00d, 0x0002e10d, + 0x0002e20d, 0x0002e30d, 0x0002e40d, 0x0002e50d, + 0x0002e60d, 0x0002e70d, 0x0002e80d, 0x0002e90d, + 0x0002ea0d, 0x0002eb0d, 0x0002ec0d, 0x0002ed0d, + 0x0002ee0d, 0x0002ef0d, 0x0002f00d, 0x0002f10d, + 0x0002f20d, 0x0002f30d, 0x0002f40d, 0x0002f50d, + 0x0002f60d, 0x0002f70d, 0x0002f80d, 0x0002f90d, + 0x0002fa0d, 0x0002fb0d, 0x0002fc0d, 0x0002fd0d, + 0x0002fe0d, 0x0002ff0d, 0x0003000d, 0x0003010d, + 0x0003020d, 0x0003030d, 0x0003040d, 0x0003050d, + 0x0003060d, 0x0003070d, 0x0003080d, 0x0003090d, + 0x00030a0d, 0x00030b0d, 0x00030c0d, 0x00030d0d, + 0x00030e0d, 0x00030f0d, 0x0003100d, 0x0003110d, + 0x0003120d, 0x0003130d, 0x0003140d, 0x0003150d, + 0x0003160d, 0x0003170d, 0x0003180d, 0x0003190d, + 0x00031a0d, 0x00031b0d, 0x00031c0d, 0x00031d0d, + 0x00031e0d, 0x00031f0d, 0x0003200d, 0x0003210d, + 0x0003220d, 0x0003230d, 0x0003240d, 0x0003250d, + 0x0003260d, 0x0003270d, 0x0003280d, 0x0003290d, + 0x00032a0d, 0x00032b0d, 0x00032c0d, 0x00032d0d, + 0x00032e0d, 0x00032f0d, 0x0003300d, 0x0003310d, + 0x0003320d, 0x0003330d, 0x0003340d, 0x0003350d, + 0x0003360d, 0x0003370d, 0x0003380d, 0x0003390d, + 0x00033a0d, 0x00033b0d, 0x00033c0d, 0x00033d0d, + 0x00033e0d, 0x00033f0d, 0x0003400d, 0x0003410d, + 0x0003420d, 0x0003430d, 0x0003440d, 0x0003450d, + 0x0003460d, 0x0003470d, 0x0003480d, 0x0003490d, + 0x00034a0d, 0x00034b0d, 0x00034c0d, 0x00034d0d, + 0x00034e0d, 0x00034f0d, 0x0003500d, 0x0003510d, + 0x0003520d, 0x0003530d, 0x0003540d, 0x0003550d, + 0x0003560d, 0x0003570d, 0x0003580d, 0x0003590d, + 0x00035a0d, 0x00035b0d, 0x00035c0d, 0x00035d0d, + 0x00035e0d, 0x00035f0d, 0x0003600d, 0x0003610d, + 0x0003620d, 0x0003630d, 0x0003640d, 0x0003650d, + 0x0003660d, 0x0003670d, 0x0003680d, 0x0003690d, + 0x00036a0d, 0x00036b0d, 0x00036c0d, 0x00036d0d, + 0x00036e0d, 0x00036f0d, 0x0003700d, 0x0003710d, + 0x0003720d, 0x0003730d, 0x0003740d, 
0x0003750d, + 0x0003760d, 0x0003770d, 0x0003780d, 0x0003790d, + 0x00037a0d, 0x00037b0d, 0x00037c0d, 0x00037d0d, + 0x00037e0d, 0x00037f0d, 0x0003800d, 0x0003810d, + 0x0003820d, 0x0003830d, 0x0003840d, 0x0003850d, + 0x0003860d, 0x0003870d, 0x0003880d, 0x0003890d, + 0x00038a0d, 0x00038b0d, 0x00038c0d, 0x00038d0d, + 0x00038e0d, 0x00038f0d, 0x0003900d, 0x0003910d, + 0x0003920d, 0x0003930d, 0x0003940d, 0x0003950d, + 0x0003960d, 0x0003970d, 0x0003980d, 0x0003990d, + 0x00039a0d, 0x00039b0d, 0x00039c0d, 0x00039d0d, + 0x00039e0d, 0x00039f0d, 0x0003a00d, 0x0003a10d, + 0x0003a20d, 0x0003a30d, 0x0003a40d, 0x0003a50d, + 0x0003a60d, 0x0003a70d, 0x0003a80d, 0x0003a90d, + 0x0003aa0d, 0x0003ab0d, 0x0003ac0d, 0x0003ad0d, + 0x0003ae0d, 0x0003af0d, 0x0003b00d, 0x0003b10d, + 0x0003b20d, 0x0003b30d, 0x0003b40d, 0x0003b50d, + 0x0003b60d, 0x0003b70d, 0x0003b80d, 0x0003b90d, + 0x0003ba0d, 0x0003bb0d, 0x0003bc0d, 0x0003bd0d, + 0x0003be0d, 0x0003bf0d, 0x0003c00d, 0x0003c10d, + 0x0003c20d, 0x0003c30d, 0x0003c40d, 0x0003c50d, + 0x0003c60d, 0x0003c70d, 0x0003c80d, 0x0003c90d, + 0x0003ca0d, 0x0003cb0d, 0x0003cc0d, 0x0003cd0d, + 0x0003ce0d, 0x0003cf0d, 0x0003d00d, 0x0003d10d, + 0x0003d20d, 0x0003d30d, 0x0003d40d, 0x0003d50d, + 0x0003d60d, 0x0003d70d, 0x0003d80d, 0x0003d90d, + 0x0003da0d, 0x0003db0d, 0x0003dc0d, 0x0003dd0d, + 0x0003de0d, 0x0003df0d, 0x0003e00d, 0x0003e10d, + 0x0003e20d, 0x0003e30d, 0x0003e40d, 0x0003e50d, + 0x0003e60d, 0x0003e70d, 0x0003e80d, 0x0003e90d, + 0x0003ea0d, 0x0003eb0d, 0x0003ec0d, 0x0003ed0d, + 0x0003ee0d, 0x0003ef0d, 0x0003f00d, 0x0003f10d, + 0x0003f20d, 0x0003f30d, 0x0003f40d, 0x0003f50d, + 0x0003f60d, 0x0003f70d, 0x0003f80d, 0x0003f90d, + 0x0003fa0d, 0x0003fb0d, 0x0003fc0d, 0x0003fd0d, + 0x0003fe0d, 0x0003ff0d, 0x0000026f, 0x0000066f, + 0x00000a6f, 0x00000e6f, 0x0000126f, 0x0000166f, + 0x00001a6f, 0x00001e6f, 0x0000226f, 0x0000266f, + 0x00002a6f, 0x00002e6f, 0x0000326f, 0x0000366f, + 0x00003a6f, 0x00003e6f, 0x0000426f, 0x0000466f, + 0x00004a6f, 0x00004e6f, 0x0000526f, 
0x0000566f, + 0x00005a6f, 0x00005e6f, 0x0000626f, 0x0000666f, + 0x00006a6f, 0x00006e6f, 0x0000726f, 0x0000766f, + 0x00007a6f, 0x00007e6f, 0x0000826f, 0x0000866f, + 0x00008a6f, 0x00008e6f, 0x0000926f, 0x0000966f, + 0x00009a6f, 0x00009e6f, 0x0000a26f, 0x0000a66f, + 0x0000aa6f, 0x0000ae6f, 0x0000b26f, 0x0000b66f, + 0x0000ba6f, 0x0000be6f, 0x0000c26f, 0x0000c66f, + 0x0000ca6f, 0x0000ce6f, 0x0000d26f, 0x0000d66f, + 0x0000da6f, 0x0000de6f, 0x0000e26f, 0x0000e66f, + 0x0000ea6f, 0x0000ee6f, 0x0000f26f, 0x0000f66f, + 0x0000fa6f, 0x0000fe6f, 0x0001026f, 0x0001066f, + 0x00010a6f, 0x00010e6f, 0x0001126f, 0x0001166f, + 0x00011a6f, 0x00011e6f, 0x0001226f, 0x0001266f, + 0x00012a6f, 0x00012e6f, 0x0001326f, 0x0001366f, + 0x00013a6f, 0x00013e6f, 0x0001426f, 0x0001466f, + 0x00014a6f, 0x00014e6f, 0x0001526f, 0x0001566f, + 0x00015a6f, 0x00015e6f, 0x0001626f, 0x0001666f, + 0x00016a6f, 0x00016e6f, 0x0001726f, 0x0001766f, + 0x00017a6f, 0x00017e6f, 0x0001826f, 0x0001866f, + 0x00018a6f, 0x00018e6f, 0x0001926f, 0x0001966f, + 0x00019a6f, 0x00019e6f, 0x0001a26f, 0x0001a66f, + 0x0001aa6f, 0x0001ae6f, 0x0001b26f, 0x0001b66f, + 0x0001ba6f, 0x0001be6f, 0x0001c26f, 0x0001c66f, + 0x0001ca6f, 0x0001ce6f, 0x0001d26f, 0x0001d66f, + 0x0001da6f, 0x0001de6f, 0x0001e26f, 0x0001e66f, + 0x0001ea6f, 0x0001ee6f, 0x0001f26f, 0x0001f66f, + 0x0001fa6f, 0x0001fe6f, 0x0002026f, 0x0002066f, + 0x00020a6f, 0x00020e6f, 0x0002126f, 0x0002166f, + 0x00021a6f, 0x00021e6f, 0x0002226f, 0x0002266f, + 0x00022a6f, 0x00022e6f, 0x0002326f, 0x0002366f, + 0x00023a6f, 0x00023e6f, 0x0002426f, 0x0002466f, + 0x00024a6f, 0x00024e6f, 0x0002526f, 0x0002566f, + 0x00025a6f, 0x00025e6f, 0x0002626f, 0x0002666f, + 0x00026a6f, 0x00026e6f, 0x0002726f, 0x0002766f, + 0x00027a6f, 0x00027e6f, 0x0002826f, 0x0002866f, + 0x00028a6f, 0x00028e6f, 0x0002926f, 0x0002966f, + 0x00029a6f, 0x00029e6f, 0x0002a26f, 0x0002a66f, + 0x0002aa6f, 0x0002ae6f, 0x0002b26f, 0x0002b66f, + 0x0002ba6f, 0x0002be6f, 0x0002c26f, 0x0002c66f, + 0x0002ca6f, 0x0002ce6f, 0x0002d26f, 
0x0002d66f, + 0x0002da6f, 0x0002de6f, 0x0002e26f, 0x0002e66f, + 0x0002ea6f, 0x0002ee6f, 0x0002f26f, 0x0002f66f, + 0x0002fa6f, 0x0002fe6f, 0x0003026f, 0x0003066f, + 0x00030a6f, 0x00030e6f, 0x0003126f, 0x0003166f, + 0x00031a6f, 0x00031e6f, 0x0003226f, 0x0003266f, + 0x00032a6f, 0x00032e6f, 0x0003326f, 0x0003366f, + 0x00033a6f, 0x00033e6f, 0x0003426f, 0x0003466f, + 0x00034a6f, 0x00034e6f, 0x0003526f, 0x0003566f, + 0x00035a6f, 0x00035e6f, 0x0003626f, 0x0003666f, + 0x00036a6f, 0x00036e6f, 0x0003726f, 0x0003766f, + 0x00037a6f, 0x00037e6f, 0x0003826f, 0x0003866f, + 0x00038a6f, 0x00038e6f, 0x0003926f, 0x0003966f, + 0x00039a6f, 0x00039e6f, 0x0003a26f, 0x0003a66f, + 0x0003aa6f, 0x0003ae6f, 0x0003b26f, 0x0003b66f, + 0x0003ba6f, 0x0003be6f, 0x0003c26f, 0x0003c66f, + 0x0003ca6f, 0x0003ce6f, 0x0003d26f, 0x0003d66f, + 0x0003da6f, 0x0003de6f, 0x0003e26f, 0x0003e66f, + 0x0003ea6f, 0x0003ee6f, 0x0003f26f, 0x0003f66f, + 0x0003fa6f, 0x0003fe6f, 0x0004026f, 0x0004066f, + 0x00040a6f, 0x00040e6f, 0x0004126f, 0x0004166f, + 0x00041a6f, 0x00041e6f, 0x0004226f, 0x0004266f, + 0x00042a6f, 0x00042e6f, 0x0004326f, 0x0004366f, + 0x00043a6f, 0x00043e6f, 0x0004426f, 0x0004466f, + 0x00044a6f, 0x00044e6f, 0x0004526f, 0x0004566f, + 0x00045a6f, 0x00045e6f, 0x0004626f, 0x0004666f, + 0x00046a6f, 0x00046e6f, 0x0004726f, 0x0004766f, + 0x00047a6f, 0x00047e6f, 0x0004826f, 0x0004866f, + 0x00048a6f, 0x00048e6f, 0x0004926f, 0x0004966f, + 0x00049a6f, 0x00049e6f, 0x0004a26f, 0x0004a66f, + 0x0004aa6f, 0x0004ae6f, 0x0004b26f, 0x0004b66f, + 0x0004ba6f, 0x0004be6f, 0x0004c26f, 0x0004c66f, + 0x0004ca6f, 0x0004ce6f, 0x0004d26f, 0x0004d66f, + 0x0004da6f, 0x0004de6f, 0x0004e26f, 0x0004e66f, + 0x0004ea6f, 0x0004ee6f, 0x0004f26f, 0x0004f66f, + 0x0004fa6f, 0x0004fe6f, 0x0005026f, 0x0005066f, + 0x00050a6f, 0x00050e6f, 0x0005126f, 0x0005166f, + 0x00051a6f, 0x00051e6f, 0x0005226f, 0x0005266f, + 0x00052a6f, 0x00052e6f, 0x0005326f, 0x0005366f, + 0x00053a6f, 0x00053e6f, 0x0005426f, 0x0005466f, + 0x00054a6f, 0x00054e6f, 0x0005526f, 
0x0005566f, + 0x00055a6f, 0x00055e6f, 0x0005626f, 0x0005666f, + 0x00056a6f, 0x00056e6f, 0x0005726f, 0x0005766f, + 0x00057a6f, 0x00057e6f, 0x0005826f, 0x0005866f, + 0x00058a6f, 0x00058e6f, 0x0005926f, 0x0005966f, + 0x00059a6f, 0x00059e6f, 0x0005a26f, 0x0005a66f, + 0x0005aa6f, 0x0005ae6f, 0x0005b26f, 0x0005b66f, + 0x0005ba6f, 0x0005be6f, 0x0005c26f, 0x0005c66f, + 0x0005ca6f, 0x0005ce6f, 0x0005d26f, 0x0005d66f, + 0x0005da6f, 0x0005de6f, 0x0005e26f, 0x0005e66f, + 0x0005ea6f, 0x0005ee6f, 0x0005f26f, 0x0005f66f, + 0x0005fa6f, 0x0005fe6f, 0x0006026f, 0x0006066f, + 0x00060a6f, 0x00060e6f, 0x0006126f, 0x0006166f, + 0x00061a6f, 0x00061e6f, 0x0006226f, 0x0006266f, + 0x00062a6f, 0x00062e6f, 0x0006326f, 0x0006366f, + 0x00063a6f, 0x00063e6f, 0x0006426f, 0x0006466f, + 0x00064a6f, 0x00064e6f, 0x0006526f, 0x0006566f, + 0x00065a6f, 0x00065e6f, 0x0006626f, 0x0006666f, + 0x00066a6f, 0x00066e6f, 0x0006726f, 0x0006766f, + 0x00067a6f, 0x00067e6f, 0x0006826f, 0x0006866f, + 0x00068a6f, 0x00068e6f, 0x0006926f, 0x0006966f, + 0x00069a6f, 0x00069e6f, 0x0006a26f, 0x0006a66f, + 0x0006aa6f, 0x0006ae6f, 0x0006b26f, 0x0006b66f, + 0x0006ba6f, 0x0006be6f, 0x0006c26f, 0x0006c66f, + 0x0006ca6f, 0x0006ce6f, 0x0006d26f, 0x0006d66f, + 0x0006da6f, 0x0006de6f, 0x0006e26f, 0x0006e66f, + 0x0006ea6f, 0x0006ee6f, 0x0006f26f, 0x0006f66f, + 0x0006fa6f, 0x0006fe6f, 0x0007026f, 0x0007066f, + 0x00070a6f, 0x00070e6f, 0x0007126f, 0x0007166f, + 0x00071a6f, 0x00071e6f, 0x0007226f, 0x0007266f, + 0x00072a6f, 0x00072e6f, 0x0007326f, 0x0007366f, + 0x00073a6f, 0x00073e6f, 0x0007426f, 0x0007466f, + 0x00074a6f, 0x00074e6f, 0x0007526f, 0x0007566f, + 0x00075a6f, 0x00075e6f, 0x0007626f, 0x0007666f, + 0x00076a6f, 0x00076e6f, 0x0007726f, 0x0007766f, + 0x00077a6f, 0x00077e6f, 0x0007826f, 0x0007866f, + 0x00078a6f, 0x00078e6f, 0x0007926f, 0x0007966f, + 0x00079a6f, 0x00079e6f, 0x0007a26f, 0x0007a66f, + 0x0007aa6f, 0x0007ae6f, 0x0007b26f, 0x0007b66f, + 0x0007ba6f, 0x0007be6f, 0x0007c26f, 0x0007c66f, + 0x0007ca6f, 0x0007ce6f, 0x0007d26f, 
0x0007d66f, + 0x0007da6f, 0x0007de6f, 0x0007e26f, 0x0007e66f, + 0x0007ea6f, 0x0007ee6f, 0x0007f26f, 0x0007f66f, + 0x0007fa6f, 0x0007fe6f, 0x0008026f, 0x0008066f, + 0x00080a6f, 0x00080e6f, 0x0008126f, 0x0008166f, + 0x00081a6f, 0x00081e6f, 0x0008226f, 0x0008266f, + 0x00082a6f, 0x00082e6f, 0x0008326f, 0x0008366f, + 0x00083a6f, 0x00083e6f, 0x0008426f, 0x0008466f, + 0x00084a6f, 0x00084e6f, 0x0008526f, 0x0008566f, + 0x00085a6f, 0x00085e6f, 0x0008626f, 0x0008666f, + 0x00086a6f, 0x00086e6f, 0x0008726f, 0x0008766f, + 0x00087a6f, 0x00087e6f, 0x0008826f, 0x0008866f, + 0x00088a6f, 0x00088e6f, 0x0008926f, 0x0008966f, + 0x00089a6f, 0x00089e6f, 0x0008a26f, 0x0008a66f, + 0x0008aa6f, 0x0008ae6f, 0x0008b26f, 0x0008b66f, + 0x0008ba6f, 0x0008be6f, 0x0008c26f, 0x0008c66f, + 0x0008ca6f, 0x0008ce6f, 0x0008d26f, 0x0008d66f, + 0x0008da6f, 0x0008de6f, 0x0008e26f, 0x0008e66f, + 0x0008ea6f, 0x0008ee6f, 0x0008f26f, 0x0008f66f, + 0x0008fa6f, 0x0008fe6f, 0x0009026f, 0x0009066f, + 0x00090a6f, 0x00090e6f, 0x0009126f, 0x0009166f, + 0x00091a6f, 0x00091e6f, 0x0009226f, 0x0009266f, + 0x00092a6f, 0x00092e6f, 0x0009326f, 0x0009366f, + 0x00093a6f, 0x00093e6f, 0x0009426f, 0x0009466f, + 0x00094a6f, 0x00094e6f, 0x0009526f, 0x0009566f, + 0x00095a6f, 0x00095e6f, 0x0009626f, 0x0009666f, + 0x00096a6f, 0x00096e6f, 0x0009726f, 0x0009766f, + 0x00097a6f, 0x00097e6f, 0x0009826f, 0x0009866f, + 0x00098a6f, 0x00098e6f, 0x0009926f, 0x0009966f, + 0x00099a6f, 0x00099e6f, 0x0009a26f, 0x0009a66f, + 0x0009aa6f, 0x0009ae6f, 0x0009b26f, 0x0009b66f, + 0x0009ba6f, 0x0009be6f, 0x0009c26f, 0x0009c66f, + 0x0009ca6f, 0x0009ce6f, 0x0009d26f, 0x0009d66f, + 0x0009da6f, 0x0009de6f, 0x0009e26f, 0x0009e66f, + 0x0009ea6f, 0x0009ee6f, 0x0009f26f, 0x0009f66f, + 0x0009fa6f, 0x0009fe6f, 0x000a026f, 0x000a066f, + 0x000a0a6f, 0x000a0e6f, 0x000a126f, 0x000a166f, + 0x000a1a6f, 0x000a1e6f, 0x000a226f, 0x000a266f, + 0x000a2a6f, 0x000a2e6f, 0x000a326f, 0x000a366f, + 0x000a3a6f, 0x000a3e6f, 0x000a426f, 0x000a466f, + 0x000a4a6f, 0x000a4e6f, 0x000a526f, 
0x000a566f, + 0x000a5a6f, 0x000a5e6f, 0x000a626f, 0x000a666f, + 0x000a6a6f, 0x000a6e6f, 0x000a726f, 0x000a766f, + 0x000a7a6f, 0x000a7e6f, 0x000a826f, 0x000a866f, + 0x000a8a6f, 0x000a8e6f, 0x000a926f, 0x000a966f, + 0x000a9a6f, 0x000a9e6f, 0x000aa26f, 0x000aa66f, + 0x000aaa6f, 0x000aae6f, 0x000ab26f, 0x000ab66f, + 0x000aba6f, 0x000abe6f, 0x000ac26f, 0x000ac66f, + 0x000aca6f, 0x000ace6f, 0x000ad26f, 0x000ad66f, + 0x000ada6f, 0x000ade6f, 0x000ae26f, 0x000ae66f, + 0x000aea6f, 0x000aee6f, 0x000af26f, 0x000af66f, + 0x000afa6f, 0x000afe6f, 0x000b026f, 0x000b066f, + 0x000b0a6f, 0x000b0e6f, 0x000b126f, 0x000b166f, + 0x000b1a6f, 0x000b1e6f, 0x000b226f, 0x000b266f, + 0x000b2a6f, 0x000b2e6f, 0x000b326f, 0x000b366f, + 0x000b3a6f, 0x000b3e6f, 0x000b426f, 0x000b466f, + 0x000b4a6f, 0x000b4e6f, 0x000b526f, 0x000b566f, + 0x000b5a6f, 0x000b5e6f, 0x000b626f, 0x000b666f, + 0x000b6a6f, 0x000b6e6f, 0x000b726f, 0x000b766f, + 0x000b7a6f, 0x000b7e6f, 0x000b826f, 0x000b866f, + 0x000b8a6f, 0x000b8e6f, 0x000b926f, 0x000b966f, + 0x000b9a6f, 0x000b9e6f, 0x000ba26f, 0x000ba66f, + 0x000baa6f, 0x000bae6f, 0x000bb26f, 0x000bb66f, + 0x000bba6f, 0x000bbe6f, 0x000bc26f, 0x000bc66f, + 0x000bca6f, 0x000bce6f, 0x000bd26f, 0x000bd66f, + 0x000bda6f, 0x000bde6f, 0x000be26f, 0x000be66f, + 0x000bea6f, 0x000bee6f, 0x000bf26f, 0x000bf66f, + 0x000bfa6f, 0x000bfe6f, 0x000c026f, 0x000c066f, + 0x000c0a6f, 0x000c0e6f, 0x000c126f, 0x000c166f, + 0x000c1a6f, 0x000c1e6f, 0x000c226f, 0x000c266f, + 0x000c2a6f, 0x000c2e6f, 0x000c326f, 0x000c366f, + 0x000c3a6f, 0x000c3e6f, 0x000c426f, 0x000c466f, + 0x000c4a6f, 0x000c4e6f, 0x000c526f, 0x000c566f, + 0x000c5a6f, 0x000c5e6f, 0x000c626f, 0x000c666f, + 0x000c6a6f, 0x000c6e6f, 0x000c726f, 0x000c766f, + 0x000c7a6f, 0x000c7e6f, 0x000c826f, 0x000c866f, + 0x000c8a6f, 0x000c8e6f, 0x000c926f, 0x000c966f, + 0x000c9a6f, 0x000c9e6f, 0x000ca26f, 0x000ca66f, + 0x000caa6f, 0x000cae6f, 0x000cb26f, 0x000cb66f, + 0x000cba6f, 0x000cbe6f, 0x000cc26f, 0x000cc66f, + 0x000cca6f, 0x000cce6f, 0x000cd26f, 
0x000cd66f, + 0x000cda6f, 0x000cde6f, 0x000ce26f, 0x000ce66f, + 0x000cea6f, 0x000cee6f, 0x000cf26f, 0x000cf66f, + 0x000cfa6f, 0x000cfe6f, 0x000d026f, 0x000d066f, + 0x000d0a6f, 0x000d0e6f, 0x000d126f, 0x000d166f, + 0x000d1a6f, 0x000d1e6f, 0x000d226f, 0x000d266f, + 0x000d2a6f, 0x000d2e6f, 0x000d326f, 0x000d366f, + 0x000d3a6f, 0x000d3e6f, 0x000d426f, 0x000d466f, + 0x000d4a6f, 0x000d4e6f, 0x000d526f, 0x000d566f, + 0x000d5a6f, 0x000d5e6f, 0x000d626f, 0x000d666f, + 0x000d6a6f, 0x000d6e6f, 0x000d726f, 0x000d766f, + 0x000d7a6f, 0x000d7e6f, 0x000d826f, 0x000d866f, + 0x000d8a6f, 0x000d8e6f, 0x000d926f, 0x000d966f, + 0x000d9a6f, 0x000d9e6f, 0x000da26f, 0x000da66f, + 0x000daa6f, 0x000dae6f, 0x000db26f, 0x000db66f, + 0x000dba6f, 0x000dbe6f, 0x000dc26f, 0x000dc66f, + 0x000dca6f, 0x000dce6f, 0x000dd26f, 0x000dd66f, + 0x000dda6f, 0x000dde6f, 0x000de26f, 0x000de66f, + 0x000dea6f, 0x000dee6f, 0x000df26f, 0x000df66f, + 0x000dfa6f, 0x000dfe6f, 0x000e026f, 0x000e066f, + 0x000e0a6f, 0x000e0e6f, 0x000e126f, 0x000e166f, + 0x000e1a6f, 0x000e1e6f, 0x000e226f, 0x000e266f, + 0x000e2a6f, 0x000e2e6f, 0x000e326f, 0x000e366f, + 0x000e3a6f, 0x000e3e6f, 0x000e426f, 0x000e466f, + 0x000e4a6f, 0x000e4e6f, 0x000e526f, 0x000e566f, + 0x000e5a6f, 0x000e5e6f, 0x000e626f, 0x000e666f, + 0x000e6a6f, 0x000e6e6f, 0x000e726f, 0x000e766f, + 0x000e7a6f, 0x000e7e6f, 0x000e826f, 0x000e866f, + 0x000e8a6f, 0x000e8e6f, 0x000e926f, 0x000e966f, + 0x000e9a6f, 0x000e9e6f, 0x000ea26f, 0x000ea66f, + 0x000eaa6f, 0x000eae6f, 0x000eb26f, 0x000eb66f, + 0x000eba6f, 0x000ebe6f, 0x000ec26f, 0x000ec66f, + 0x000eca6f, 0x000ece6f, 0x000ed26f, 0x000ed66f, + 0x000eda6f, 0x000ede6f, 0x000ee26f, 0x000ee66f, + 0x000eea6f, 0x000eee6f, 0x000ef26f, 0x000ef66f, + 0x000efa6f, 0x000efe6f, 0x000f026f, 0x000f066f, + 0x000f0a6f, 0x000f0e6f, 0x000f126f, 0x000f166f, + 0x000f1a6f, 0x000f1e6f, 0x000f226f, 0x000f266f, + 0x000f2a6f, 0x000f2e6f, 0x000f326f, 0x000f366f, + 0x000f3a6f, 0x000f3e6f, 0x000f426f, 0x000f466f, + 0x000f4a6f, 0x000f4e6f, 0x000f526f, 
0x000f566f, + 0x000f5a6f, 0x000f5e6f, 0x000f626f, 0x000f666f, + 0x000f6a6f, 0x000f6e6f, 0x000f726f, 0x000f766f, + 0x000f7a6f, 0x000f7e6f, 0x000f826f, 0x000f866f, + 0x000f8a6f, 0x000f8e6f, 0x000f926f, 0x000f966f, + 0x000f9a6f, 0x000f9e6f, 0x000fa26f, 0x000fa66f, + 0x000faa6f, 0x000fae6f, 0x000fb26f, 0x000fb66f, + 0x000fba6f, 0x000fbe6f, 0x000fc26f, 0x000fc66f, + 0x000fca6f, 0x000fce6f, 0x000fd26f, 0x000fd66f, + 0x000fda6f, 0x000fde6f, 0x000fe26f, 0x000fe66f, + 0x000fea6f, 0x000fee6f, 0x000ff26f, 0x000ff66f, + 0x000ffa6f, 0x000ffe6f, 0x000001cf, 0x000003cf, + 0x000005cf, 0x000007cf, 0x000009cf, 0x00000bcf, + 0x00000dcf, 0x00000fcf, 0x000011cf, 0x000013cf, + 0x000015cf, 0x000017cf, 0x000019cf, 0x00001bcf, + 0x00001dcf, 0x00001fcf, 0x000021cf, 0x000023cf, + 0x000025cf, 0x000027cf, 0x000029cf, 0x00002bcf, + 0x00002dcf, 0x00002fcf, 0x000031cf, 0x000033cf, + 0x000035cf, 0x000037cf, 0x000039cf, 0x00003bcf, + 0x00003dcf, 0x00003fcf, 0x000041cf, 0x000043cf, + 0x000045cf, 0x000047cf, 0x000049cf, 0x00004bcf, + 0x00004dcf, 0x00004fcf, 0x000051cf, 0x000053cf, + 0x000055cf, 0x000057cf, 0x000059cf, 0x00005bcf, + 0x00005dcf, 0x00005fcf, 0x000061cf, 0x000063cf, + 0x000065cf, 0x000067cf, 0x000069cf, 0x00006bcf, + 0x00006dcf, 0x00006fcf, 0x000071cf, 0x000073cf, + 0x000075cf, 0x000077cf, 0x000079cf, 0x00007bcf, + 0x00007dcf, 0x00007fcf, 0x000081cf, 0x000083cf, + 0x000085cf, 0x000087cf, 0x000089cf, 0x00008bcf, + 0x00008dcf, 0x00008fcf, 0x000091cf, 0x000093cf, + 0x000095cf, 0x000097cf, 0x000099cf, 0x00009bcf, + 0x00009dcf, 0x00009fcf, 0x0000a1cf, 0x0000a3cf, + 0x0000a5cf, 0x0000a7cf, 0x0000a9cf, 0x0000abcf, + 0x0000adcf, 0x0000afcf, 0x0000b1cf, 0x0000b3cf, + 0x0000b5cf, 0x0000b7cf, 0x0000b9cf, 0x0000bbcf, + 0x0000bdcf, 0x0000bfcf, 0x0000c1cf, 0x0000c3cf, + 0x0000c5cf, 0x0000c7cf, 0x0000c9cf, 0x0000cbcf, + 0x0000cdcf, 0x0000cfcf, 0x0000d1cf, 0x0000d3cf, + 0x0000d5cf, 0x0000d7cf, 0x0000d9cf, 0x0000dbcf, + 0x0000ddcf, 0x0000dfcf, 0x0000e1cf, 0x0000e3cf, + 0x0000e5cf, 0x0000e7cf, 0x0000e9cf, 
0x0000ebcf, + 0x0000edcf, 0x0000efcf, 0x0000f1cf, 0x0000f3cf, + 0x0000f5cf, 0x0000f7cf, 0x0000f9cf, 0x0000fbcf, + 0x0000fdcf, 0x0000ffcf, 0x000101cf, 0x000103cf, + 0x000105cf, 0x000107cf, 0x000109cf, 0x00010bcf, + 0x00010dcf, 0x00010fcf, 0x000111cf, 0x000113cf, + 0x000115cf, 0x000117cf, 0x000119cf, 0x00011bcf, + 0x00011dcf, 0x00011fcf, 0x000121cf, 0x000123cf, + 0x000125cf, 0x000127cf, 0x000129cf, 0x00012bcf, + 0x00012dcf, 0x00012fcf, 0x000131cf, 0x000133cf, + 0x000135cf, 0x000137cf, 0x000139cf, 0x00013bcf, + 0x00013dcf, 0x00013fcf, 0x000141cf, 0x000143cf, + 0x000145cf, 0x000147cf, 0x000149cf, 0x00014bcf, + 0x00014dcf, 0x00014fcf, 0x000151cf, 0x000153cf, + 0x000155cf, 0x000157cf, 0x000159cf, 0x00015bcf, + 0x00015dcf, 0x00015fcf, 0x000161cf, 0x000163cf, + 0x000165cf, 0x000167cf, 0x000169cf, 0x00016bcf, + 0x00016dcf, 0x00016fcf, 0x000171cf, 0x000173cf, + 0x000175cf, 0x000177cf, 0x000179cf, 0x00017bcf, + 0x00017dcf, 0x00017fcf, 0x000181cf, 0x000183cf, + 0x000185cf, 0x000187cf, 0x000189cf, 0x00018bcf, + 0x00018dcf, 0x00018fcf, 0x000191cf, 0x000193cf, + 0x000195cf, 0x000197cf, 0x000199cf, 0x00019bcf, + 0x00019dcf, 0x00019fcf, 0x0001a1cf, 0x0001a3cf, + 0x0001a5cf, 0x0001a7cf, 0x0001a9cf, 0x0001abcf, + 0x0001adcf, 0x0001afcf, 0x0001b1cf, 0x0001b3cf, + 0x0001b5cf, 0x0001b7cf, 0x0001b9cf, 0x0001bbcf, + 0x0001bdcf, 0x0001bfcf, 0x0001c1cf, 0x0001c3cf, + 0x0001c5cf, 0x0001c7cf, 0x0001c9cf, 0x0001cbcf, + 0x0001cdcf, 0x0001cfcf, 0x0001d1cf, 0x0001d3cf, + 0x0001d5cf, 0x0001d7cf, 0x0001d9cf, 0x0001dbcf, + 0x0001ddcf, 0x0001dfcf, 0x0001e1cf, 0x0001e3cf, + 0x0001e5cf, 0x0001e7cf, 0x0001e9cf, 0x0001ebcf, + 0x0001edcf, 0x0001efcf, 0x0001f1cf, 0x0001f3cf, + 0x0001f5cf, 0x0001f7cf, 0x0001f9cf, 0x0001fbcf, + 0x0001fdcf, 0x0001ffcf, 0x000201cf, 0x000203cf, + 0x000205cf, 0x000207cf, 0x000209cf, 0x00020bcf, + 0x00020dcf, 0x00020fcf, 0x000211cf, 0x000213cf, + 0x000215cf, 0x000217cf, 0x000219cf, 0x00021bcf, + 0x00021dcf, 0x00021fcf, 0x000221cf, 0x000223cf, + 0x000225cf, 0x000227cf, 0x000229cf, 
0x00022bcf, + 0x00022dcf, 0x00022fcf, 0x000231cf, 0x000233cf, + 0x000235cf, 0x000237cf, 0x000239cf, 0x00023bcf, + 0x00023dcf, 0x00023fcf, 0x000241cf, 0x000243cf, + 0x000245cf, 0x000247cf, 0x000249cf, 0x00024bcf, + 0x00024dcf, 0x00024fcf, 0x000251cf, 0x000253cf, + 0x000255cf, 0x000257cf, 0x000259cf, 0x00025bcf, + 0x00025dcf, 0x00025fcf, 0x000261cf, 0x000263cf, + 0x000265cf, 0x000267cf, 0x000269cf, 0x00026bcf, + 0x00026dcf, 0x00026fcf, 0x000271cf, 0x000273cf, + 0x000275cf, 0x000277cf, 0x000279cf, 0x00027bcf, + 0x00027dcf, 0x00027fcf, 0x000281cf, 0x000283cf, + 0x000285cf, 0x000287cf, 0x000289cf, 0x00028bcf, + 0x00028dcf, 0x00028fcf, 0x000291cf, 0x000293cf, + 0x000295cf, 0x000297cf, 0x000299cf, 0x00029bcf, + 0x00029dcf, 0x00029fcf, 0x0002a1cf, 0x0002a3cf, + 0x0002a5cf, 0x0002a7cf, 0x0002a9cf, 0x0002abcf, + 0x0002adcf, 0x0002afcf, 0x0002b1cf, 0x0002b3cf, + 0x0002b5cf, 0x0002b7cf, 0x0002b9cf, 0x0002bbcf, + 0x0002bdcf, 0x0002bfcf, 0x0002c1cf, 0x0002c3cf, + 0x0002c5cf, 0x0002c7cf, 0x0002c9cf, 0x0002cbcf, + 0x0002cdcf, 0x0002cfcf, 0x0002d1cf, 0x0002d3cf, + 0x0002d5cf, 0x0002d7cf, 0x0002d9cf, 0x0002dbcf, + 0x0002ddcf, 0x0002dfcf, 0x0002e1cf, 0x0002e3cf, + 0x0002e5cf, 0x0002e7cf, 0x0002e9cf, 0x0002ebcf, + 0x0002edcf, 0x0002efcf, 0x0002f1cf, 0x0002f3cf, + 0x0002f5cf, 0x0002f7cf, 0x0002f9cf, 0x0002fbcf, + 0x0002fdcf, 0x0002ffcf, 0x000301cf, 0x000303cf, + 0x000305cf, 0x000307cf, 0x000309cf, 0x00030bcf, + 0x00030dcf, 0x00030fcf, 0x000311cf, 0x000313cf, + 0x000315cf, 0x000317cf, 0x000319cf, 0x00031bcf, + 0x00031dcf, 0x00031fcf, 0x000321cf, 0x000323cf, + 0x000325cf, 0x000327cf, 0x000329cf, 0x00032bcf, + 0x00032dcf, 0x00032fcf, 0x000331cf, 0x000333cf, + 0x000335cf, 0x000337cf, 0x000339cf, 0x00033bcf, + 0x00033dcf, 0x00033fcf, 0x000341cf, 0x000343cf, + 0x000345cf, 0x000347cf, 0x000349cf, 0x00034bcf, + 0x00034dcf, 0x00034fcf, 0x000351cf, 0x000353cf, + 0x000355cf, 0x000357cf, 0x000359cf, 0x00035bcf, + 0x00035dcf, 0x00035fcf, 0x000361cf, 0x000363cf, + 0x000365cf, 0x000367cf, 0x000369cf, 
0x00036bcf, + 0x00036dcf, 0x00036fcf, 0x000371cf, 0x000373cf, + 0x000375cf, 0x000377cf, 0x000379cf, 0x00037bcf, + 0x00037dcf, 0x00037fcf, 0x000381cf, 0x000383cf, + 0x000385cf, 0x000387cf, 0x000389cf, 0x00038bcf, + 0x00038dcf, 0x00038fcf, 0x000391cf, 0x000393cf, + 0x000395cf, 0x000397cf, 0x000399cf, 0x00039bcf, + 0x00039dcf, 0x00039fcf, 0x0003a1cf, 0x0003a3cf, + 0x0003a5cf, 0x0003a7cf, 0x0003a9cf, 0x0003abcf, + 0x0003adcf, 0x0003afcf, 0x0003b1cf, 0x0003b3cf, + 0x0003b5cf, 0x0003b7cf, 0x0003b9cf, 0x0003bbcf, + 0x0003bdcf, 0x0003bfcf, 0x0003c1cf, 0x0003c3cf, + 0x0003c5cf, 0x0003c7cf, 0x0003c9cf, 0x0003cbcf, + 0x0003cdcf, 0x0003cfcf, 0x0003d1cf, 0x0003d3cf, + 0x0003d5cf, 0x0003d7cf, 0x0003d9cf, 0x0003dbcf, + 0x0003ddcf, 0x0003dfcf, 0x0003e1cf, 0x0003e3cf, + 0x0003e5cf, 0x0003e7cf, 0x0003e9cf, 0x0003ebcf, + 0x0003edcf, 0x0003efcf, 0x0003f1cf, 0x0003f3cf, + 0x0003f5cf, 0x0003f7cf, 0x0003f9cf, 0x0003fbcf, + 0x0003fdcf, 0x0003ffcf, 0x000401cf, 0x000403cf, + 0x000405cf, 0x000407cf, 0x000409cf, 0x00040bcf, + 0x00040dcf, 0x00040fcf, 0x000411cf, 0x000413cf, + 0x000415cf, 0x000417cf, 0x000419cf, 0x00041bcf, + 0x00041dcf, 0x00041fcf, 0x000421cf, 0x000423cf, + 0x000425cf, 0x000427cf, 0x000429cf, 0x00042bcf, + 0x00042dcf, 0x00042fcf, 0x000431cf, 0x000433cf, + 0x000435cf, 0x000437cf, 0x000439cf, 0x00043bcf, + 0x00043dcf, 0x00043fcf, 0x000441cf, 0x000443cf, + 0x000445cf, 0x000447cf, 0x000449cf, 0x00044bcf, + 0x00044dcf, 0x00044fcf, 0x000451cf, 0x000453cf, + 0x000455cf, 0x000457cf, 0x000459cf, 0x00045bcf, + 0x00045dcf, 0x00045fcf, 0x000461cf, 0x000463cf, + 0x000465cf, 0x000467cf, 0x000469cf, 0x00046bcf, + 0x00046dcf, 0x00046fcf, 0x000471cf, 0x000473cf, + 0x000475cf, 0x000477cf, 0x000479cf, 0x00047bcf, + 0x00047dcf, 0x00047fcf, 0x000481cf, 0x000483cf, + 0x000485cf, 0x000487cf, 0x000489cf, 0x00048bcf, + 0x00048dcf, 0x00048fcf, 0x000491cf, 0x000493cf, + 0x000495cf, 0x000497cf, 0x000499cf, 0x00049bcf, + 0x00049dcf, 0x00049fcf, 0x0004a1cf, 0x0004a3cf, + 0x0004a5cf, 0x0004a7cf, 0x0004a9cf, 
0x0004abcf, + 0x0004adcf, 0x0004afcf, 0x0004b1cf, 0x0004b3cf, + 0x0004b5cf, 0x0004b7cf, 0x0004b9cf, 0x0004bbcf, + 0x0004bdcf, 0x0004bfcf, 0x0004c1cf, 0x0004c3cf, + 0x0004c5cf, 0x0004c7cf, 0x0004c9cf, 0x0004cbcf, + 0x0004cdcf, 0x0004cfcf, 0x0004d1cf, 0x0004d3cf, + 0x0004d5cf, 0x0004d7cf, 0x0004d9cf, 0x0004dbcf, + 0x0004ddcf, 0x0004dfcf, 0x0004e1cf, 0x0004e3cf, + 0x0004e5cf, 0x0004e7cf, 0x0004e9cf, 0x0004ebcf, + 0x0004edcf, 0x0004efcf, 0x0004f1cf, 0x0004f3cf, + 0x0004f5cf, 0x0004f7cf, 0x0004f9cf, 0x0004fbcf, + 0x0004fdcf, 0x0004ffcf, 0x000501cf, 0x000503cf, + 0x000505cf, 0x000507cf, 0x000509cf, 0x00050bcf, + 0x00050dcf, 0x00050fcf, 0x000511cf, 0x000513cf, + 0x000515cf, 0x000517cf, 0x000519cf, 0x00051bcf, + 0x00051dcf, 0x00051fcf, 0x000521cf, 0x000523cf, + 0x000525cf, 0x000527cf, 0x000529cf, 0x00052bcf, + 0x00052dcf, 0x00052fcf, 0x000531cf, 0x000533cf, + 0x000535cf, 0x000537cf, 0x000539cf, 0x00053bcf, + 0x00053dcf, 0x00053fcf, 0x000541cf, 0x000543cf, + 0x000545cf, 0x000547cf, 0x000549cf, 0x00054bcf, + 0x00054dcf, 0x00054fcf, 0x000551cf, 0x000553cf, + 0x000555cf, 0x000557cf, 0x000559cf, 0x00055bcf, + 0x00055dcf, 0x00055fcf, 0x000561cf, 0x000563cf, + 0x000565cf, 0x000567cf, 0x000569cf, 0x00056bcf, + 0x00056dcf, 0x00056fcf, 0x000571cf, 0x000573cf, + 0x000575cf, 0x000577cf, 0x000579cf, 0x00057bcf, + 0x00057dcf, 0x00057fcf, 0x000581cf, 0x000583cf, + 0x000585cf, 0x000587cf, 0x000589cf, 0x00058bcf, + 0x00058dcf, 0x00058fcf, 0x000591cf, 0x000593cf, + 0x000595cf, 0x000597cf, 0x000599cf, 0x00059bcf, + 0x00059dcf, 0x00059fcf, 0x0005a1cf, 0x0005a3cf, + 0x0005a5cf, 0x0005a7cf, 0x0005a9cf, 0x0005abcf, + 0x0005adcf, 0x0005afcf, 0x0005b1cf, 0x0005b3cf, + 0x0005b5cf, 0x0005b7cf, 0x0005b9cf, 0x0005bbcf, + 0x0005bdcf, 0x0005bfcf, 0x0005c1cf, 0x0005c3cf, + 0x0005c5cf, 0x0005c7cf, 0x0005c9cf, 0x0005cbcf, + 0x0005cdcf, 0x0005cfcf, 0x0005d1cf, 0x0005d3cf, + 0x0005d5cf, 0x0005d7cf, 0x0005d9cf, 0x0005dbcf, + 0x0005ddcf, 0x0005dfcf, 0x0005e1cf, 0x0005e3cf, + 0x0005e5cf, 0x0005e7cf, 0x0005e9cf, 
0x0005ebcf, + 0x0005edcf, 0x0005efcf, 0x0005f1cf, 0x0005f3cf, + 0x0005f5cf, 0x0005f7cf, 0x0005f9cf, 0x0005fbcf, + 0x0005fdcf, 0x0005ffcf, 0x000601cf, 0x000603cf, + 0x000605cf, 0x000607cf, 0x000609cf, 0x00060bcf, + 0x00060dcf, 0x00060fcf, 0x000611cf, 0x000613cf, + 0x000615cf, 0x000617cf, 0x000619cf, 0x00061bcf, + 0x00061dcf, 0x00061fcf, 0x000621cf, 0x000623cf, + 0x000625cf, 0x000627cf, 0x000629cf, 0x00062bcf, + 0x00062dcf, 0x00062fcf, 0x000631cf, 0x000633cf, + 0x000635cf, 0x000637cf, 0x000639cf, 0x00063bcf, + 0x00063dcf, 0x00063fcf, 0x000641cf, 0x000643cf, + 0x000645cf, 0x000647cf, 0x000649cf, 0x00064bcf, + 0x00064dcf, 0x00064fcf, 0x000651cf, 0x000653cf, + 0x000655cf, 0x000657cf, 0x000659cf, 0x00065bcf, + 0x00065dcf, 0x00065fcf, 0x000661cf, 0x000663cf, + 0x000665cf, 0x000667cf, 0x000669cf, 0x00066bcf, + 0x00066dcf, 0x00066fcf, 0x000671cf, 0x000673cf, + 0x000675cf, 0x000677cf, 0x000679cf, 0x00067bcf, + 0x00067dcf, 0x00067fcf, 0x000681cf, 0x000683cf, + 0x000685cf, 0x000687cf, 0x000689cf, 0x00068bcf, + 0x00068dcf, 0x00068fcf, 0x000691cf, 0x000693cf, + 0x000695cf, 0x000697cf, 0x000699cf, 0x00069bcf, + 0x00069dcf, 0x00069fcf, 0x0006a1cf, 0x0006a3cf, + 0x0006a5cf, 0x0006a7cf, 0x0006a9cf, 0x0006abcf, + 0x0006adcf, 0x0006afcf, 0x0006b1cf, 0x0006b3cf, + 0x0006b5cf, 0x0006b7cf, 0x0006b9cf, 0x0006bbcf, + 0x0006bdcf, 0x0006bfcf, 0x0006c1cf, 0x0006c3cf, + 0x0006c5cf, 0x0006c7cf, 0x0006c9cf, 0x0006cbcf, + 0x0006cdcf, 0x0006cfcf, 0x0006d1cf, 0x0006d3cf, + 0x0006d5cf, 0x0006d7cf, 0x0006d9cf, 0x0006dbcf, + 0x0006ddcf, 0x0006dfcf, 0x0006e1cf, 0x0006e3cf, + 0x0006e5cf, 0x0006e7cf, 0x0006e9cf, 0x0006ebcf, + 0x0006edcf, 0x0006efcf, 0x0006f1cf, 0x0006f3cf, + 0x0006f5cf, 0x0006f7cf, 0x0006f9cf, 0x0006fbcf, + 0x0006fdcf, 0x0006ffcf, 0x000701cf, 0x000703cf, + 0x000705cf, 0x000707cf, 0x000709cf, 0x00070bcf, + 0x00070dcf, 0x00070fcf, 0x000711cf, 0x000713cf, + 0x000715cf, 0x000717cf, 0x000719cf, 0x00071bcf, + 0x00071dcf, 0x00071fcf, 0x000721cf, 0x000723cf, + 0x000725cf, 0x000727cf, 0x000729cf, 
0x00072bcf, + 0x00072dcf, 0x00072fcf, 0x000731cf, 0x000733cf, + 0x000735cf, 0x000737cf, 0x000739cf, 0x00073bcf, + 0x00073dcf, 0x00073fcf, 0x000741cf, 0x000743cf, + 0x000745cf, 0x000747cf, 0x000749cf, 0x00074bcf, + 0x00074dcf, 0x00074fcf, 0x000751cf, 0x000753cf, + 0x000755cf, 0x000757cf, 0x000759cf, 0x00075bcf, + 0x00075dcf, 0x00075fcf, 0x000761cf, 0x000763cf, + 0x000765cf, 0x000767cf, 0x000769cf, 0x00076bcf, + 0x00076dcf, 0x00076fcf, 0x000771cf, 0x000773cf, + 0x000775cf, 0x000777cf, 0x000779cf, 0x00077bcf, + 0x00077dcf, 0x00077fcf, 0x000781cf, 0x000783cf, + 0x000785cf, 0x000787cf, 0x000789cf, 0x00078bcf, + 0x00078dcf, 0x00078fcf, 0x000791cf, 0x000793cf, + 0x000795cf, 0x000797cf, 0x000799cf, 0x00079bcf, + 0x00079dcf, 0x00079fcf, 0x0007a1cf, 0x0007a3cf, + 0x0007a5cf, 0x0007a7cf, 0x0007a9cf, 0x0007abcf, + 0x0007adcf, 0x0007afcf, 0x0007b1cf, 0x0007b3cf, + 0x0007b5cf, 0x0007b7cf, 0x0007b9cf, 0x0007bbcf, + 0x0007bdcf, 0x0007bfcf, 0x0007c1cf, 0x0007c3cf, + 0x0007c5cf, 0x0007c7cf, 0x0007c9cf, 0x0007cbcf, + 0x0007cdcf, 0x0007cfcf, 0x0007d1cf, 0x0007d3cf, + 0x0007d5cf, 0x0007d7cf, 0x0007d9cf, 0x0007dbcf, + 0x0007ddcf, 0x0007dfcf, 0x0007e1cf, 0x0007e3cf, + 0x0007e5cf, 0x0007e7cf, 0x0007e9cf, 0x0007ebcf, + 0x0007edcf, 0x0007efcf, 0x0007f1cf, 0x0007f3cf, + 0x0007f5cf, 0x0007f7cf, 0x0007f9cf, 0x0007fbcf, + 0x0007fdcf, 0x0007ffcf, 0x000801cf, 0x000803cf, + 0x000805cf, 0x000807cf, 0x000809cf, 0x00080bcf, + 0x00080dcf, 0x00080fcf, 0x000811cf, 0x000813cf, + 0x000815cf, 0x000817cf, 0x000819cf, 0x00081bcf, + 0x00081dcf, 0x00081fcf, 0x000821cf, 0x000823cf, + 0x000825cf, 0x000827cf, 0x000829cf, 0x00082bcf, + 0x00082dcf, 0x00082fcf, 0x000831cf, 0x000833cf, + 0x000835cf, 0x000837cf, 0x000839cf, 0x00083bcf, + 0x00083dcf, 0x00083fcf, 0x000841cf, 0x000843cf, + 0x000845cf, 0x000847cf, 0x000849cf, 0x00084bcf, + 0x00084dcf, 0x00084fcf, 0x000851cf, 0x000853cf, + 0x000855cf, 0x000857cf, 0x000859cf, 0x00085bcf, + 0x00085dcf, 0x00085fcf, 0x000861cf, 0x000863cf, + 0x000865cf, 0x000867cf, 0x000869cf, 
0x00086bcf, + 0x00086dcf, 0x00086fcf, 0x000871cf, 0x000873cf, + 0x000875cf, 0x000877cf, 0x000879cf, 0x00087bcf, + 0x00087dcf, 0x00087fcf, 0x000881cf, 0x000883cf, + 0x000885cf, 0x000887cf, 0x000889cf, 0x00088bcf, + 0x00088dcf, 0x00088fcf, 0x000891cf, 0x000893cf, + 0x000895cf, 0x000897cf, 0x000899cf, 0x00089bcf, + 0x00089dcf, 0x00089fcf, 0x0008a1cf, 0x0008a3cf, + 0x0008a5cf, 0x0008a7cf, 0x0008a9cf, 0x0008abcf, + 0x0008adcf, 0x0008afcf, 0x0008b1cf, 0x0008b3cf, + 0x0008b5cf, 0x0008b7cf, 0x0008b9cf, 0x0008bbcf, + 0x0008bdcf, 0x0008bfcf, 0x0008c1cf, 0x0008c3cf, + 0x0008c5cf, 0x0008c7cf, 0x0008c9cf, 0x0008cbcf, + 0x0008cdcf, 0x0008cfcf, 0x0008d1cf, 0x0008d3cf, + 0x0008d5cf, 0x0008d7cf, 0x0008d9cf, 0x0008dbcf, + 0x0008ddcf, 0x0008dfcf, 0x0008e1cf, 0x0008e3cf, + 0x0008e5cf, 0x0008e7cf, 0x0008e9cf, 0x0008ebcf, + 0x0008edcf, 0x0008efcf, 0x0008f1cf, 0x0008f3cf, + 0x0008f5cf, 0x0008f7cf, 0x0008f9cf, 0x0008fbcf, + 0x0008fdcf, 0x0008ffcf, 0x000901cf, 0x000903cf, + 0x000905cf, 0x000907cf, 0x000909cf, 0x00090bcf, + 0x00090dcf, 0x00090fcf, 0x000911cf, 0x000913cf, + 0x000915cf, 0x000917cf, 0x000919cf, 0x00091bcf, + 0x00091dcf, 0x00091fcf, 0x000921cf, 0x000923cf, + 0x000925cf, 0x000927cf, 0x000929cf, 0x00092bcf, + 0x00092dcf, 0x00092fcf, 0x000931cf, 0x000933cf, + 0x000935cf, 0x000937cf, 0x000939cf, 0x00093bcf, + 0x00093dcf, 0x00093fcf, 0x000941cf, 0x000943cf, + 0x000945cf, 0x000947cf, 0x000949cf, 0x00094bcf, + 0x00094dcf, 0x00094fcf, 0x000951cf, 0x000953cf, + 0x000955cf, 0x000957cf, 0x000959cf, 0x00095bcf, + 0x00095dcf, 0x00095fcf, 0x000961cf, 0x000963cf, + 0x000965cf, 0x000967cf, 0x000969cf, 0x00096bcf, + 0x00096dcf, 0x00096fcf, 0x000971cf, 0x000973cf, + 0x000975cf, 0x000977cf, 0x000979cf, 0x00097bcf, + 0x00097dcf, 0x00097fcf, 0x000981cf, 0x000983cf, + 0x000985cf, 0x000987cf, 0x000989cf, 0x00098bcf, + 0x00098dcf, 0x00098fcf, 0x000991cf, 0x000993cf, + 0x000995cf, 0x000997cf, 0x000999cf, 0x00099bcf, + 0x00099dcf, 0x00099fcf, 0x0009a1cf, 0x0009a3cf, + 0x0009a5cf, 0x0009a7cf, 0x0009a9cf, 
0x0009abcf, + 0x0009adcf, 0x0009afcf, 0x0009b1cf, 0x0009b3cf, + 0x0009b5cf, 0x0009b7cf, 0x0009b9cf, 0x0009bbcf, + 0x0009bdcf, 0x0009bfcf, 0x0009c1cf, 0x0009c3cf, + 0x0009c5cf, 0x0009c7cf, 0x0009c9cf, 0x0009cbcf, + 0x0009cdcf, 0x0009cfcf, 0x0009d1cf, 0x0009d3cf, + 0x0009d5cf, 0x0009d7cf, 0x0009d9cf, 0x0009dbcf, + 0x0009ddcf, 0x0009dfcf, 0x0009e1cf, 0x0009e3cf, + 0x0009e5cf, 0x0009e7cf, 0x0009e9cf, 0x0009ebcf, + 0x0009edcf, 0x0009efcf, 0x0009f1cf, 0x0009f3cf, + 0x0009f5cf, 0x0009f7cf, 0x0009f9cf, 0x0009fbcf, + 0x0009fdcf, 0x0009ffcf, 0x000a01cf, 0x000a03cf, + 0x000a05cf, 0x000a07cf, 0x000a09cf, 0x000a0bcf, + 0x000a0dcf, 0x000a0fcf, 0x000a11cf, 0x000a13cf, + 0x000a15cf, 0x000a17cf, 0x000a19cf, 0x000a1bcf, + 0x000a1dcf, 0x000a1fcf, 0x000a21cf, 0x000a23cf, + 0x000a25cf, 0x000a27cf, 0x000a29cf, 0x000a2bcf, + 0x000a2dcf, 0x000a2fcf, 0x000a31cf, 0x000a33cf, + 0x000a35cf, 0x000a37cf, 0x000a39cf, 0x000a3bcf, + 0x000a3dcf, 0x000a3fcf, 0x000a41cf, 0x000a43cf, + 0x000a45cf, 0x000a47cf, 0x000a49cf, 0x000a4bcf, + 0x000a4dcf, 0x000a4fcf, 0x000a51cf, 0x000a53cf, + 0x000a55cf, 0x000a57cf, 0x000a59cf, 0x000a5bcf, + 0x000a5dcf, 0x000a5fcf, 0x000a61cf, 0x000a63cf, + 0x000a65cf, 0x000a67cf, 0x000a69cf, 0x000a6bcf, + 0x000a6dcf, 0x000a6fcf, 0x000a71cf, 0x000a73cf, + 0x000a75cf, 0x000a77cf, 0x000a79cf, 0x000a7bcf, + 0x000a7dcf, 0x000a7fcf, 0x000a81cf, 0x000a83cf, + 0x000a85cf, 0x000a87cf, 0x000a89cf, 0x000a8bcf, + 0x000a8dcf, 0x000a8fcf, 0x000a91cf, 0x000a93cf, + 0x000a95cf, 0x000a97cf, 0x000a99cf, 0x000a9bcf, + 0x000a9dcf, 0x000a9fcf, 0x000aa1cf, 0x000aa3cf, + 0x000aa5cf, 0x000aa7cf, 0x000aa9cf, 0x000aabcf, + 0x000aadcf, 0x000aafcf, 0x000ab1cf, 0x000ab3cf, + 0x000ab5cf, 0x000ab7cf, 0x000ab9cf, 0x000abbcf, + 0x000abdcf, 0x000abfcf, 0x000ac1cf, 0x000ac3cf, + 0x000ac5cf, 0x000ac7cf, 0x000ac9cf, 0x000acbcf, + 0x000acdcf, 0x000acfcf, 0x000ad1cf, 0x000ad3cf, + 0x000ad5cf, 0x000ad7cf, 0x000ad9cf, 0x000adbcf, + 0x000addcf, 0x000adfcf, 0x000ae1cf, 0x000ae3cf, + 0x000ae5cf, 0x000ae7cf, 0x000ae9cf, 
0x000aebcf, + 0x000aedcf, 0x000aefcf, 0x000af1cf, 0x000af3cf, + 0x000af5cf, 0x000af7cf, 0x000af9cf, 0x000afbcf, + 0x000afdcf, 0x000affcf, 0x000b01cf, 0x000b03cf, + 0x000b05cf, 0x000b07cf, 0x000b09cf, 0x000b0bcf, + 0x000b0dcf, 0x000b0fcf, 0x000b11cf, 0x000b13cf, + 0x000b15cf, 0x000b17cf, 0x000b19cf, 0x000b1bcf, + 0x000b1dcf, 0x000b1fcf, 0x000b21cf, 0x000b23cf, + 0x000b25cf, 0x000b27cf, 0x000b29cf, 0x000b2bcf, + 0x000b2dcf, 0x000b2fcf, 0x000b31cf, 0x000b33cf, + 0x000b35cf, 0x000b37cf, 0x000b39cf, 0x000b3bcf, + 0x000b3dcf, 0x000b3fcf, 0x000b41cf, 0x000b43cf, + 0x000b45cf, 0x000b47cf, 0x000b49cf, 0x000b4bcf, + 0x000b4dcf, 0x000b4fcf, 0x000b51cf, 0x000b53cf, + 0x000b55cf, 0x000b57cf, 0x000b59cf, 0x000b5bcf, + 0x000b5dcf, 0x000b5fcf, 0x000b61cf, 0x000b63cf, + 0x000b65cf, 0x000b67cf, 0x000b69cf, 0x000b6bcf, + 0x000b6dcf, 0x000b6fcf, 0x000b71cf, 0x000b73cf, + 0x000b75cf, 0x000b77cf, 0x000b79cf, 0x000b7bcf, + 0x000b7dcf, 0x000b7fcf, 0x000b81cf, 0x000b83cf, + 0x000b85cf, 0x000b87cf, 0x000b89cf, 0x000b8bcf, + 0x000b8dcf, 0x000b8fcf, 0x000b91cf, 0x000b93cf, + 0x000b95cf, 0x000b97cf, 0x000b99cf, 0x000b9bcf, + 0x000b9dcf, 0x000b9fcf, 0x000ba1cf, 0x000ba3cf, + 0x000ba5cf, 0x000ba7cf, 0x000ba9cf, 0x000babcf, + 0x000badcf, 0x000bafcf, 0x000bb1cf, 0x000bb3cf, + 0x000bb5cf, 0x000bb7cf, 0x000bb9cf, 0x000bbbcf, + 0x000bbdcf, 0x000bbfcf, 0x000bc1cf, 0x000bc3cf, + 0x000bc5cf, 0x000bc7cf, 0x000bc9cf, 0x000bcbcf, + 0x000bcdcf, 0x000bcfcf, 0x000bd1cf, 0x000bd3cf, + 0x000bd5cf, 0x000bd7cf, 0x000bd9cf, 0x000bdbcf, + 0x000bddcf, 0x000bdfcf, 0x000be1cf, 0x000be3cf, + 0x000be5cf, 0x000be7cf, 0x000be9cf, 0x000bebcf, + 0x000bedcf, 0x000befcf, 0x000bf1cf, 0x000bf3cf, + 0x000bf5cf, 0x000bf7cf, 0x000bf9cf, 0x000bfbcf, + 0x000bfdcf, 0x000bffcf, 0x000c01cf, 0x000c03cf, + 0x000c05cf, 0x000c07cf, 0x000c09cf, 0x000c0bcf, + 0x000c0dcf, 0x000c0fcf, 0x000c11cf, 0x000c13cf, + 0x000c15cf, 0x000c17cf, 0x000c19cf, 0x000c1bcf, + 0x000c1dcf, 0x000c1fcf, 0x000c21cf, 0x000c23cf, + 0x000c25cf, 0x000c27cf, 0x000c29cf, 
0x000c2bcf, + 0x000c2dcf, 0x000c2fcf, 0x000c31cf, 0x000c33cf, + 0x000c35cf, 0x000c37cf, 0x000c39cf, 0x000c3bcf, + 0x000c3dcf, 0x000c3fcf, 0x000c41cf, 0x000c43cf, + 0x000c45cf, 0x000c47cf, 0x000c49cf, 0x000c4bcf, + 0x000c4dcf, 0x000c4fcf, 0x000c51cf, 0x000c53cf, + 0x000c55cf, 0x000c57cf, 0x000c59cf, 0x000c5bcf, + 0x000c5dcf, 0x000c5fcf, 0x000c61cf, 0x000c63cf, + 0x000c65cf, 0x000c67cf, 0x000c69cf, 0x000c6bcf, + 0x000c6dcf, 0x000c6fcf, 0x000c71cf, 0x000c73cf, + 0x000c75cf, 0x000c77cf, 0x000c79cf, 0x000c7bcf, + 0x000c7dcf, 0x000c7fcf, 0x000c81cf, 0x000c83cf, + 0x000c85cf, 0x000c87cf, 0x000c89cf, 0x000c8bcf, + 0x000c8dcf, 0x000c8fcf, 0x000c91cf, 0x000c93cf, + 0x000c95cf, 0x000c97cf, 0x000c99cf, 0x000c9bcf, + 0x000c9dcf, 0x000c9fcf, 0x000ca1cf, 0x000ca3cf, + 0x000ca5cf, 0x000ca7cf, 0x000ca9cf, 0x000cabcf, + 0x000cadcf, 0x000cafcf, 0x000cb1cf, 0x000cb3cf, + 0x000cb5cf, 0x000cb7cf, 0x000cb9cf, 0x000cbbcf, + 0x000cbdcf, 0x000cbfcf, 0x000cc1cf, 0x000cc3cf, + 0x000cc5cf, 0x000cc7cf, 0x000cc9cf, 0x000ccbcf, + 0x000ccdcf, 0x000ccfcf, 0x000cd1cf, 0x000cd3cf, + 0x000cd5cf, 0x000cd7cf, 0x000cd9cf, 0x000cdbcf, + 0x000cddcf, 0x000cdfcf, 0x000ce1cf, 0x000ce3cf, + 0x000ce5cf, 0x000ce7cf, 0x000ce9cf, 0x000cebcf, + 0x000cedcf, 0x000cefcf, 0x000cf1cf, 0x000cf3cf, + 0x000cf5cf, 0x000cf7cf, 0x000cf9cf, 0x000cfbcf, + 0x000cfdcf, 0x000cffcf, 0x000d01cf, 0x000d03cf, + 0x000d05cf, 0x000d07cf, 0x000d09cf, 0x000d0bcf, + 0x000d0dcf, 0x000d0fcf, 0x000d11cf, 0x000d13cf, + 0x000d15cf, 0x000d17cf, 0x000d19cf, 0x000d1bcf, + 0x000d1dcf, 0x000d1fcf, 0x000d21cf, 0x000d23cf, + 0x000d25cf, 0x000d27cf, 0x000d29cf, 0x000d2bcf, + 0x000d2dcf, 0x000d2fcf, 0x000d31cf, 0x000d33cf, + 0x000d35cf, 0x000d37cf, 0x000d39cf, 0x000d3bcf, + 0x000d3dcf, 0x000d3fcf, 0x000d41cf, 0x000d43cf, + 0x000d45cf, 0x000d47cf, 0x000d49cf, 0x000d4bcf, + 0x000d4dcf, 0x000d4fcf, 0x000d51cf, 0x000d53cf, + 0x000d55cf, 0x000d57cf, 0x000d59cf, 0x000d5bcf, + 0x000d5dcf, 0x000d5fcf, 0x000d61cf, 0x000d63cf, + 0x000d65cf, 0x000d67cf, 0x000d69cf, 
0x000d6bcf, + 0x000d6dcf, 0x000d6fcf, 0x000d71cf, 0x000d73cf, + 0x000d75cf, 0x000d77cf, 0x000d79cf, 0x000d7bcf, + 0x000d7dcf, 0x000d7fcf, 0x000d81cf, 0x000d83cf, + 0x000d85cf, 0x000d87cf, 0x000d89cf, 0x000d8bcf, + 0x000d8dcf, 0x000d8fcf, 0x000d91cf, 0x000d93cf, + 0x000d95cf, 0x000d97cf, 0x000d99cf, 0x000d9bcf, + 0x000d9dcf, 0x000d9fcf, 0x000da1cf, 0x000da3cf, + 0x000da5cf, 0x000da7cf, 0x000da9cf, 0x000dabcf, + 0x000dadcf, 0x000dafcf, 0x000db1cf, 0x000db3cf, + 0x000db5cf, 0x000db7cf, 0x000db9cf, 0x000dbbcf, + 0x000dbdcf, 0x000dbfcf, 0x000dc1cf, 0x000dc3cf, + 0x000dc5cf, 0x000dc7cf, 0x000dc9cf, 0x000dcbcf, + 0x000dcdcf, 0x000dcfcf, 0x000dd1cf, 0x000dd3cf, + 0x000dd5cf, 0x000dd7cf, 0x000dd9cf, 0x000ddbcf, + 0x000dddcf, 0x000ddfcf, 0x000de1cf, 0x000de3cf, + 0x000de5cf, 0x000de7cf, 0x000de9cf, 0x000debcf, + 0x000dedcf, 0x000defcf, 0x000df1cf, 0x000df3cf, + 0x000df5cf, 0x000df7cf, 0x000df9cf, 0x000dfbcf, + 0x000dfdcf, 0x000dffcf, 0x000e01cf, 0x000e03cf, + 0x000e05cf, 0x000e07cf, 0x000e09cf, 0x000e0bcf, + 0x000e0dcf, 0x000e0fcf, 0x000e11cf, 0x000e13cf, + 0x000e15cf, 0x000e17cf, 0x000e19cf, 0x000e1bcf, + 0x000e1dcf, 0x000e1fcf, 0x000e21cf, 0x000e23cf, + 0x000e25cf, 0x000e27cf, 0x000e29cf, 0x000e2bcf, + 0x000e2dcf, 0x000e2fcf, 0x000e31cf, 0x000e33cf, + 0x000e35cf, 0x000e37cf, 0x000e39cf, 0x000e3bcf, + 0x000e3dcf, 0x000e3fcf, 0x000e41cf, 0x000e43cf, + 0x000e45cf, 0x000e47cf, 0x000e49cf, 0x000e4bcf, + 0x000e4dcf, 0x000e4fcf, 0x000e51cf, 0x000e53cf, + 0x000e55cf, 0x000e57cf, 0x000e59cf, 0x000e5bcf, + 0x000e5dcf, 0x000e5fcf, 0x000e61cf, 0x000e63cf, + 0x000e65cf, 0x000e67cf, 0x000e69cf, 0x000e6bcf, + 0x000e6dcf, 0x000e6fcf, 0x000e71cf, 0x000e73cf, + 0x000e75cf, 0x000e77cf, 0x000e79cf, 0x000e7bcf, + 0x000e7dcf, 0x000e7fcf, 0x000e81cf, 0x000e83cf, + 0x000e85cf, 0x000e87cf, 0x000e89cf, 0x000e8bcf, + 0x000e8dcf, 0x000e8fcf, 0x000e91cf, 0x000e93cf, + 0x000e95cf, 0x000e97cf, 0x000e99cf, 0x000e9bcf, + 0x000e9dcf, 0x000e9fcf, 0x000ea1cf, 0x000ea3cf, + 0x000ea5cf, 0x000ea7cf, 0x000ea9cf, 
0x000eabcf, + 0x000eadcf, 0x000eafcf, 0x000eb1cf, 0x000eb3cf, + 0x000eb5cf, 0x000eb7cf, 0x000eb9cf, 0x000ebbcf, + 0x000ebdcf, 0x000ebfcf, 0x000ec1cf, 0x000ec3cf, + 0x000ec5cf, 0x000ec7cf, 0x000ec9cf, 0x000ecbcf, + 0x000ecdcf, 0x000ecfcf, 0x000ed1cf, 0x000ed3cf, + 0x000ed5cf, 0x000ed7cf, 0x000ed9cf, 0x000edbcf, + 0x000eddcf, 0x000edfcf, 0x000ee1cf, 0x000ee3cf, + 0x000ee5cf, 0x000ee7cf, 0x000ee9cf, 0x000eebcf, + 0x000eedcf, 0x000eefcf, 0x000ef1cf, 0x000ef3cf, + 0x000ef5cf, 0x000ef7cf, 0x000ef9cf, 0x000efbcf, + 0x000efdcf, 0x000effcf, 0x000f01cf, 0x000f03cf, + 0x000f05cf, 0x000f07cf, 0x000f09cf, 0x000f0bcf, + 0x000f0dcf, 0x000f0fcf, 0x000f11cf, 0x000f13cf, + 0x000f15cf, 0x000f17cf, 0x000f19cf, 0x000f1bcf, + 0x000f1dcf, 0x000f1fcf, 0x000f21cf, 0x000f23cf, + 0x000f25cf, 0x000f27cf, 0x000f29cf, 0x000f2bcf, + 0x000f2dcf, 0x000f2fcf, 0x000f31cf, 0x000f33cf, + 0x000f35cf, 0x000f37cf, 0x000f39cf, 0x000f3bcf, + 0x000f3dcf, 0x000f3fcf, 0x000f41cf, 0x000f43cf, + 0x000f45cf, 0x000f47cf, 0x000f49cf, 0x000f4bcf, + 0x000f4dcf, 0x000f4fcf, 0x000f51cf, 0x000f53cf, + 0x000f55cf, 0x000f57cf, 0x000f59cf, 0x000f5bcf, + 0x000f5dcf, 0x000f5fcf, 0x000f61cf, 0x000f63cf, + 0x000f65cf, 0x000f67cf, 0x000f69cf, 0x000f6bcf, + 0x000f6dcf, 0x000f6fcf, 0x000f71cf, 0x000f73cf, + 0x000f75cf, 0x000f77cf, 0x000f79cf, 0x000f7bcf, + 0x000f7dcf, 0x000f7fcf, 0x000f81cf, 0x000f83cf, + 0x000f85cf, 0x000f87cf, 0x000f89cf, 0x000f8bcf, + 0x000f8dcf, 0x000f8fcf, 0x000f91cf, 0x000f93cf, + 0x000f95cf, 0x000f97cf, 0x000f99cf, 0x000f9bcf, + 0x000f9dcf, 0x000f9fcf, 0x000fa1cf, 0x000fa3cf, + 0x000fa5cf, 0x000fa7cf, 0x000fa9cf, 0x000fabcf, + 0x000fadcf, 0x000fafcf, 0x000fb1cf, 0x000fb3cf, + 0x000fb5cf, 0x000fb7cf, 0x000fb9cf, 0x000fbbcf, + 0x000fbdcf, 0x000fbfcf, 0x000fc1cf, 0x000fc3cf, + 0x000fc5cf, 0x000fc7cf, 0x000fc9cf, 0x000fcbcf, + 0x000fcdcf, 0x000fcfcf, 0x000fd1cf, 0x000fd3cf, + 0x000fd5cf, 0x000fd7cf, 0x000fd9cf, 0x000fdbcf, + 0x000fddcf, 0x000fdfcf, 0x000fe1cf, 0x000fe3cf, + 0x000fe5cf, 0x000fe7cf, 0x000fe9cf, 
0x000febcf, + 0x000fedcf, 0x000fefcf, 0x000ff1cf, 0x000ff3cf, + 0x000ff5cf, 0x000ff7cf, 0x000ff9cf, 0x000ffbcf, + 0x000ffdcf, 0x000fffcf, 0x00000170, 0x00000570, + 0x00000970, 0x00000d70, 0x00001170, 0x00001570, + 0x00001970, 0x00001d70, 0x00002170, 0x00002570, + 0x00002970, 0x00002d70, 0x00003170, 0x00003570, + 0x00003970, 0x00003d70, 0x00004170, 0x00004570, + 0x00004970, 0x00004d70, 0x00005170, 0x00005570, + 0x00005970, 0x00005d70, 0x00006170, 0x00006570, + 0x00006970, 0x00006d70, 0x00007170, 0x00007570, + 0x00007970, 0x00007d70, 0x00008170, 0x00008570, + 0x00008970, 0x00008d70, 0x00009170, 0x00009570, + 0x00009970, 0x00009d70, 0x0000a170, 0x0000a570, + 0x0000a970, 0x0000ad70, 0x0000b170, 0x0000b570, + 0x0000b970, 0x0000bd70, 0x0000c170, 0x0000c570, + 0x0000c970, 0x0000cd70, 0x0000d170, 0x0000d570, + 0x0000d970, 0x0000dd70, 0x0000e170, 0x0000e570, + 0x0000e970, 0x0000ed70, 0x0000f170, 0x0000f570, + 0x0000f970, 0x0000fd70, 0x00010170, 0x00010570, + 0x00010970, 0x00010d70, 0x00011170, 0x00011570, + 0x00011970, 0x00011d70, 0x00012170, 0x00012570, + 0x00012970, 0x00012d70, 0x00013170, 0x00013570, + 0x00013970, 0x00013d70, 0x00014170, 0x00014570, + 0x00014970, 0x00014d70, 0x00015170, 0x00015570, + 0x00015970, 0x00015d70, 0x00016170, 0x00016570, + 0x00016970, 0x00016d70, 0x00017170, 0x00017570, + 0x00017970, 0x00017d70, 0x00018170, 0x00018570, + 0x00018970, 0x00018d70, 0x00019170, 0x00019570, + 0x00019970, 0x00019d70, 0x0001a170, 0x0001a570, + 0x0001a970, 0x0001ad70, 0x0001b170, 0x0001b570, + 0x0001b970, 0x0001bd70, 0x0001c170, 0x0001c570, + 0x0001c970, 0x0001cd70, 0x0001d170, 0x0001d570, + 0x0001d970, 0x0001dd70, 0x0001e170, 0x0001e570, + 0x0001e970, 0x0001ed70, 0x0001f170, 0x0001f570, + 0x0001f970, 0x0001fd70, 0x00020170, 0x00020570, + 0x00020970, 0x00020d70, 0x00021170, 0x00021570, + 0x00021970, 0x00021d70, 0x00022170, 0x00022570, + 0x00022970, 0x00022d70, 0x00023170, 0x00023570, + 0x00023970, 0x00023d70, 0x00024170, 0x00024570, + 0x00024970, 0x00024d70, 0x00025170, 
0x00025570, + 0x00025970, 0x00025d70, 0x00026170, 0x00026570, + 0x00026970, 0x00026d70, 0x00027170, 0x00027570, + 0x00027970, 0x00027d70, 0x00028170, 0x00028570, + 0x00028970, 0x00028d70, 0x00029170, 0x00029570, + 0x00029970, 0x00029d70, 0x0002a170, 0x0002a570, + 0x0002a970, 0x0002ad70, 0x0002b170, 0x0002b570, + 0x0002b970, 0x0002bd70, 0x0002c170, 0x0002c570, + 0x0002c970, 0x0002cd70, 0x0002d170, 0x0002d570, + 0x0002d970, 0x0002dd70, 0x0002e170, 0x0002e570, + 0x0002e970, 0x0002ed70, 0x0002f170, 0x0002f570, + 0x0002f970, 0x0002fd70, 0x00030170, 0x00030570, + 0x00030970, 0x00030d70, 0x00031170, 0x00031570, + 0x00031970, 0x00031d70, 0x00032170, 0x00032570, + 0x00032970, 0x00032d70, 0x00033170, 0x00033570, + 0x00033970, 0x00033d70, 0x00034170, 0x00034570, + 0x00034970, 0x00034d70, 0x00035170, 0x00035570, + 0x00035970, 0x00035d70, 0x00036170, 0x00036570, + 0x00036970, 0x00036d70, 0x00037170, 0x00037570, + 0x00037970, 0x00037d70, 0x00038170, 0x00038570, + 0x00038970, 0x00038d70, 0x00039170, 0x00039570, + 0x00039970, 0x00039d70, 0x0003a170, 0x0003a570, + 0x0003a970, 0x0003ad70, 0x0003b170, 0x0003b570, + 0x0003b970, 0x0003bd70, 0x0003c170, 0x0003c570, + 0x0003c970, 0x0003cd70, 0x0003d170, 0x0003d570, + 0x0003d970, 0x0003dd70, 0x0003e170, 0x0003e570, + 0x0003e970, 0x0003ed70, 0x0003f170, 0x0003f570, + 0x0003f970, 0x0003fd70, 0x00040170, 0x00040570, + 0x00040970, 0x00040d70, 0x00041170, 0x00041570, + 0x00041970, 0x00041d70, 0x00042170, 0x00042570, + 0x00042970, 0x00042d70, 0x00043170, 0x00043570, + 0x00043970, 0x00043d70, 0x00044170, 0x00044570, + 0x00044970, 0x00044d70, 0x00045170, 0x00045570, + 0x00045970, 0x00045d70, 0x00046170, 0x00046570, + 0x00046970, 0x00046d70, 0x00047170, 0x00047570, + 0x00047970, 0x00047d70, 0x00048170, 0x00048570, + 0x00048970, 0x00048d70, 0x00049170, 0x00049570, + 0x00049970, 0x00049d70, 0x0004a170, 0x0004a570, + 0x0004a970, 0x0004ad70, 0x0004b170, 0x0004b570, + 0x0004b970, 0x0004bd70, 0x0004c170, 0x0004c570, + 0x0004c970, 0x0004cd70, 0x0004d170, 
0x0004d570, + 0x0004d970, 0x0004dd70, 0x0004e170, 0x0004e570, + 0x0004e970, 0x0004ed70, 0x0004f170, 0x0004f570, + 0x0004f970, 0x0004fd70, 0x00050170, 0x00050570, + 0x00050970, 0x00050d70, 0x00051170, 0x00051570, + 0x00051970, 0x00051d70, 0x00052170, 0x00052570, + 0x00052970, 0x00052d70, 0x00053170, 0x00053570, + 0x00053970, 0x00053d70, 0x00054170, 0x00054570, + 0x00054970, 0x00054d70, 0x00055170, 0x00055570, + 0x00055970, 0x00055d70, 0x00056170, 0x00056570, + 0x00056970, 0x00056d70, 0x00057170, 0x00057570, + 0x00057970, 0x00057d70, 0x00058170, 0x00058570, + 0x00058970, 0x00058d70, 0x00059170, 0x00059570, + 0x00059970, 0x00059d70, 0x0005a170, 0x0005a570, + 0x0005a970, 0x0005ad70, 0x0005b170, 0x0005b570, + 0x0005b970, 0x0005bd70, 0x0005c170, 0x0005c570, + 0x0005c970, 0x0005cd70, 0x0005d170, 0x0005d570, + 0x0005d970, 0x0005dd70, 0x0005e170, 0x0005e570, + 0x0005e970, 0x0005ed70, 0x0005f170, 0x0005f570, + 0x0005f970, 0x0005fd70, 0x00060170, 0x00060570, + 0x00060970, 0x00060d70, 0x00061170, 0x00061570, + 0x00061970, 0x00061d70, 0x00062170, 0x00062570, + 0x00062970, 0x00062d70, 0x00063170, 0x00063570, + 0x00063970, 0x00063d70, 0x00064170, 0x00064570, + 0x00064970, 0x00064d70, 0x00065170, 0x00065570, + 0x00065970, 0x00065d70, 0x00066170, 0x00066570, + 0x00066970, 0x00066d70, 0x00067170, 0x00067570, + 0x00067970, 0x00067d70, 0x00068170, 0x00068570, + 0x00068970, 0x00068d70, 0x00069170, 0x00069570, + 0x00069970, 0x00069d70, 0x0006a170, 0x0006a570, + 0x0006a970, 0x0006ad70, 0x0006b170, 0x0006b570, + 0x0006b970, 0x0006bd70, 0x0006c170, 0x0006c570, + 0x0006c970, 0x0006cd70, 0x0006d170, 0x0006d570, + 0x0006d970, 0x0006dd70, 0x0006e170, 0x0006e570, + 0x0006e970, 0x0006ed70, 0x0006f170, 0x0006f570, + 0x0006f970, 0x0006fd70, 0x00070170, 0x00070570, + 0x00070970, 0x00070d70, 0x00071170, 0x00071570, + 0x00071970, 0x00071d70, 0x00072170, 0x00072570, + 0x00072970, 0x00072d70, 0x00073170, 0x00073570, + 0x00073970, 0x00073d70, 0x00074170, 0x00074570, + 0x00074970, 0x00074d70, 0x00075170, 
0x00075570, + 0x00075970, 0x00075d70, 0x00076170, 0x00076570, + 0x00076970, 0x00076d70, 0x00077170, 0x00077570, + 0x00077970, 0x00077d70, 0x00078170, 0x00078570, + 0x00078970, 0x00078d70, 0x00079170, 0x00079570, + 0x00079970, 0x00079d70, 0x0007a170, 0x0007a570, + 0x0007a970, 0x0007ad70, 0x0007b170, 0x0007b570, + 0x0007b970, 0x0007bd70, 0x0007c170, 0x0007c570, + 0x0007c970, 0x0007cd70, 0x0007d170, 0x0007d570, + 0x0007d970, 0x0007dd70, 0x0007e170, 0x0007e570, + 0x0007e970, 0x0007ed70, 0x0007f170, 0x0007f570, + 0x0007f970, 0x0007fd70, 0x00080170, 0x00080570, + 0x00080970, 0x00080d70, 0x00081170, 0x00081570, + 0x00081970, 0x00081d70, 0x00082170, 0x00082570, + 0x00082970, 0x00082d70, 0x00083170, 0x00083570, + 0x00083970, 0x00083d70, 0x00084170, 0x00084570, + 0x00084970, 0x00084d70, 0x00085170, 0x00085570, + 0x00085970, 0x00085d70, 0x00086170, 0x00086570, + 0x00086970, 0x00086d70, 0x00087170, 0x00087570, + 0x00087970, 0x00087d70, 0x00088170, 0x00088570, + 0x00088970, 0x00088d70, 0x00089170, 0x00089570, + 0x00089970, 0x00089d70, 0x0008a170, 0x0008a570, + 0x0008a970, 0x0008ad70, 0x0008b170, 0x0008b570, + 0x0008b970, 0x0008bd70, 0x0008c170, 0x0008c570, + 0x0008c970, 0x0008cd70, 0x0008d170, 0x0008d570, + 0x0008d970, 0x0008dd70, 0x0008e170, 0x0008e570, + 0x0008e970, 0x0008ed70, 0x0008f170, 0x0008f570, + 0x0008f970, 0x0008fd70, 0x00090170, 0x00090570, + 0x00090970, 0x00090d70, 0x00091170, 0x00091570, + 0x00091970, 0x00091d70, 0x00092170, 0x00092570, + 0x00092970, 0x00092d70, 0x00093170, 0x00093570, + 0x00093970, 0x00093d70, 0x00094170, 0x00094570, + 0x00094970, 0x00094d70, 0x00095170, 0x00095570, + 0x00095970, 0x00095d70, 0x00096170, 0x00096570, + 0x00096970, 0x00096d70, 0x00097170, 0x00097570, + 0x00097970, 0x00097d70, 0x00098170, 0x00098570, + 0x00098970, 0x00098d70, 0x00099170, 0x00099570, + 0x00099970, 0x00099d70, 0x0009a170, 0x0009a570, + 0x0009a970, 0x0009ad70, 0x0009b170, 0x0009b570, + 0x0009b970, 0x0009bd70, 0x0009c170, 0x0009c570, + 0x0009c970, 0x0009cd70, 0x0009d170, 
0x0009d570, + 0x0009d970, 0x0009dd70, 0x0009e170, 0x0009e570, + 0x0009e970, 0x0009ed70, 0x0009f170, 0x0009f570, + 0x0009f970, 0x0009fd70, 0x000a0170, 0x000a0570, + 0x000a0970, 0x000a0d70, 0x000a1170, 0x000a1570, + 0x000a1970, 0x000a1d70, 0x000a2170, 0x000a2570, + 0x000a2970, 0x000a2d70, 0x000a3170, 0x000a3570, + 0x000a3970, 0x000a3d70, 0x000a4170, 0x000a4570, + 0x000a4970, 0x000a4d70, 0x000a5170, 0x000a5570, + 0x000a5970, 0x000a5d70, 0x000a6170, 0x000a6570, + 0x000a6970, 0x000a6d70, 0x000a7170, 0x000a7570, + 0x000a7970, 0x000a7d70, 0x000a8170, 0x000a8570, + 0x000a8970, 0x000a8d70, 0x000a9170, 0x000a9570, + 0x000a9970, 0x000a9d70, 0x000aa170, 0x000aa570, + 0x000aa970, 0x000aad70, 0x000ab170, 0x000ab570, + 0x000ab970, 0x000abd70, 0x000ac170, 0x000ac570, + 0x000ac970, 0x000acd70, 0x000ad170, 0x000ad570, + 0x000ad970, 0x000add70, 0x000ae170, 0x000ae570, + 0x000ae970, 0x000aed70, 0x000af170, 0x000af570, + 0x000af970, 0x000afd70, 0x000b0170, 0x000b0570, + 0x000b0970, 0x000b0d70, 0x000b1170, 0x000b1570, + 0x000b1970, 0x000b1d70, 0x000b2170, 0x000b2570, + 0x000b2970, 0x000b2d70, 0x000b3170, 0x000b3570, + 0x000b3970, 0x000b3d70, 0x000b4170, 0x000b4570, + 0x000b4970, 0x000b4d70, 0x000b5170, 0x000b5570, + 0x000b5970, 0x000b5d70, 0x000b6170, 0x000b6570, + 0x000b6970, 0x000b6d70, 0x000b7170, 0x000b7570, + 0x000b7970, 0x000b7d70, 0x000b8170, 0x000b8570, + 0x000b8970, 0x000b8d70, 0x000b9170, 0x000b9570, + 0x000b9970, 0x000b9d70, 0x000ba170, 0x000ba570, + 0x000ba970, 0x000bad70, 0x000bb170, 0x000bb570, + 0x000bb970, 0x000bbd70, 0x000bc170, 0x000bc570, + 0x000bc970, 0x000bcd70, 0x000bd170, 0x000bd570, + 0x000bd970, 0x000bdd70, 0x000be170, 0x000be570, + 0x000be970, 0x000bed70, 0x000bf170, 0x000bf570, + 0x000bf970, 0x000bfd70, 0x000c0170, 0x000c0570, + 0x000c0970, 0x000c0d70, 0x000c1170, 0x000c1570, + 0x000c1970, 0x000c1d70, 0x000c2170, 0x000c2570, + 0x000c2970, 0x000c2d70, 0x000c3170, 0x000c3570, + 0x000c3970, 0x000c3d70, 0x000c4170, 0x000c4570, + 0x000c4970, 0x000c4d70, 0x000c5170, 
0x000c5570, + 0x000c5970, 0x000c5d70, 0x000c6170, 0x000c6570, + 0x000c6970, 0x000c6d70, 0x000c7170, 0x000c7570, + 0x000c7970, 0x000c7d70, 0x000c8170, 0x000c8570, + 0x000c8970, 0x000c8d70, 0x000c9170, 0x000c9570, + 0x000c9970, 0x000c9d70, 0x000ca170, 0x000ca570, + 0x000ca970, 0x000cad70, 0x000cb170, 0x000cb570, + 0x000cb970, 0x000cbd70, 0x000cc170, 0x000cc570, + 0x000cc970, 0x000ccd70, 0x000cd170, 0x000cd570, + 0x000cd970, 0x000cdd70, 0x000ce170, 0x000ce570, + 0x000ce970, 0x000ced70, 0x000cf170, 0x000cf570, + 0x000cf970, 0x000cfd70, 0x000d0170, 0x000d0570, + 0x000d0970, 0x000d0d70, 0x000d1170, 0x000d1570, + 0x000d1970, 0x000d1d70, 0x000d2170, 0x000d2570, + 0x000d2970, 0x000d2d70, 0x000d3170, 0x000d3570, + 0x000d3970, 0x000d3d70, 0x000d4170, 0x000d4570, + 0x000d4970, 0x000d4d70, 0x000d5170, 0x000d5570, + 0x000d5970, 0x000d5d70, 0x000d6170, 0x000d6570, + 0x000d6970, 0x000d6d70, 0x000d7170, 0x000d7570, + 0x000d7970, 0x000d7d70, 0x000d8170, 0x000d8570, + 0x000d8970, 0x000d8d70, 0x000d9170, 0x000d9570, + 0x000d9970, 0x000d9d70, 0x000da170, 0x000da570, + 0x000da970, 0x000dad70, 0x000db170, 0x000db570, + 0x000db970, 0x000dbd70, 0x000dc170, 0x000dc570, + 0x000dc970, 0x000dcd70, 0x000dd170, 0x000dd570, + 0x000dd970, 0x000ddd70, 0x000de170, 0x000de570, + 0x000de970, 0x000ded70, 0x000df170, 0x000df570, + 0x000df970, 0x000dfd70, 0x000e0170, 0x000e0570, + 0x000e0970, 0x000e0d70, 0x000e1170, 0x000e1570, + 0x000e1970, 0x000e1d70, 0x000e2170, 0x000e2570, + 0x000e2970, 0x000e2d70, 0x000e3170, 0x000e3570, + 0x000e3970, 0x000e3d70, 0x000e4170, 0x000e4570, + 0x000e4970, 0x000e4d70, 0x000e5170, 0x000e5570, + 0x000e5970, 0x000e5d70, 0x000e6170, 0x000e6570, + 0x000e6970, 0x000e6d70, 0x000e7170, 0x000e7570, + 0x000e7970, 0x000e7d70, 0x000e8170, 0x000e8570, + 0x000e8970, 0x000e8d70, 0x000e9170, 0x000e9570, + 0x000e9970, 0x000e9d70, 0x000ea170, 0x000ea570, + 0x000ea970, 0x000ead70, 0x000eb170, 0x000eb570, + 0x000eb970, 0x000ebd70, 0x000ec170, 0x000ec570, + 0x000ec970, 0x000ecd70, 0x000ed170, 
0x000ed570, + 0x000ed970, 0x000edd70, 0x000ee170, 0x000ee570, + 0x000ee970, 0x000eed70, 0x000ef170, 0x000ef570, + 0x000ef970, 0x000efd70, 0x000f0170, 0x000f0570, + 0x000f0970, 0x000f0d70, 0x000f1170, 0x000f1570, + 0x000f1970, 0x000f1d70, 0x000f2170, 0x000f2570, + 0x000f2970, 0x000f2d70, 0x000f3170, 0x000f3570, + 0x000f3970, 0x000f3d70, 0x000f4170, 0x000f4570, + 0x000f4970, 0x000f4d70, 0x000f5170, 0x000f5570, + 0x000f5970, 0x000f5d70, 0x000f6170, 0x000f6570, + 0x000f6970, 0x000f6d70, 0x000f7170, 0x000f7570, + 0x000f7970, 0x000f7d70, 0x000f8170, 0x000f8570, + 0x000f8970, 0x000f8d70, 0x000f9170, 0x000f9570, + 0x000f9970, 0x000f9d70, 0x000fa170, 0x000fa570, + 0x000fa970, 0x000fad70, 0x000fb170, 0x000fb570, + 0x000fb970, 0x000fbd70, 0x000fc170, 0x000fc570, + 0x000fc970, 0x000fcd70, 0x000fd170, 0x000fd570, + 0x000fd970, 0x000fdd70, 0x000fe170, 0x000fe570, + 0x000fe970, 0x000fed70, 0x000ff170, 0x000ff570, + 0x000ff970, 0x000ffd70, 0x00100170, 0x00100570, + 0x00100970, 0x00100d70, 0x00101170, 0x00101570, + 0x00101970, 0x00101d70, 0x00102170, 0x00102570, + 0x00102970, 0x00102d70, 0x00103170, 0x00103570, + 0x00103970, 0x00103d70, 0x00104170, 0x00104570, + 0x00104970, 0x00104d70, 0x00105170, 0x00105570, + 0x00105970, 0x00105d70, 0x00106170, 0x00106570, + 0x00106970, 0x00106d70, 0x00107170, 0x00107570, + 0x00107970, 0x00107d70, 0x00108170, 0x00108570, + 0x00108970, 0x00108d70, 0x00109170, 0x00109570, + 0x00109970, 0x00109d70, 0x0010a170, 0x0010a570, + 0x0010a970, 0x0010ad70, 0x0010b170, 0x0010b570, + 0x0010b970, 0x0010bd70, 0x0010c170, 0x0010c570, + 0x0010c970, 0x0010cd70, 0x0010d170, 0x0010d570, + 0x0010d970, 0x0010dd70, 0x0010e170, 0x0010e570, + 0x0010e970, 0x0010ed70, 0x0010f170, 0x0010f570, + 0x0010f970, 0x0010fd70, 0x00110170, 0x00110570, + 0x00110970, 0x00110d70, 0x00111170, 0x00111570, + 0x00111970, 0x00111d70, 0x00112170, 0x00112570, + 0x00112970, 0x00112d70, 0x00113170, 0x00113570, + 0x00113970, 0x00113d70, 0x00114170, 0x00114570, + 0x00114970, 0x00114d70, 0x00115170, 
0x00115570, + 0x00115970, 0x00115d70, 0x00116170, 0x00116570, + 0x00116970, 0x00116d70, 0x00117170, 0x00117570, + 0x00117970, 0x00117d70, 0x00118170, 0x00118570, + 0x00118970, 0x00118d70, 0x00119170, 0x00119570, + 0x00119970, 0x00119d70, 0x0011a170, 0x0011a570, + 0x0011a970, 0x0011ad70, 0x0011b170, 0x0011b570, + 0x0011b970, 0x0011bd70, 0x0011c170, 0x0011c570, + 0x0011c970, 0x0011cd70, 0x0011d170, 0x0011d570, + 0x0011d970, 0x0011dd70, 0x0011e170, 0x0011e570, + 0x0011e970, 0x0011ed70, 0x0011f170, 0x0011f570, + 0x0011f970, 0x0011fd70, 0x00120170, 0x00120570, + 0x00120970, 0x00120d70, 0x00121170, 0x00121570, + 0x00121970, 0x00121d70, 0x00122170, 0x00122570, + 0x00122970, 0x00122d70, 0x00123170, 0x00123570, + 0x00123970, 0x00123d70, 0x00124170, 0x00124570, + 0x00124970, 0x00124d70, 0x00125170, 0x00125570, + 0x00125970, 0x00125d70, 0x00126170, 0x00126570, + 0x00126970, 0x00126d70, 0x00127170, 0x00127570, + 0x00127970, 0x00127d70, 0x00128170, 0x00128570, + 0x00128970, 0x00128d70, 0x00129170, 0x00129570, + 0x00129970, 0x00129d70, 0x0012a170, 0x0012a570, + 0x0012a970, 0x0012ad70, 0x0012b170, 0x0012b570, + 0x0012b970, 0x0012bd70, 0x0012c170, 0x0012c570, + 0x0012c970, 0x0012cd70, 0x0012d170, 0x0012d570, + 0x0012d970, 0x0012dd70, 0x0012e170, 0x0012e570, + 0x0012e970, 0x0012ed70, 0x0012f170, 0x0012f570, + 0x0012f970, 0x0012fd70, 0x00130170, 0x00130570, + 0x00130970, 0x00130d70, 0x00131170, 0x00131570, + 0x00131970, 0x00131d70, 0x00132170, 0x00132570, + 0x00132970, 0x00132d70, 0x00133170, 0x00133570, + 0x00133970, 0x00133d70, 0x00134170, 0x00134570, + 0x00134970, 0x00134d70, 0x00135170, 0x00135570, + 0x00135970, 0x00135d70, 0x00136170, 0x00136570, + 0x00136970, 0x00136d70, 0x00137170, 0x00137570, + 0x00137970, 0x00137d70, 0x00138170, 0x00138570, + 0x00138970, 0x00138d70, 0x00139170, 0x00139570, + 0x00139970, 0x00139d70, 0x0013a170, 0x0013a570, + 0x0013a970, 0x0013ad70, 0x0013b170, 0x0013b570, + 0x0013b970, 0x0013bd70, 0x0013c170, 0x0013c570, + 0x0013c970, 0x0013cd70, 0x0013d170, 
0x0013d570, + 0x0013d970, 0x0013dd70, 0x0013e170, 0x0013e570, + 0x0013e970, 0x0013ed70, 0x0013f170, 0x0013f570, + 0x0013f970, 0x0013fd70, 0x00140170, 0x00140570, + 0x00140970, 0x00140d70, 0x00141170, 0x00141570, + 0x00141970, 0x00141d70, 0x00142170, 0x00142570, + 0x00142970, 0x00142d70, 0x00143170, 0x00143570, + 0x00143970, 0x00143d70, 0x00144170, 0x00144570, + 0x00144970, 0x00144d70, 0x00145170, 0x00145570, + 0x00145970, 0x00145d70, 0x00146170, 0x00146570, + 0x00146970, 0x00146d70, 0x00147170, 0x00147570, + 0x00147970, 0x00147d70, 0x00148170, 0x00148570, + 0x00148970, 0x00148d70, 0x00149170, 0x00149570, + 0x00149970, 0x00149d70, 0x0014a170, 0x0014a570, + 0x0014a970, 0x0014ad70, 0x0014b170, 0x0014b570, + 0x0014b970, 0x0014bd70, 0x0014c170, 0x0014c570, + 0x0014c970, 0x0014cd70, 0x0014d170, 0x0014d570, + 0x0014d970, 0x0014dd70, 0x0014e170, 0x0014e570, + 0x0014e970, 0x0014ed70, 0x0014f170, 0x0014f570, + 0x0014f970, 0x0014fd70, 0x00150170, 0x00150570, + 0x00150970, 0x00150d70, 0x00151170, 0x00151570, + 0x00151970, 0x00151d70, 0x00152170, 0x00152570, + 0x00152970, 0x00152d70, 0x00153170, 0x00153570, + 0x00153970, 0x00153d70, 0x00154170, 0x00154570, + 0x00154970, 0x00154d70, 0x00155170, 0x00155570, + 0x00155970, 0x00155d70, 0x00156170, 0x00156570, + 0x00156970, 0x00156d70, 0x00157170, 0x00157570, + 0x00157970, 0x00157d70, 0x00158170, 0x00158570, + 0x00158970, 0x00158d70, 0x00159170, 0x00159570, + 0x00159970, 0x00159d70, 0x0015a170, 0x0015a570, + 0x0015a970, 0x0015ad70, 0x0015b170, 0x0015b570, + 0x0015b970, 0x0015bd70, 0x0015c170, 0x0015c570, + 0x0015c970, 0x0015cd70, 0x0015d170, 0x0015d570, + 0x0015d970, 0x0015dd70, 0x0015e170, 0x0015e570, + 0x0015e970, 0x0015ed70, 0x0015f170, 0x0015f570, + 0x0015f970, 0x0015fd70, 0x00160170, 0x00160570, + 0x00160970, 0x00160d70, 0x00161170, 0x00161570, + 0x00161970, 0x00161d70, 0x00162170, 0x00162570, + 0x00162970, 0x00162d70, 0x00163170, 0x00163570, + 0x00163970, 0x00163d70, 0x00164170, 0x00164570, + 0x00164970, 0x00164d70, 0x00165170, 
0x00165570, + 0x00165970, 0x00165d70, 0x00166170, 0x00166570, + 0x00166970, 0x00166d70, 0x00167170, 0x00167570, + 0x00167970, 0x00167d70, 0x00168170, 0x00168570, + 0x00168970, 0x00168d70, 0x00169170, 0x00169570, + 0x00169970, 0x00169d70, 0x0016a170, 0x0016a570, + 0x0016a970, 0x0016ad70, 0x0016b170, 0x0016b570, + 0x0016b970, 0x0016bd70, 0x0016c170, 0x0016c570, + 0x0016c970, 0x0016cd70, 0x0016d170, 0x0016d570, + 0x0016d970, 0x0016dd70, 0x0016e170, 0x0016e570, + 0x0016e970, 0x0016ed70, 0x0016f170, 0x0016f570, + 0x0016f970, 0x0016fd70, 0x00170170, 0x00170570, + 0x00170970, 0x00170d70, 0x00171170, 0x00171570, + 0x00171970, 0x00171d70, 0x00172170, 0x00172570, + 0x00172970, 0x00172d70, 0x00173170, 0x00173570, + 0x00173970, 0x00173d70, 0x00174170, 0x00174570, + 0x00174970, 0x00174d70, 0x00175170, 0x00175570, + 0x00175970, 0x00175d70, 0x00176170, 0x00176570, + 0x00176970, 0x00176d70, 0x00177170, 0x00177570, + 0x00177970, 0x00177d70, 0x00178170, 0x00178570, + 0x00178970, 0x00178d70, 0x00179170, 0x00179570, + 0x00179970, 0x00179d70, 0x0017a170, 0x0017a570, + 0x0017a970, 0x0017ad70, 0x0017b170, 0x0017b570, + 0x0017b970, 0x0017bd70, 0x0017c170, 0x0017c570, + 0x0017c970, 0x0017cd70, 0x0017d170, 0x0017d570, + 0x0017d970, 0x0017dd70, 0x0017e170, 0x0017e570, + 0x0017e970, 0x0017ed70, 0x0017f170, 0x0017f570, + 0x0017f970, 0x0017fd70, 0x00180170, 0x00180570, + 0x00180970, 0x00180d70, 0x00181170, 0x00181570, + 0x00181970, 0x00181d70, 0x00182170, 0x00182570, + 0x00182970, 0x00182d70, 0x00183170, 0x00183570, + 0x00183970, 0x00183d70, 0x00184170, 0x00184570, + 0x00184970, 0x00184d70, 0x00185170, 0x00185570, + 0x00185970, 0x00185d70, 0x00186170, 0x00186570, + 0x00186970, 0x00186d70, 0x00187170, 0x00187570, + 0x00187970, 0x00187d70, 0x00188170, 0x00188570, + 0x00188970, 0x00188d70, 0x00189170, 0x00189570, + 0x00189970, 0x00189d70, 0x0018a170, 0x0018a570, + 0x0018a970, 0x0018ad70, 0x0018b170, 0x0018b570, + 0x0018b970, 0x0018bd70, 0x0018c170, 0x0018c570, + 0x0018c970, 0x0018cd70, 0x0018d170, 
0x0018d570, + 0x0018d970, 0x0018dd70, 0x0018e170, 0x0018e570, + 0x0018e970, 0x0018ed70, 0x0018f170, 0x0018f570, + 0x0018f970, 0x0018fd70, 0x00190170, 0x00190570, + 0x00190970, 0x00190d70, 0x00191170, 0x00191570, + 0x00191970, 0x00191d70, 0x00192170, 0x00192570, + 0x00192970, 0x00192d70, 0x00193170, 0x00193570, + 0x00193970, 0x00193d70, 0x00194170, 0x00194570, + 0x00194970, 0x00194d70, 0x00195170, 0x00195570, + 0x00195970, 0x00195d70, 0x00196170, 0x00196570, + 0x00196970, 0x00196d70, 0x00197170, 0x00197570, + 0x00197970, 0x00197d70, 0x00198170, 0x00198570, + 0x00198970, 0x00198d70, 0x00199170, 0x00199570, + 0x00199970, 0x00199d70, 0x0019a170, 0x0019a570, + 0x0019a970, 0x0019ad70, 0x0019b170, 0x0019b570, + 0x0019b970, 0x0019bd70, 0x0019c170, 0x0019c570, + 0x0019c970, 0x0019cd70, 0x0019d170, 0x0019d570, + 0x0019d970, 0x0019dd70, 0x0019e170, 0x0019e570, + 0x0019e970, 0x0019ed70, 0x0019f170, 0x0019f570, + 0x0019f970, 0x0019fd70, 0x001a0170, 0x001a0570, + 0x001a0970, 0x001a0d70, 0x001a1170, 0x001a1570, + 0x001a1970, 0x001a1d70, 0x001a2170, 0x001a2570, + 0x001a2970, 0x001a2d70, 0x001a3170, 0x001a3570, + 0x001a3970, 0x001a3d70, 0x001a4170, 0x001a4570, + 0x001a4970, 0x001a4d70, 0x001a5170, 0x001a5570, + 0x001a5970, 0x001a5d70, 0x001a6170, 0x001a6570, + 0x001a6970, 0x001a6d70, 0x001a7170, 0x001a7570, + 0x001a7970, 0x001a7d70, 0x001a8170, 0x001a8570, + 0x001a8970, 0x001a8d70, 0x001a9170, 0x001a9570, + 0x001a9970, 0x001a9d70, 0x001aa170, 0x001aa570, + 0x001aa970, 0x001aad70, 0x001ab170, 0x001ab570, + 0x001ab970, 0x001abd70, 0x001ac170, 0x001ac570, + 0x001ac970, 0x001acd70, 0x001ad170, 0x001ad570, + 0x001ad970, 0x001add70, 0x001ae170, 0x001ae570, + 0x001ae970, 0x001aed70, 0x001af170, 0x001af570, + 0x001af970, 0x001afd70, 0x001b0170, 0x001b0570, + 0x001b0970, 0x001b0d70, 0x001b1170, 0x001b1570, + 0x001b1970, 0x001b1d70, 0x001b2170, 0x001b2570, + 0x001b2970, 0x001b2d70, 0x001b3170, 0x001b3570, + 0x001b3970, 0x001b3d70, 0x001b4170, 0x001b4570, + 0x001b4970, 0x001b4d70, 0x001b5170, 
0x001b5570, + 0x001b5970, 0x001b5d70, 0x001b6170, 0x001b6570, + 0x001b6970, 0x001b6d70, 0x001b7170, 0x001b7570, + 0x001b7970, 0x001b7d70, 0x001b8170, 0x001b8570, + 0x001b8970, 0x001b8d70, 0x001b9170, 0x001b9570, + 0x001b9970, 0x001b9d70, 0x001ba170, 0x001ba570, + 0x001ba970, 0x001bad70, 0x001bb170, 0x001bb570, + 0x001bb970, 0x001bbd70, 0x001bc170, 0x001bc570, + 0x001bc970, 0x001bcd70, 0x001bd170, 0x001bd570, + 0x001bd970, 0x001bdd70, 0x001be170, 0x001be570, + 0x001be970, 0x001bed70, 0x001bf170, 0x001bf570, + 0x001bf970, 0x001bfd70, 0x001c0170, 0x001c0570, + 0x001c0970, 0x001c0d70, 0x001c1170, 0x001c1570, + 0x001c1970, 0x001c1d70, 0x001c2170, 0x001c2570, + 0x001c2970, 0x001c2d70, 0x001c3170, 0x001c3570, + 0x001c3970, 0x001c3d70, 0x001c4170, 0x001c4570, + 0x001c4970, 0x001c4d70, 0x001c5170, 0x001c5570, + 0x001c5970, 0x001c5d70, 0x001c6170, 0x001c6570, + 0x001c6970, 0x001c6d70, 0x001c7170, 0x001c7570, + 0x001c7970, 0x001c7d70, 0x001c8170, 0x001c8570, + 0x001c8970, 0x001c8d70, 0x001c9170, 0x001c9570, + 0x001c9970, 0x001c9d70, 0x001ca170, 0x001ca570, + 0x001ca970, 0x001cad70, 0x001cb170, 0x001cb570, + 0x001cb970, 0x001cbd70, 0x001cc170, 0x001cc570, + 0x001cc970, 0x001ccd70, 0x001cd170, 0x001cd570, + 0x001cd970, 0x001cdd70, 0x001ce170, 0x001ce570, + 0x001ce970, 0x001ced70, 0x001cf170, 0x001cf570, + 0x001cf970, 0x001cfd70, 0x001d0170, 0x001d0570, + 0x001d0970, 0x001d0d70, 0x001d1170, 0x001d1570, + 0x001d1970, 0x001d1d70, 0x001d2170, 0x001d2570, + 0x001d2970, 0x001d2d70, 0x001d3170, 0x001d3570, + 0x001d3970, 0x001d3d70, 0x001d4170, 0x001d4570, + 0x001d4970, 0x001d4d70, 0x001d5170, 0x001d5570, + 0x001d5970, 0x001d5d70, 0x001d6170, 0x001d6570, + 0x001d6970, 0x001d6d70, 0x001d7170, 0x001d7570, + 0x001d7970, 0x001d7d70, 0x001d8170, 0x001d8570, + 0x001d8970, 0x001d8d70, 0x001d9170, 0x001d9570, + 0x001d9970, 0x001d9d70, 0x001da170, 0x001da570, + 0x001da970, 0x001dad70, 0x001db170, 0x001db570, + 0x001db970, 0x001dbd70, 0x001dc170, 0x001dc570, + 0x001dc970, 0x001dcd70, 0x001dd170, 
0x001dd570, + 0x001dd970, 0x001ddd70, 0x001de170, 0x001de570, + 0x001de970, 0x001ded70, 0x001df170, 0x001df570, + 0x001df970, 0x001dfd70, 0x001e0170, 0x001e0570, + 0x001e0970, 0x001e0d70, 0x001e1170, 0x001e1570, + 0x001e1970, 0x001e1d70, 0x001e2170, 0x001e2570, + 0x001e2970, 0x001e2d70, 0x001e3170, 0x001e3570, + 0x001e3970, 0x001e3d70, 0x001e4170, 0x001e4570, + 0x001e4970, 0x001e4d70, 0x001e5170, 0x001e5570, + 0x001e5970, 0x001e5d70, 0x001e6170, 0x001e6570, + 0x001e6970, 0x001e6d70, 0x001e7170, 0x001e7570, + 0x001e7970, 0x001e7d70, 0x001e8170, 0x001e8570, + 0x001e8970, 0x001e8d70, 0x001e9170, 0x001e9570, + 0x001e9970, 0x001e9d70, 0x001ea170, 0x001ea570, + 0x001ea970, 0x001ead70, 0x001eb170, 0x001eb570, + 0x001eb970, 0x001ebd70, 0x001ec170, 0x001ec570, + 0x001ec970, 0x001ecd70, 0x001ed170, 0x001ed570, + 0x001ed970, 0x001edd70, 0x001ee170, 0x001ee570, + 0x001ee970, 0x001eed70, 0x001ef170, 0x001ef570, + 0x001ef970, 0x001efd70, 0x001f0170, 0x001f0570, + 0x001f0970, 0x001f0d70, 0x001f1170, 0x001f1570, + 0x001f1970, 0x001f1d70, 0x001f2170, 0x001f2570, + 0x001f2970, 0x001f2d70, 0x001f3170, 0x001f3570, + 0x001f3970, 0x001f3d70, 0x001f4170, 0x001f4570, + 0x001f4970, 0x001f4d70, 0x001f5170, 0x001f5570, + 0x001f5970, 0x001f5d70, 0x001f6170, 0x001f6570, + 0x001f6970, 0x001f6d70, 0x001f7170, 0x001f7570, + 0x001f7970, 0x001f7d70, 0x001f8170, 0x001f8570, + 0x001f8970, 0x001f8d70, 0x001f9170, 0x001f9570, + 0x001f9970, 0x001f9d70, 0x001fa170, 0x001fa570, + 0x001fa970, 0x001fad70, 0x001fb170, 0x001fb570, + 0x001fb970, 0x001fbd70, 0x001fc170, 0x001fc570, + 0x001fc970, 0x001fcd70, 0x001fd170, 0x001fd570, + 0x001fd970, 0x001fdd70, 0x001fe170, 0x001fe570, + 0x001fe970, 0x001fed70, 0x001ff170, 0x001ff570, + 0x001ff970, 0x001ffd70 +#endif /* LONGER_HUFFTABLE */ + }, + + .len_table = { + 0x000bffef, 0x00000002, 0x00000044, 0x00000144, + 0x000002c5, 0x00000526, 0x00000ea7, 0x000001a7, + 0x000001c6, 0x000005c6, 0x00001869, 0x00003869, + 0x00000469, 0x00002469, 0x00001469, 0x00003469, + 
0x00000c6a, 0x00002c6a, 0x00004c6a, 0x00006c6a, + 0x000030eb, 0x000070eb, 0x0000b0eb, 0x0000f0eb, + 0x000041ec, 0x0000c1ec, 0x000141ec, 0x0001c1ec, + 0x000021ec, 0x0000a1ec, 0x000121ec, 0x0001a1ec, + 0x000061ed, 0x0000e1ed, 0x000161ed, 0x0001e1ed, + 0x000261ed, 0x0002e1ed, 0x000361ed, 0x0003e1ed, + 0x000011ed, 0x000091ed, 0x000111ed, 0x000191ed, + 0x000211ed, 0x000291ed, 0x000311ed, 0x000391ed, + 0x000051ed, 0x0000d1ed, 0x000151ed, 0x0001d1ed, + 0x000251ed, 0x0002d1ed, 0x000351ed, 0x0003d1ed, + 0x000031ed, 0x0000b1ed, 0x000131ed, 0x0001b1ed, + 0x000231ed, 0x0002b1ed, 0x000331ed, 0x0003b1ed, + 0x00003fef, 0x00013fef, 0x00023fef, 0x00033fef, + 0x00043fef, 0x00053fef, 0x00063fef, 0x00073fef, + 0x00083fef, 0x00093fef, 0x000a3fef, 0x000b3fef, + 0x000c3fef, 0x000d3fef, 0x000e3fef, 0x000f3fef, + 0x00007ff0, 0x00027ff0, 0x00047ff0, 0x00067ff0, + 0x00087ff0, 0x000a7ff0, 0x000c7ff0, 0x000e7ff0, + 0x00107ff0, 0x00127ff0, 0x00147ff0, 0x00167ff0, + 0x00187ff0, 0x001a7ff0, 0x001c7ff0, 0x001e7ff0, + 0x0000fff1, 0x0004fff1, 0x0008fff1, 0x000cfff1, + 0x0010fff1, 0x0014fff1, 0x0018fff1, 0x001cfff1, + 0x0020fff1, 0x0024fff1, 0x0028fff1, 0x002cfff1, + 0x0030fff1, 0x0034fff1, 0x0038fff1, 0x003cfff1, + 0x0002fff1, 0x0006fff1, 0x000afff1, 0x000efff1, + 0x0012fff1, 0x0016fff1, 0x001afff1, 0x001efff1, + 0x0022fff1, 0x0026fff1, 0x002afff1, 0x002efff1, + 0x0032fff1, 0x0036fff1, 0x003afff1, 0x003efff1, + 0x00017ff1, 0x00037ff1, 0x00057ff1, 0x00077ff1, + 0x00097ff1, 0x000b7ff1, 0x000d7ff1, 0x000f7ff1, + 0x00117ff1, 0x00137ff1, 0x00157ff1, 0x00177ff1, + 0x00197ff1, 0x001b7ff1, 0x001d7ff1, 0x001f7ff1, + 0x00217ff1, 0x00237ff1, 0x00257ff1, 0x00277ff1, + 0x00297ff1, 0x002b7ff1, 0x002d7ff1, 0x002f7ff1, + 0x00317ff1, 0x00337ff1, 0x00357ff1, 0x00377ff1, + 0x00397ff1, 0x003b7ff1, 0x003d7ff1, 0x003f7ff1, + 0x0001fff2, 0x0005fff2, 0x0009fff2, 0x000dfff2, + 0x0011fff2, 0x0015fff2, 0x0019fff2, 0x001dfff2, + 0x0021fff2, 0x0025fff2, 0x0029fff2, 0x002dfff2, + 0x0031fff2, 0x0035fff2, 0x0039fff2, 0x003dfff2, + 
0x0041fff2, 0x0045fff2, 0x0049fff2, 0x004dfff2, + 0x0051fff2, 0x0055fff2, 0x0059fff2, 0x005dfff2, + 0x0061fff2, 0x0065fff2, 0x0069fff2, 0x006dfff2, + 0x0071fff2, 0x0075fff2, 0x0079fff2, 0x007dfff2, + 0x0007fff4, 0x0017fff4, 0x0027fff4, 0x0037fff4, + 0x0047fff4, 0x0057fff4, 0x0067fff4, 0x0077fff4, + 0x0087fff4, 0x0097fff4, 0x00a7fff4, 0x00b7fff4, + 0x00c7fff4, 0x00d7fff4, 0x00e7fff4, 0x00f7fff4, + 0x0107fff4, 0x0117fff4, 0x0127fff4, 0x0137fff4, + 0x0147fff4, 0x0157fff4, 0x0167fff4, 0x0177fff4, + 0x0187fff4, 0x0197fff4, 0x01a7fff4, 0x01b7fff4, + 0x01c7fff4, 0x01d7fff4, 0x01e7fff4, 0x01f7fff4, + 0x000ffff4, 0x001ffff4, 0x002ffff4, 0x003ffff4, + 0x004ffff4, 0x005ffff4, 0x006ffff4, 0x007ffff4, + 0x008ffff4, 0x009ffff4, 0x00affff4, 0x00bffff4, + 0x00cffff4, 0x00dffff4, 0x00effff4, 0x00fffff4, + 0x010ffff4, 0x011ffff4, 0x012ffff4, 0x013ffff4, + 0x014ffff4, 0x015ffff4, 0x016ffff4, 0x017ffff4, + 0x018ffff4, 0x019ffff4, 0x01affff4, 0x01bffff4, + 0x01cffff4, 0x01dffff4, 0x01effff4, 0x0000bfeb}, + + .lit_table = { + 0x001e, 0x004d, 0x00e3, 0x00cd, 0x002d, 0x01e3, 0x0013, 0x0113, + 0x0093, 0x0193, 0x0019, 0x0053, 0x0153, 0x00ad, 0x00d3, 0x01d3, + 0x0033, 0x0047, 0x0247, 0x0147, 0x0347, 0x038f, 0x078f, 0x004f, + 0x00c7, 0x044f, 0x024f, 0x064f, 0x02c7, 0x014f, 0x01c7, 0x0133, + 0x0006, 0x03c7, 0x00b3, 0x0027, 0x0227, 0x0127, 0x0327, 0x01b3, + 0x0073, 0x0173, 0x00a7, 0x02a7, 0x0059, 0x006d, 0x00ed, 0x01a7, + 0x001d, 0x009d, 0x005d, 0x00f3, 0x01f3, 0x000b, 0x010b, 0x008b, + 0x018b, 0x004b, 0x014b, 0x00cb, 0x03a7, 0x0067, 0x01cb, 0x002b, + 0x012b, 0x00dd, 0x003d, 0x00ab, 0x01ab, 0x006b, 0x016b, 0x00eb, + 0x01eb, 0x001b, 0x0267, 0x0167, 0x011b, 0x009b, 0x019b, 0x005b, + 0x015b, 0x0367, 0x00db, 0x01db, 0x003b, 0x00e7, 0x02e7, 0x01e7, + 0x03e7, 0x0017, 0x054f, 0x0217, 0x0117, 0x034f, 0x074f, 0x0317, + 0x0097, 0x003e, 0x00bd, 0x0039, 0x0079, 0x0001, 0x007d, 0x00fd, + 0x0005, 0x0021, 0x0297, 0x013b, 0x0045, 0x0025, 0x0065, 0x0011, + 0x0015, 0x0197, 0x0031, 0x0009, 0x0055, 0x0035, 0x00bb, 
0x0003, + 0x01bb, 0x0083, 0x0397, 0x00cf, 0x0057, 0x04cf, 0x0257, 0x0157, + 0x007b, 0x02cf, 0x06cf, 0x01cf, 0x05cf, 0x03cf, 0x07cf, 0x002f, + 0x042f, 0x022f, 0x062f, 0x0357, 0x012f, 0x052f, 0x032f, 0x00d7, + 0x02d7, 0x072f, 0x00af, 0x04af, 0x02af, 0x06af, 0x01af, 0x05af, + 0x03af, 0x07af, 0x006f, 0x046f, 0x026f, 0x066f, 0x016f, 0x056f, + 0x01d7, 0x036f, 0x076f, 0x00ef, 0x03d7, 0x04ef, 0x0037, 0x02ef, + 0x06ef, 0x01ef, 0x05ef, 0x03ef, 0x07ef, 0x001f, 0x041f, 0x021f, + 0x0237, 0x061f, 0x011f, 0x051f, 0x0137, 0x031f, 0x071f, 0x009f, + 0x049f, 0x029f, 0x069f, 0x019f, 0x059f, 0x0337, 0x039f, 0x079f, + 0x017b, 0x00b7, 0x00fb, 0x01fb, 0x005f, 0x045f, 0x025f, 0x02b7, + 0x065f, 0x015f, 0x055f, 0x035f, 0x075f, 0x00df, 0x04df, 0x01b7, + 0x03b7, 0x02df, 0x06df, 0x01df, 0x05df, 0x03df, 0x07df, 0x003f, + 0x043f, 0x023f, 0x063f, 0x013f, 0x053f, 0x033f, 0x073f, 0x00bf, + 0x0007, 0x04bf, 0x02bf, 0x0077, 0x06bf, 0x01bf, 0x05bf, 0x0277, + 0x0177, 0x03bf, 0x07bf, 0x007f, 0x047f, 0x027f, 0x067f, 0x017f, + 0x0107, 0x0377, 0x057f, 0x00f7, 0x037f, 0x077f, 0x00ff, 0x04ff, + 0x02f7, 0x01f7, 0x02ff, 0x06ff, 0x03f7, 0x000f, 0x0087, 0x0043, + 0x1fff}, + + .lit_table_sizes = { + 0x06, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x07, 0x09, 0x09, 0x08, 0x09, 0x09, + 0x09, 0x0a, 0x0a, 0x0a, 0x0a, 0x0b, 0x0b, 0x0b, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a, 0x09, + 0x05, 0x0a, 0x09, 0x0a, 0x0a, 0x0a, 0x0a, 0x09, + 0x09, 0x09, 0x0a, 0x0a, 0x07, 0x08, 0x08, 0x0a, + 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x0a, 0x0a, 0x09, 0x09, + 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x0a, 0x0a, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x0a, 0x09, 0x09, 0x09, 0x0a, 0x0a, 0x0a, + 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, + 0x0a, 0x06, 0x08, 0x07, 0x07, 0x06, 0x08, 0x08, + 0x07, 0x06, 0x0a, 0x09, 0x07, 0x07, 0x07, 0x06, + 0x07, 0x0a, 0x06, 0x06, 0x07, 0x07, 0x09, 0x08, + 0x09, 0x08, 0x0a, 0x0b, 0x0a, 0x0b, 0x0a, 0x0a, + 0x09, 0x0b, 0x0b, 0x0b, 0x0b, 
0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0a, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, + 0x09, 0x0a, 0x09, 0x09, 0x0b, 0x0b, 0x0b, 0x0a, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x09, 0x0b, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x09, 0x0a, 0x0b, 0x0a, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x09, 0x08, + 0x0f}, + +#ifndef LONGER_HUFFTABLE + .dcodes = { + 0x003f, 0x00ff, 0x00bf, 0x01ff, 0x007f, 0x001f, 0x005f, 0x0017, + 0x0037, 0x000f, 0x0009, 0x0019, 0x0005, 0x0015, 0x0004, 0x000c, + 0x0002, 0x000d, 0x000a, 0x001d, 0x0006, 0x0003, 0x0000, 0x0013, + 0x000e, 0x000b, 0x0001, 0x001b, 0x0007, 0x002f}, + + .dcodes_sizes = { + 0x08, 0x09, 0x08, 0x09, 0x08, 0x07, 0x07, 0x06, + 0x06, 0x06, 0x05, 0x05, 0x05, 0x05, 0x04, 0x04, + 0x04, 0x05, 0x04, 0x05, 0x04, 0x05, 0x03, 0x05, + 0x04, 0x05, 0x04, 0x05, 0x05, 0x06} +#else + .dcodes = { + 0x0001, 0x001b, 0x0007, 0x002f}, + + .dcodes_sizes = { + 0x04, 0x05, 0x05, 0x06} +#endif +}; +#endif // LARGE_WINDOW + +struct isal_hufftables hufftables_static = { + + .deflate_hdr = {0x03}, + .deflate_hdr_count = 0, + .deflate_hdr_extra_bits = 3, + + .dist_table = { + 0x00000005, 0x00000205, +#ifdef LONGER_HUFFTABLE + 0x00000105, 0x00000305, 0x00000086, 0x00000486, + 0x00000286, 0x00000686, 0x00000187, 0x00000587, + 0x00000987, 0x00000d87, 0x00000387, 0x00000787, + 0x00000b87, 0x00000f87, 0x00000048, 0x00000448, + 0x00000848, 0x00000c48, 0x00001048, 0x00001448, + 0x00001848, 0x00001c48, 0x00000248, 0x00000648, + 0x00000a48, 0x00000e48, 0x00001248, 0x00001648, + 0x00001a48, 0x00001e48, 
0x00000149, 0x00000549, + 0x00000949, 0x00000d49, 0x00001149, 0x00001549, + 0x00001949, 0x00001d49, 0x00002149, 0x00002549, + 0x00002949, 0x00002d49, 0x00003149, 0x00003549, + 0x00003949, 0x00003d49, 0x00000349, 0x00000749, + 0x00000b49, 0x00000f49, 0x00001349, 0x00001749, + 0x00001b49, 0x00001f49, 0x00002349, 0x00002749, + 0x00002b49, 0x00002f49, 0x00003349, 0x00003749, + 0x00003b49, 0x00003f49, 0x000000ca, 0x000004ca, + 0x000008ca, 0x00000cca, 0x000010ca, 0x000014ca, + 0x000018ca, 0x00001cca, 0x000020ca, 0x000024ca, + 0x000028ca, 0x00002cca, 0x000030ca, 0x000034ca, + 0x000038ca, 0x00003cca, 0x000040ca, 0x000044ca, + 0x000048ca, 0x00004cca, 0x000050ca, 0x000054ca, + 0x000058ca, 0x00005cca, 0x000060ca, 0x000064ca, + 0x000068ca, 0x00006cca, 0x000070ca, 0x000074ca, + 0x000078ca, 0x00007cca, 0x000002ca, 0x000006ca, + 0x00000aca, 0x00000eca, 0x000012ca, 0x000016ca, + 0x00001aca, 0x00001eca, 0x000022ca, 0x000026ca, + 0x00002aca, 0x00002eca, 0x000032ca, 0x000036ca, + 0x00003aca, 0x00003eca, 0x000042ca, 0x000046ca, + 0x00004aca, 0x00004eca, 0x000052ca, 0x000056ca, + 0x00005aca, 0x00005eca, 0x000062ca, 0x000066ca, + 0x00006aca, 0x00006eca, 0x000072ca, 0x000076ca, + 0x00007aca, 0x00007eca, 0x000001cb, 0x000005cb, + 0x000009cb, 0x00000dcb, 0x000011cb, 0x000015cb, + 0x000019cb, 0x00001dcb, 0x000021cb, 0x000025cb, + 0x000029cb, 0x00002dcb, 0x000031cb, 0x000035cb, + 0x000039cb, 0x00003dcb, 0x000041cb, 0x000045cb, + 0x000049cb, 0x00004dcb, 0x000051cb, 0x000055cb, + 0x000059cb, 0x00005dcb, 0x000061cb, 0x000065cb, + 0x000069cb, 0x00006dcb, 0x000071cb, 0x000075cb, + 0x000079cb, 0x00007dcb, 0x000081cb, 0x000085cb, + 0x000089cb, 0x00008dcb, 0x000091cb, 0x000095cb, + 0x000099cb, 0x00009dcb, 0x0000a1cb, 0x0000a5cb, + 0x0000a9cb, 0x0000adcb, 0x0000b1cb, 0x0000b5cb, + 0x0000b9cb, 0x0000bdcb, 0x0000c1cb, 0x0000c5cb, + 0x0000c9cb, 0x0000cdcb, 0x0000d1cb, 0x0000d5cb, + 0x0000d9cb, 0x0000ddcb, 0x0000e1cb, 0x0000e5cb, + 0x0000e9cb, 0x0000edcb, 0x0000f1cb, 0x0000f5cb, + 0x0000f9cb, 0x0000fdcb, 
0x000003cb, 0x000007cb, + 0x00000bcb, 0x00000fcb, 0x000013cb, 0x000017cb, + 0x00001bcb, 0x00001fcb, 0x000023cb, 0x000027cb, + 0x00002bcb, 0x00002fcb, 0x000033cb, 0x000037cb, + 0x00003bcb, 0x00003fcb, 0x000043cb, 0x000047cb, + 0x00004bcb, 0x00004fcb, 0x000053cb, 0x000057cb, + 0x00005bcb, 0x00005fcb, 0x000063cb, 0x000067cb, + 0x00006bcb, 0x00006fcb, 0x000073cb, 0x000077cb, + 0x00007bcb, 0x00007fcb, 0x000083cb, 0x000087cb, + 0x00008bcb, 0x00008fcb, 0x000093cb, 0x000097cb, + 0x00009bcb, 0x00009fcb, 0x0000a3cb, 0x0000a7cb, + 0x0000abcb, 0x0000afcb, 0x0000b3cb, 0x0000b7cb, + 0x0000bbcb, 0x0000bfcb, 0x0000c3cb, 0x0000c7cb, + 0x0000cbcb, 0x0000cfcb, 0x0000d3cb, 0x0000d7cb, + 0x0000dbcb, 0x0000dfcb, 0x0000e3cb, 0x0000e7cb, + 0x0000ebcb, 0x0000efcb, 0x0000f3cb, 0x0000f7cb, + 0x0000fbcb, 0x0000ffcb, 0x0000002c, 0x0000042c, + 0x0000082c, 0x00000c2c, 0x0000102c, 0x0000142c, + 0x0000182c, 0x00001c2c, 0x0000202c, 0x0000242c, + 0x0000282c, 0x00002c2c, 0x0000302c, 0x0000342c, + 0x0000382c, 0x00003c2c, 0x0000402c, 0x0000442c, + 0x0000482c, 0x00004c2c, 0x0000502c, 0x0000542c, + 0x0000582c, 0x00005c2c, 0x0000602c, 0x0000642c, + 0x0000682c, 0x00006c2c, 0x0000702c, 0x0000742c, + 0x0000782c, 0x00007c2c, 0x0000802c, 0x0000842c, + 0x0000882c, 0x00008c2c, 0x0000902c, 0x0000942c, + 0x0000982c, 0x00009c2c, 0x0000a02c, 0x0000a42c, + 0x0000a82c, 0x0000ac2c, 0x0000b02c, 0x0000b42c, + 0x0000b82c, 0x0000bc2c, 0x0000c02c, 0x0000c42c, + 0x0000c82c, 0x0000cc2c, 0x0000d02c, 0x0000d42c, + 0x0000d82c, 0x0000dc2c, 0x0000e02c, 0x0000e42c, + 0x0000e82c, 0x0000ec2c, 0x0000f02c, 0x0000f42c, + 0x0000f82c, 0x0000fc2c, 0x0001002c, 0x0001042c, + 0x0001082c, 0x00010c2c, 0x0001102c, 0x0001142c, + 0x0001182c, 0x00011c2c, 0x0001202c, 0x0001242c, + 0x0001282c, 0x00012c2c, 0x0001302c, 0x0001342c, + 0x0001382c, 0x00013c2c, 0x0001402c, 0x0001442c, + 0x0001482c, 0x00014c2c, 0x0001502c, 0x0001542c, + 0x0001582c, 0x00015c2c, 0x0001602c, 0x0001642c, + 0x0001682c, 0x00016c2c, 0x0001702c, 0x0001742c, + 0x0001782c, 0x00017c2c, 
0x0001802c, 0x0001842c, + 0x0001882c, 0x00018c2c, 0x0001902c, 0x0001942c, + 0x0001982c, 0x00019c2c, 0x0001a02c, 0x0001a42c, + 0x0001a82c, 0x0001ac2c, 0x0001b02c, 0x0001b42c, + 0x0001b82c, 0x0001bc2c, 0x0001c02c, 0x0001c42c, + 0x0001c82c, 0x0001cc2c, 0x0001d02c, 0x0001d42c, + 0x0001d82c, 0x0001dc2c, 0x0001e02c, 0x0001e42c, + 0x0001e82c, 0x0001ec2c, 0x0001f02c, 0x0001f42c, + 0x0001f82c, 0x0001fc2c, 0x0000022c, 0x0000062c, + 0x00000a2c, 0x00000e2c, 0x0000122c, 0x0000162c, + 0x00001a2c, 0x00001e2c, 0x0000222c, 0x0000262c, + 0x00002a2c, 0x00002e2c, 0x0000322c, 0x0000362c, + 0x00003a2c, 0x00003e2c, 0x0000422c, 0x0000462c, + 0x00004a2c, 0x00004e2c, 0x0000522c, 0x0000562c, + 0x00005a2c, 0x00005e2c, 0x0000622c, 0x0000662c, + 0x00006a2c, 0x00006e2c, 0x0000722c, 0x0000762c, + 0x00007a2c, 0x00007e2c, 0x0000822c, 0x0000862c, + 0x00008a2c, 0x00008e2c, 0x0000922c, 0x0000962c, + 0x00009a2c, 0x00009e2c, 0x0000a22c, 0x0000a62c, + 0x0000aa2c, 0x0000ae2c, 0x0000b22c, 0x0000b62c, + 0x0000ba2c, 0x0000be2c, 0x0000c22c, 0x0000c62c, + 0x0000ca2c, 0x0000ce2c, 0x0000d22c, 0x0000d62c, + 0x0000da2c, 0x0000de2c, 0x0000e22c, 0x0000e62c, + 0x0000ea2c, 0x0000ee2c, 0x0000f22c, 0x0000f62c, + 0x0000fa2c, 0x0000fe2c, 0x0001022c, 0x0001062c, + 0x00010a2c, 0x00010e2c, 0x0001122c, 0x0001162c, + 0x00011a2c, 0x00011e2c, 0x0001222c, 0x0001262c, + 0x00012a2c, 0x00012e2c, 0x0001322c, 0x0001362c, + 0x00013a2c, 0x00013e2c, 0x0001422c, 0x0001462c, + 0x00014a2c, 0x00014e2c, 0x0001522c, 0x0001562c, + 0x00015a2c, 0x00015e2c, 0x0001622c, 0x0001662c, + 0x00016a2c, 0x00016e2c, 0x0001722c, 0x0001762c, + 0x00017a2c, 0x00017e2c, 0x0001822c, 0x0001862c, + 0x00018a2c, 0x00018e2c, 0x0001922c, 0x0001962c, + 0x00019a2c, 0x00019e2c, 0x0001a22c, 0x0001a62c, + 0x0001aa2c, 0x0001ae2c, 0x0001b22c, 0x0001b62c, + 0x0001ba2c, 0x0001be2c, 0x0001c22c, 0x0001c62c, + 0x0001ca2c, 0x0001ce2c, 0x0001d22c, 0x0001d62c, + 0x0001da2c, 0x0001de2c, 0x0001e22c, 0x0001e62c, + 0x0001ea2c, 0x0001ee2c, 0x0001f22c, 0x0001f62c, + 0x0001fa2c, 0x0001fe2c, 
0x0000012d, 0x0000052d, + 0x0000092d, 0x00000d2d, 0x0000112d, 0x0000152d, + 0x0000192d, 0x00001d2d, 0x0000212d, 0x0000252d, + 0x0000292d, 0x00002d2d, 0x0000312d, 0x0000352d, + 0x0000392d, 0x00003d2d, 0x0000412d, 0x0000452d, + 0x0000492d, 0x00004d2d, 0x0000512d, 0x0000552d, + 0x0000592d, 0x00005d2d, 0x0000612d, 0x0000652d, + 0x0000692d, 0x00006d2d, 0x0000712d, 0x0000752d, + 0x0000792d, 0x00007d2d, 0x0000812d, 0x0000852d, + 0x0000892d, 0x00008d2d, 0x0000912d, 0x0000952d, + 0x0000992d, 0x00009d2d, 0x0000a12d, 0x0000a52d, + 0x0000a92d, 0x0000ad2d, 0x0000b12d, 0x0000b52d, + 0x0000b92d, 0x0000bd2d, 0x0000c12d, 0x0000c52d, + 0x0000c92d, 0x0000cd2d, 0x0000d12d, 0x0000d52d, + 0x0000d92d, 0x0000dd2d, 0x0000e12d, 0x0000e52d, + 0x0000e92d, 0x0000ed2d, 0x0000f12d, 0x0000f52d, + 0x0000f92d, 0x0000fd2d, 0x0001012d, 0x0001052d, + 0x0001092d, 0x00010d2d, 0x0001112d, 0x0001152d, + 0x0001192d, 0x00011d2d, 0x0001212d, 0x0001252d, + 0x0001292d, 0x00012d2d, 0x0001312d, 0x0001352d, + 0x0001392d, 0x00013d2d, 0x0001412d, 0x0001452d, + 0x0001492d, 0x00014d2d, 0x0001512d, 0x0001552d, + 0x0001592d, 0x00015d2d, 0x0001612d, 0x0001652d, + 0x0001692d, 0x00016d2d, 0x0001712d, 0x0001752d, + 0x0001792d, 0x00017d2d, 0x0001812d, 0x0001852d, + 0x0001892d, 0x00018d2d, 0x0001912d, 0x0001952d, + 0x0001992d, 0x00019d2d, 0x0001a12d, 0x0001a52d, + 0x0001a92d, 0x0001ad2d, 0x0001b12d, 0x0001b52d, + 0x0001b92d, 0x0001bd2d, 0x0001c12d, 0x0001c52d, + 0x0001c92d, 0x0001cd2d, 0x0001d12d, 0x0001d52d, + 0x0001d92d, 0x0001dd2d, 0x0001e12d, 0x0001e52d, + 0x0001e92d, 0x0001ed2d, 0x0001f12d, 0x0001f52d, + 0x0001f92d, 0x0001fd2d, 0x0002012d, 0x0002052d, + 0x0002092d, 0x00020d2d, 0x0002112d, 0x0002152d, + 0x0002192d, 0x00021d2d, 0x0002212d, 0x0002252d, + 0x0002292d, 0x00022d2d, 0x0002312d, 0x0002352d, + 0x0002392d, 0x00023d2d, 0x0002412d, 0x0002452d, + 0x0002492d, 0x00024d2d, 0x0002512d, 0x0002552d, + 0x0002592d, 0x00025d2d, 0x0002612d, 0x0002652d, + 0x0002692d, 0x00026d2d, 0x0002712d, 0x0002752d, + 0x0002792d, 0x00027d2d, 
0x0002812d, 0x0002852d, + 0x0002892d, 0x00028d2d, 0x0002912d, 0x0002952d, + 0x0002992d, 0x00029d2d, 0x0002a12d, 0x0002a52d, + 0x0002a92d, 0x0002ad2d, 0x0002b12d, 0x0002b52d, + 0x0002b92d, 0x0002bd2d, 0x0002c12d, 0x0002c52d, + 0x0002c92d, 0x0002cd2d, 0x0002d12d, 0x0002d52d, + 0x0002d92d, 0x0002dd2d, 0x0002e12d, 0x0002e52d, + 0x0002e92d, 0x0002ed2d, 0x0002f12d, 0x0002f52d, + 0x0002f92d, 0x0002fd2d, 0x0003012d, 0x0003052d, + 0x0003092d, 0x00030d2d, 0x0003112d, 0x0003152d, + 0x0003192d, 0x00031d2d, 0x0003212d, 0x0003252d, + 0x0003292d, 0x00032d2d, 0x0003312d, 0x0003352d, + 0x0003392d, 0x00033d2d, 0x0003412d, 0x0003452d, + 0x0003492d, 0x00034d2d, 0x0003512d, 0x0003552d, + 0x0003592d, 0x00035d2d, 0x0003612d, 0x0003652d, + 0x0003692d, 0x00036d2d, 0x0003712d, 0x0003752d, + 0x0003792d, 0x00037d2d, 0x0003812d, 0x0003852d, + 0x0003892d, 0x00038d2d, 0x0003912d, 0x0003952d, + 0x0003992d, 0x00039d2d, 0x0003a12d, 0x0003a52d, + 0x0003a92d, 0x0003ad2d, 0x0003b12d, 0x0003b52d, + 0x0003b92d, 0x0003bd2d, 0x0003c12d, 0x0003c52d, + 0x0003c92d, 0x0003cd2d, 0x0003d12d, 0x0003d52d, + 0x0003d92d, 0x0003dd2d, 0x0003e12d, 0x0003e52d, + 0x0003e92d, 0x0003ed2d, 0x0003f12d, 0x0003f52d, + 0x0003f92d, 0x0003fd2d, 0x0000032d, 0x0000072d, + 0x00000b2d, 0x00000f2d, 0x0000132d, 0x0000172d, + 0x00001b2d, 0x00001f2d, 0x0000232d, 0x0000272d, + 0x00002b2d, 0x00002f2d, 0x0000332d, 0x0000372d, + 0x00003b2d, 0x00003f2d, 0x0000432d, 0x0000472d, + 0x00004b2d, 0x00004f2d, 0x0000532d, 0x0000572d, + 0x00005b2d, 0x00005f2d, 0x0000632d, 0x0000672d, + 0x00006b2d, 0x00006f2d, 0x0000732d, 0x0000772d, + 0x00007b2d, 0x00007f2d, 0x0000832d, 0x0000872d, + 0x00008b2d, 0x00008f2d, 0x0000932d, 0x0000972d, + 0x00009b2d, 0x00009f2d, 0x0000a32d, 0x0000a72d, + 0x0000ab2d, 0x0000af2d, 0x0000b32d, 0x0000b72d, + 0x0000bb2d, 0x0000bf2d, 0x0000c32d, 0x0000c72d, + 0x0000cb2d, 0x0000cf2d, 0x0000d32d, 0x0000d72d, + 0x0000db2d, 0x0000df2d, 0x0000e32d, 0x0000e72d, + 0x0000eb2d, 0x0000ef2d, 0x0000f32d, 0x0000f72d, + 0x0000fb2d, 0x0000ff2d, 
0x0001032d, 0x0001072d, + 0x00010b2d, 0x00010f2d, 0x0001132d, 0x0001172d, + 0x00011b2d, 0x00011f2d, 0x0001232d, 0x0001272d, + 0x00012b2d, 0x00012f2d, 0x0001332d, 0x0001372d, + 0x00013b2d, 0x00013f2d, 0x0001432d, 0x0001472d, + 0x00014b2d, 0x00014f2d, 0x0001532d, 0x0001572d, + 0x00015b2d, 0x00015f2d, 0x0001632d, 0x0001672d, + 0x00016b2d, 0x00016f2d, 0x0001732d, 0x0001772d, + 0x00017b2d, 0x00017f2d, 0x0001832d, 0x0001872d, + 0x00018b2d, 0x00018f2d, 0x0001932d, 0x0001972d, + 0x00019b2d, 0x00019f2d, 0x0001a32d, 0x0001a72d, + 0x0001ab2d, 0x0001af2d, 0x0001b32d, 0x0001b72d, + 0x0001bb2d, 0x0001bf2d, 0x0001c32d, 0x0001c72d, + 0x0001cb2d, 0x0001cf2d, 0x0001d32d, 0x0001d72d, + 0x0001db2d, 0x0001df2d, 0x0001e32d, 0x0001e72d, + 0x0001eb2d, 0x0001ef2d, 0x0001f32d, 0x0001f72d, + 0x0001fb2d, 0x0001ff2d, 0x0002032d, 0x0002072d, + 0x00020b2d, 0x00020f2d, 0x0002132d, 0x0002172d, + 0x00021b2d, 0x00021f2d, 0x0002232d, 0x0002272d, + 0x00022b2d, 0x00022f2d, 0x0002332d, 0x0002372d, + 0x00023b2d, 0x00023f2d, 0x0002432d, 0x0002472d, + 0x00024b2d, 0x00024f2d, 0x0002532d, 0x0002572d, + 0x00025b2d, 0x00025f2d, 0x0002632d, 0x0002672d, + 0x00026b2d, 0x00026f2d, 0x0002732d, 0x0002772d, + 0x00027b2d, 0x00027f2d, 0x0002832d, 0x0002872d, + 0x00028b2d, 0x00028f2d, 0x0002932d, 0x0002972d, + 0x00029b2d, 0x00029f2d, 0x0002a32d, 0x0002a72d, + 0x0002ab2d, 0x0002af2d, 0x0002b32d, 0x0002b72d, + 0x0002bb2d, 0x0002bf2d, 0x0002c32d, 0x0002c72d, + 0x0002cb2d, 0x0002cf2d, 0x0002d32d, 0x0002d72d, + 0x0002db2d, 0x0002df2d, 0x0002e32d, 0x0002e72d, + 0x0002eb2d, 0x0002ef2d, 0x0002f32d, 0x0002f72d, + 0x0002fb2d, 0x0002ff2d, 0x0003032d, 0x0003072d, + 0x00030b2d, 0x00030f2d, 0x0003132d, 0x0003172d, + 0x00031b2d, 0x00031f2d, 0x0003232d, 0x0003272d, + 0x00032b2d, 0x00032f2d, 0x0003332d, 0x0003372d, + 0x00033b2d, 0x00033f2d, 0x0003432d, 0x0003472d, + 0x00034b2d, 0x00034f2d, 0x0003532d, 0x0003572d, + 0x00035b2d, 0x00035f2d, 0x0003632d, 0x0003672d, + 0x00036b2d, 0x00036f2d, 0x0003732d, 0x0003772d, + 0x00037b2d, 0x00037f2d, 
0x0003832d, 0x0003872d, + 0x00038b2d, 0x00038f2d, 0x0003932d, 0x0003972d, + 0x00039b2d, 0x00039f2d, 0x0003a32d, 0x0003a72d, + 0x0003ab2d, 0x0003af2d, 0x0003b32d, 0x0003b72d, + 0x0003bb2d, 0x0003bf2d, 0x0003c32d, 0x0003c72d, + 0x0003cb2d, 0x0003cf2d, 0x0003d32d, 0x0003d72d, + 0x0003db2d, 0x0003df2d, 0x0003e32d, 0x0003e72d, + 0x0003eb2d, 0x0003ef2d, 0x0003f32d, 0x0003f72d, + 0x0003fb2d, 0x0003ff2d, 0x000000ae, 0x000004ae, + 0x000008ae, 0x00000cae, 0x000010ae, 0x000014ae, + 0x000018ae, 0x00001cae, 0x000020ae, 0x000024ae, + 0x000028ae, 0x00002cae, 0x000030ae, 0x000034ae, + 0x000038ae, 0x00003cae, 0x000040ae, 0x000044ae, + 0x000048ae, 0x00004cae, 0x000050ae, 0x000054ae, + 0x000058ae, 0x00005cae, 0x000060ae, 0x000064ae, + 0x000068ae, 0x00006cae, 0x000070ae, 0x000074ae, + 0x000078ae, 0x00007cae, 0x000080ae, 0x000084ae, + 0x000088ae, 0x00008cae, 0x000090ae, 0x000094ae, + 0x000098ae, 0x00009cae, 0x0000a0ae, 0x0000a4ae, + 0x0000a8ae, 0x0000acae, 0x0000b0ae, 0x0000b4ae, + 0x0000b8ae, 0x0000bcae, 0x0000c0ae, 0x0000c4ae, + 0x0000c8ae, 0x0000ccae, 0x0000d0ae, 0x0000d4ae, + 0x0000d8ae, 0x0000dcae, 0x0000e0ae, 0x0000e4ae, + 0x0000e8ae, 0x0000ecae, 0x0000f0ae, 0x0000f4ae, + 0x0000f8ae, 0x0000fcae, 0x000100ae, 0x000104ae, + 0x000108ae, 0x00010cae, 0x000110ae, 0x000114ae, + 0x000118ae, 0x00011cae, 0x000120ae, 0x000124ae, + 0x000128ae, 0x00012cae, 0x000130ae, 0x000134ae, + 0x000138ae, 0x00013cae, 0x000140ae, 0x000144ae, + 0x000148ae, 0x00014cae, 0x000150ae, 0x000154ae, + 0x000158ae, 0x00015cae, 0x000160ae, 0x000164ae, + 0x000168ae, 0x00016cae, 0x000170ae, 0x000174ae, + 0x000178ae, 0x00017cae, 0x000180ae, 0x000184ae, + 0x000188ae, 0x00018cae, 0x000190ae, 0x000194ae, + 0x000198ae, 0x00019cae, 0x0001a0ae, 0x0001a4ae, + 0x0001a8ae, 0x0001acae, 0x0001b0ae, 0x0001b4ae, + 0x0001b8ae, 0x0001bcae, 0x0001c0ae, 0x0001c4ae, + 0x0001c8ae, 0x0001ccae, 0x0001d0ae, 0x0001d4ae, + 0x0001d8ae, 0x0001dcae, 0x0001e0ae, 0x0001e4ae, + 0x0001e8ae, 0x0001ecae, 0x0001f0ae, 0x0001f4ae, + 0x0001f8ae, 0x0001fcae, 
0x000200ae, 0x000204ae, + 0x000208ae, 0x00020cae, 0x000210ae, 0x000214ae, + 0x000218ae, 0x00021cae, 0x000220ae, 0x000224ae, + 0x000228ae, 0x00022cae, 0x000230ae, 0x000234ae, + 0x000238ae, 0x00023cae, 0x000240ae, 0x000244ae, + 0x000248ae, 0x00024cae, 0x000250ae, 0x000254ae, + 0x000258ae, 0x00025cae, 0x000260ae, 0x000264ae, + 0x000268ae, 0x00026cae, 0x000270ae, 0x000274ae, + 0x000278ae, 0x00027cae, 0x000280ae, 0x000284ae, + 0x000288ae, 0x00028cae, 0x000290ae, 0x000294ae, + 0x000298ae, 0x00029cae, 0x0002a0ae, 0x0002a4ae, + 0x0002a8ae, 0x0002acae, 0x0002b0ae, 0x0002b4ae, + 0x0002b8ae, 0x0002bcae, 0x0002c0ae, 0x0002c4ae, + 0x0002c8ae, 0x0002ccae, 0x0002d0ae, 0x0002d4ae, + 0x0002d8ae, 0x0002dcae, 0x0002e0ae, 0x0002e4ae, + 0x0002e8ae, 0x0002ecae, 0x0002f0ae, 0x0002f4ae, + 0x0002f8ae, 0x0002fcae, 0x000300ae, 0x000304ae, + 0x000308ae, 0x00030cae, 0x000310ae, 0x000314ae, + 0x000318ae, 0x00031cae, 0x000320ae, 0x000324ae, + 0x000328ae, 0x00032cae, 0x000330ae, 0x000334ae, + 0x000338ae, 0x00033cae, 0x000340ae, 0x000344ae, + 0x000348ae, 0x00034cae, 0x000350ae, 0x000354ae, + 0x000358ae, 0x00035cae, 0x000360ae, 0x000364ae, + 0x000368ae, 0x00036cae, 0x000370ae, 0x000374ae, + 0x000378ae, 0x00037cae, 0x000380ae, 0x000384ae, + 0x000388ae, 0x00038cae, 0x000390ae, 0x000394ae, + 0x000398ae, 0x00039cae, 0x0003a0ae, 0x0003a4ae, + 0x0003a8ae, 0x0003acae, 0x0003b0ae, 0x0003b4ae, + 0x0003b8ae, 0x0003bcae, 0x0003c0ae, 0x0003c4ae, + 0x0003c8ae, 0x0003ccae, 0x0003d0ae, 0x0003d4ae, + 0x0003d8ae, 0x0003dcae, 0x0003e0ae, 0x0003e4ae, + 0x0003e8ae, 0x0003ecae, 0x0003f0ae, 0x0003f4ae, + 0x0003f8ae, 0x0003fcae, 0x000400ae, 0x000404ae, + 0x000408ae, 0x00040cae, 0x000410ae, 0x000414ae, + 0x000418ae, 0x00041cae, 0x000420ae, 0x000424ae, + 0x000428ae, 0x00042cae, 0x000430ae, 0x000434ae, + 0x000438ae, 0x00043cae, 0x000440ae, 0x000444ae, + 0x000448ae, 0x00044cae, 0x000450ae, 0x000454ae, + 0x000458ae, 0x00045cae, 0x000460ae, 0x000464ae, + 0x000468ae, 0x00046cae, 0x000470ae, 0x000474ae, + 0x000478ae, 0x00047cae, 
0x000480ae, 0x000484ae, + 0x000488ae, 0x00048cae, 0x000490ae, 0x000494ae, + 0x000498ae, 0x00049cae, 0x0004a0ae, 0x0004a4ae, + 0x0004a8ae, 0x0004acae, 0x0004b0ae, 0x0004b4ae, + 0x0004b8ae, 0x0004bcae, 0x0004c0ae, 0x0004c4ae, + 0x0004c8ae, 0x0004ccae, 0x0004d0ae, 0x0004d4ae, + 0x0004d8ae, 0x0004dcae, 0x0004e0ae, 0x0004e4ae, + 0x0004e8ae, 0x0004ecae, 0x0004f0ae, 0x0004f4ae, + 0x0004f8ae, 0x0004fcae, 0x000500ae, 0x000504ae, + 0x000508ae, 0x00050cae, 0x000510ae, 0x000514ae, + 0x000518ae, 0x00051cae, 0x000520ae, 0x000524ae, + 0x000528ae, 0x00052cae, 0x000530ae, 0x000534ae, + 0x000538ae, 0x00053cae, 0x000540ae, 0x000544ae, + 0x000548ae, 0x00054cae, 0x000550ae, 0x000554ae, + 0x000558ae, 0x00055cae, 0x000560ae, 0x000564ae, + 0x000568ae, 0x00056cae, 0x000570ae, 0x000574ae, + 0x000578ae, 0x00057cae, 0x000580ae, 0x000584ae, + 0x000588ae, 0x00058cae, 0x000590ae, 0x000594ae, + 0x000598ae, 0x00059cae, 0x0005a0ae, 0x0005a4ae, + 0x0005a8ae, 0x0005acae, 0x0005b0ae, 0x0005b4ae, + 0x0005b8ae, 0x0005bcae, 0x0005c0ae, 0x0005c4ae, + 0x0005c8ae, 0x0005ccae, 0x0005d0ae, 0x0005d4ae, + 0x0005d8ae, 0x0005dcae, 0x0005e0ae, 0x0005e4ae, + 0x0005e8ae, 0x0005ecae, 0x0005f0ae, 0x0005f4ae, + 0x0005f8ae, 0x0005fcae, 0x000600ae, 0x000604ae, + 0x000608ae, 0x00060cae, 0x000610ae, 0x000614ae, + 0x000618ae, 0x00061cae, 0x000620ae, 0x000624ae, + 0x000628ae, 0x00062cae, 0x000630ae, 0x000634ae, + 0x000638ae, 0x00063cae, 0x000640ae, 0x000644ae, + 0x000648ae, 0x00064cae, 0x000650ae, 0x000654ae, + 0x000658ae, 0x00065cae, 0x000660ae, 0x000664ae, + 0x000668ae, 0x00066cae, 0x000670ae, 0x000674ae, + 0x000678ae, 0x00067cae, 0x000680ae, 0x000684ae, + 0x000688ae, 0x00068cae, 0x000690ae, 0x000694ae, + 0x000698ae, 0x00069cae, 0x0006a0ae, 0x0006a4ae, + 0x0006a8ae, 0x0006acae, 0x0006b0ae, 0x0006b4ae, + 0x0006b8ae, 0x0006bcae, 0x0006c0ae, 0x0006c4ae, + 0x0006c8ae, 0x0006ccae, 0x0006d0ae, 0x0006d4ae, + 0x0006d8ae, 0x0006dcae, 0x0006e0ae, 0x0006e4ae, + 0x0006e8ae, 0x0006ecae, 0x0006f0ae, 0x0006f4ae, + 0x0006f8ae, 0x0006fcae, 
0x000700ae, 0x000704ae, + 0x000708ae, 0x00070cae, 0x000710ae, 0x000714ae, + 0x000718ae, 0x00071cae, 0x000720ae, 0x000724ae, + 0x000728ae, 0x00072cae, 0x000730ae, 0x000734ae, + 0x000738ae, 0x00073cae, 0x000740ae, 0x000744ae, + 0x000748ae, 0x00074cae, 0x000750ae, 0x000754ae, + 0x000758ae, 0x00075cae, 0x000760ae, 0x000764ae, + 0x000768ae, 0x00076cae, 0x000770ae, 0x000774ae, + 0x000778ae, 0x00077cae, 0x000780ae, 0x000784ae, + 0x000788ae, 0x00078cae, 0x000790ae, 0x000794ae, + 0x000798ae, 0x00079cae, 0x0007a0ae, 0x0007a4ae, + 0x0007a8ae, 0x0007acae, 0x0007b0ae, 0x0007b4ae, + 0x0007b8ae, 0x0007bcae, 0x0007c0ae, 0x0007c4ae, + 0x0007c8ae, 0x0007ccae, 0x0007d0ae, 0x0007d4ae, + 0x0007d8ae, 0x0007dcae, 0x0007e0ae, 0x0007e4ae, + 0x0007e8ae, 0x0007ecae, 0x0007f0ae, 0x0007f4ae, + 0x0007f8ae, 0x0007fcae, 0x000002ae, 0x000006ae, + 0x00000aae, 0x00000eae, 0x000012ae, 0x000016ae, + 0x00001aae, 0x00001eae, 0x000022ae, 0x000026ae, + 0x00002aae, 0x00002eae, 0x000032ae, 0x000036ae, + 0x00003aae, 0x00003eae, 0x000042ae, 0x000046ae, + 0x00004aae, 0x00004eae, 0x000052ae, 0x000056ae, + 0x00005aae, 0x00005eae, 0x000062ae, 0x000066ae, + 0x00006aae, 0x00006eae, 0x000072ae, 0x000076ae, + 0x00007aae, 0x00007eae, 0x000082ae, 0x000086ae, + 0x00008aae, 0x00008eae, 0x000092ae, 0x000096ae, + 0x00009aae, 0x00009eae, 0x0000a2ae, 0x0000a6ae, + 0x0000aaae, 0x0000aeae, 0x0000b2ae, 0x0000b6ae, + 0x0000baae, 0x0000beae, 0x0000c2ae, 0x0000c6ae, + 0x0000caae, 0x0000ceae, 0x0000d2ae, 0x0000d6ae, + 0x0000daae, 0x0000deae, 0x0000e2ae, 0x0000e6ae, + 0x0000eaae, 0x0000eeae, 0x0000f2ae, 0x0000f6ae, + 0x0000faae, 0x0000feae, 0x000102ae, 0x000106ae, + 0x00010aae, 0x00010eae, 0x000112ae, 0x000116ae, + 0x00011aae, 0x00011eae, 0x000122ae, 0x000126ae, + 0x00012aae, 0x00012eae, 0x000132ae, 0x000136ae, + 0x00013aae, 0x00013eae, 0x000142ae, 0x000146ae, + 0x00014aae, 0x00014eae, 0x000152ae, 0x000156ae, + 0x00015aae, 0x00015eae, 0x000162ae, 0x000166ae, + 0x00016aae, 0x00016eae, 0x000172ae, 0x000176ae, + 0x00017aae, 0x00017eae, 
0x000182ae, 0x000186ae, + 0x00018aae, 0x00018eae, 0x000192ae, 0x000196ae, + 0x00019aae, 0x00019eae, 0x0001a2ae, 0x0001a6ae, + 0x0001aaae, 0x0001aeae, 0x0001b2ae, 0x0001b6ae, + 0x0001baae, 0x0001beae, 0x0001c2ae, 0x0001c6ae, + 0x0001caae, 0x0001ceae, 0x0001d2ae, 0x0001d6ae, + 0x0001daae, 0x0001deae, 0x0001e2ae, 0x0001e6ae, + 0x0001eaae, 0x0001eeae, 0x0001f2ae, 0x0001f6ae, + 0x0001faae, 0x0001feae, 0x000202ae, 0x000206ae, + 0x00020aae, 0x00020eae, 0x000212ae, 0x000216ae, + 0x00021aae, 0x00021eae, 0x000222ae, 0x000226ae, + 0x00022aae, 0x00022eae, 0x000232ae, 0x000236ae, + 0x00023aae, 0x00023eae, 0x000242ae, 0x000246ae, + 0x00024aae, 0x00024eae, 0x000252ae, 0x000256ae, + 0x00025aae, 0x00025eae, 0x000262ae, 0x000266ae, + 0x00026aae, 0x00026eae, 0x000272ae, 0x000276ae, + 0x00027aae, 0x00027eae, 0x000282ae, 0x000286ae, + 0x00028aae, 0x00028eae, 0x000292ae, 0x000296ae, + 0x00029aae, 0x00029eae, 0x0002a2ae, 0x0002a6ae, + 0x0002aaae, 0x0002aeae, 0x0002b2ae, 0x0002b6ae, + 0x0002baae, 0x0002beae, 0x0002c2ae, 0x0002c6ae, + 0x0002caae, 0x0002ceae, 0x0002d2ae, 0x0002d6ae, + 0x0002daae, 0x0002deae, 0x0002e2ae, 0x0002e6ae, + 0x0002eaae, 0x0002eeae, 0x0002f2ae, 0x0002f6ae, + 0x0002faae, 0x0002feae, 0x000302ae, 0x000306ae, + 0x00030aae, 0x00030eae, 0x000312ae, 0x000316ae, + 0x00031aae, 0x00031eae, 0x000322ae, 0x000326ae, + 0x00032aae, 0x00032eae, 0x000332ae, 0x000336ae, + 0x00033aae, 0x00033eae, 0x000342ae, 0x000346ae, + 0x00034aae, 0x00034eae, 0x000352ae, 0x000356ae, + 0x00035aae, 0x00035eae, 0x000362ae, 0x000366ae, + 0x00036aae, 0x00036eae, 0x000372ae, 0x000376ae, + 0x00037aae, 0x00037eae, 0x000382ae, 0x000386ae, + 0x00038aae, 0x00038eae, 0x000392ae, 0x000396ae, + 0x00039aae, 0x00039eae, 0x0003a2ae, 0x0003a6ae, + 0x0003aaae, 0x0003aeae, 0x0003b2ae, 0x0003b6ae, + 0x0003baae, 0x0003beae, 0x0003c2ae, 0x0003c6ae, + 0x0003caae, 0x0003ceae, 0x0003d2ae, 0x0003d6ae, + 0x0003daae, 0x0003deae, 0x0003e2ae, 0x0003e6ae, + 0x0003eaae, 0x0003eeae, 0x0003f2ae, 0x0003f6ae, + 0x0003faae, 0x0003feae, 
0x000402ae, 0x000406ae, + 0x00040aae, 0x00040eae, 0x000412ae, 0x000416ae, + 0x00041aae, 0x00041eae, 0x000422ae, 0x000426ae, + 0x00042aae, 0x00042eae, 0x000432ae, 0x000436ae, + 0x00043aae, 0x00043eae, 0x000442ae, 0x000446ae, + 0x00044aae, 0x00044eae, 0x000452ae, 0x000456ae, + 0x00045aae, 0x00045eae, 0x000462ae, 0x000466ae, + 0x00046aae, 0x00046eae, 0x000472ae, 0x000476ae, + 0x00047aae, 0x00047eae, 0x000482ae, 0x000486ae, + 0x00048aae, 0x00048eae, 0x000492ae, 0x000496ae, + 0x00049aae, 0x00049eae, 0x0004a2ae, 0x0004a6ae, + 0x0004aaae, 0x0004aeae, 0x0004b2ae, 0x0004b6ae, + 0x0004baae, 0x0004beae, 0x0004c2ae, 0x0004c6ae, + 0x0004caae, 0x0004ceae, 0x0004d2ae, 0x0004d6ae, + 0x0004daae, 0x0004deae, 0x0004e2ae, 0x0004e6ae, + 0x0004eaae, 0x0004eeae, 0x0004f2ae, 0x0004f6ae, + 0x0004faae, 0x0004feae, 0x000502ae, 0x000506ae, + 0x00050aae, 0x00050eae, 0x000512ae, 0x000516ae, + 0x00051aae, 0x00051eae, 0x000522ae, 0x000526ae, + 0x00052aae, 0x00052eae, 0x000532ae, 0x000536ae, + 0x00053aae, 0x00053eae, 0x000542ae, 0x000546ae, + 0x00054aae, 0x00054eae, 0x000552ae, 0x000556ae, + 0x00055aae, 0x00055eae, 0x000562ae, 0x000566ae, + 0x00056aae, 0x00056eae, 0x000572ae, 0x000576ae, + 0x00057aae, 0x00057eae, 0x000582ae, 0x000586ae, + 0x00058aae, 0x00058eae, 0x000592ae, 0x000596ae, + 0x00059aae, 0x00059eae, 0x0005a2ae, 0x0005a6ae, + 0x0005aaae, 0x0005aeae, 0x0005b2ae, 0x0005b6ae, + 0x0005baae, 0x0005beae, 0x0005c2ae, 0x0005c6ae, + 0x0005caae, 0x0005ceae, 0x0005d2ae, 0x0005d6ae, + 0x0005daae, 0x0005deae, 0x0005e2ae, 0x0005e6ae, + 0x0005eaae, 0x0005eeae, 0x0005f2ae, 0x0005f6ae, + 0x0005faae, 0x0005feae, 0x000602ae, 0x000606ae, + 0x00060aae, 0x00060eae, 0x000612ae, 0x000616ae, + 0x00061aae, 0x00061eae, 0x000622ae, 0x000626ae, + 0x00062aae, 0x00062eae, 0x000632ae, 0x000636ae, + 0x00063aae, 0x00063eae, 0x000642ae, 0x000646ae, + 0x00064aae, 0x00064eae, 0x000652ae, 0x000656ae, + 0x00065aae, 0x00065eae, 0x000662ae, 0x000666ae, + 0x00066aae, 0x00066eae, 0x000672ae, 0x000676ae, + 0x00067aae, 0x00067eae, 
0x000682ae, 0x000686ae, + 0x00068aae, 0x00068eae, 0x000692ae, 0x000696ae, + 0x00069aae, 0x00069eae, 0x0006a2ae, 0x0006a6ae, + 0x0006aaae, 0x0006aeae, 0x0006b2ae, 0x0006b6ae, + 0x0006baae, 0x0006beae, 0x0006c2ae, 0x0006c6ae, + 0x0006caae, 0x0006ceae, 0x0006d2ae, 0x0006d6ae, + 0x0006daae, 0x0006deae, 0x0006e2ae, 0x0006e6ae, + 0x0006eaae, 0x0006eeae, 0x0006f2ae, 0x0006f6ae, + 0x0006faae, 0x0006feae, 0x000702ae, 0x000706ae, + 0x00070aae, 0x00070eae, 0x000712ae, 0x000716ae, + 0x00071aae, 0x00071eae, 0x000722ae, 0x000726ae, + 0x00072aae, 0x00072eae, 0x000732ae, 0x000736ae, + 0x00073aae, 0x00073eae, 0x000742ae, 0x000746ae, + 0x00074aae, 0x00074eae, 0x000752ae, 0x000756ae, + 0x00075aae, 0x00075eae, 0x000762ae, 0x000766ae, + 0x00076aae, 0x00076eae, 0x000772ae, 0x000776ae, + 0x00077aae, 0x00077eae, 0x000782ae, 0x000786ae, + 0x00078aae, 0x00078eae, 0x000792ae, 0x000796ae, + 0x00079aae, 0x00079eae, 0x0007a2ae, 0x0007a6ae, + 0x0007aaae, 0x0007aeae, 0x0007b2ae, 0x0007b6ae, + 0x0007baae, 0x0007beae, 0x0007c2ae, 0x0007c6ae, + 0x0007caae, 0x0007ceae, 0x0007d2ae, 0x0007d6ae, + 0x0007daae, 0x0007deae, 0x0007e2ae, 0x0007e6ae, + 0x0007eaae, 0x0007eeae, 0x0007f2ae, 0x0007f6ae, + 0x0007faae, 0x0007feae, 0x000001af, 0x000005af, + 0x000009af, 0x00000daf, 0x000011af, 0x000015af, + 0x000019af, 0x00001daf, 0x000021af, 0x000025af, + 0x000029af, 0x00002daf, 0x000031af, 0x000035af, + 0x000039af, 0x00003daf, 0x000041af, 0x000045af, + 0x000049af, 0x00004daf, 0x000051af, 0x000055af, + 0x000059af, 0x00005daf, 0x000061af, 0x000065af, + 0x000069af, 0x00006daf, 0x000071af, 0x000075af, + 0x000079af, 0x00007daf, 0x000081af, 0x000085af, + 0x000089af, 0x00008daf, 0x000091af, 0x000095af, + 0x000099af, 0x00009daf, 0x0000a1af, 0x0000a5af, + 0x0000a9af, 0x0000adaf, 0x0000b1af, 0x0000b5af, + 0x0000b9af, 0x0000bdaf, 0x0000c1af, 0x0000c5af, + 0x0000c9af, 0x0000cdaf, 0x0000d1af, 0x0000d5af, + 0x0000d9af, 0x0000ddaf, 0x0000e1af, 0x0000e5af, + 0x0000e9af, 0x0000edaf, 0x0000f1af, 0x0000f5af, + 0x0000f9af, 0x0000fdaf, 
0x000101af, 0x000105af, + 0x000109af, 0x00010daf, 0x000111af, 0x000115af, + 0x000119af, 0x00011daf, 0x000121af, 0x000125af, + 0x000129af, 0x00012daf, 0x000131af, 0x000135af, + 0x000139af, 0x00013daf, 0x000141af, 0x000145af, + 0x000149af, 0x00014daf, 0x000151af, 0x000155af, + 0x000159af, 0x00015daf, 0x000161af, 0x000165af, + 0x000169af, 0x00016daf, 0x000171af, 0x000175af, + 0x000179af, 0x00017daf, 0x000181af, 0x000185af, + 0x000189af, 0x00018daf, 0x000191af, 0x000195af, + 0x000199af, 0x00019daf, 0x0001a1af, 0x0001a5af, + 0x0001a9af, 0x0001adaf, 0x0001b1af, 0x0001b5af, + 0x0001b9af, 0x0001bdaf, 0x0001c1af, 0x0001c5af, + 0x0001c9af, 0x0001cdaf, 0x0001d1af, 0x0001d5af, + 0x0001d9af, 0x0001ddaf, 0x0001e1af, 0x0001e5af, + 0x0001e9af, 0x0001edaf, 0x0001f1af, 0x0001f5af, + 0x0001f9af, 0x0001fdaf, 0x000201af, 0x000205af, + 0x000209af, 0x00020daf, 0x000211af, 0x000215af, + 0x000219af, 0x00021daf, 0x000221af, 0x000225af, + 0x000229af, 0x00022daf, 0x000231af, 0x000235af, + 0x000239af, 0x00023daf, 0x000241af, 0x000245af, + 0x000249af, 0x00024daf, 0x000251af, 0x000255af, + 0x000259af, 0x00025daf, 0x000261af, 0x000265af, + 0x000269af, 0x00026daf, 0x000271af, 0x000275af, + 0x000279af, 0x00027daf, 0x000281af, 0x000285af, + 0x000289af, 0x00028daf, 0x000291af, 0x000295af, + 0x000299af, 0x00029daf, 0x0002a1af, 0x0002a5af, + 0x0002a9af, 0x0002adaf, 0x0002b1af, 0x0002b5af, + 0x0002b9af, 0x0002bdaf, 0x0002c1af, 0x0002c5af, + 0x0002c9af, 0x0002cdaf, 0x0002d1af, 0x0002d5af, + 0x0002d9af, 0x0002ddaf, 0x0002e1af, 0x0002e5af, + 0x0002e9af, 0x0002edaf, 0x0002f1af, 0x0002f5af, + 0x0002f9af, 0x0002fdaf, 0x000301af, 0x000305af, + 0x000309af, 0x00030daf, 0x000311af, 0x000315af, + 0x000319af, 0x00031daf, 0x000321af, 0x000325af, + 0x000329af, 0x00032daf, 0x000331af, 0x000335af, + 0x000339af, 0x00033daf, 0x000341af, 0x000345af, + 0x000349af, 0x00034daf, 0x000351af, 0x000355af, + 0x000359af, 0x00035daf, 0x000361af, 0x000365af, + 0x000369af, 0x00036daf, 0x000371af, 0x000375af, + 0x000379af, 0x00037daf, 
0x000381af, 0x000385af, + 0x000389af, 0x00038daf, 0x000391af, 0x000395af, + 0x000399af, 0x00039daf, 0x0003a1af, 0x0003a5af, + 0x0003a9af, 0x0003adaf, 0x0003b1af, 0x0003b5af, + 0x0003b9af, 0x0003bdaf, 0x0003c1af, 0x0003c5af, + 0x0003c9af, 0x0003cdaf, 0x0003d1af, 0x0003d5af, + 0x0003d9af, 0x0003ddaf, 0x0003e1af, 0x0003e5af, + 0x0003e9af, 0x0003edaf, 0x0003f1af, 0x0003f5af, + 0x0003f9af, 0x0003fdaf, 0x000401af, 0x000405af, + 0x000409af, 0x00040daf, 0x000411af, 0x000415af, + 0x000419af, 0x00041daf, 0x000421af, 0x000425af, + 0x000429af, 0x00042daf, 0x000431af, 0x000435af, + 0x000439af, 0x00043daf, 0x000441af, 0x000445af, + 0x000449af, 0x00044daf, 0x000451af, 0x000455af, + 0x000459af, 0x00045daf, 0x000461af, 0x000465af, + 0x000469af, 0x00046daf, 0x000471af, 0x000475af, + 0x000479af, 0x00047daf, 0x000481af, 0x000485af, + 0x000489af, 0x00048daf, 0x000491af, 0x000495af, + 0x000499af, 0x00049daf, 0x0004a1af, 0x0004a5af, + 0x0004a9af, 0x0004adaf, 0x0004b1af, 0x0004b5af, + 0x0004b9af, 0x0004bdaf, 0x0004c1af, 0x0004c5af, + 0x0004c9af, 0x0004cdaf, 0x0004d1af, 0x0004d5af, + 0x0004d9af, 0x0004ddaf, 0x0004e1af, 0x0004e5af, + 0x0004e9af, 0x0004edaf, 0x0004f1af, 0x0004f5af, + 0x0004f9af, 0x0004fdaf, 0x000501af, 0x000505af, + 0x000509af, 0x00050daf, 0x000511af, 0x000515af, + 0x000519af, 0x00051daf, 0x000521af, 0x000525af, + 0x000529af, 0x00052daf, 0x000531af, 0x000535af, + 0x000539af, 0x00053daf, 0x000541af, 0x000545af, + 0x000549af, 0x00054daf, 0x000551af, 0x000555af, + 0x000559af, 0x00055daf, 0x000561af, 0x000565af, + 0x000569af, 0x00056daf, 0x000571af, 0x000575af, + 0x000579af, 0x00057daf, 0x000581af, 0x000585af, + 0x000589af, 0x00058daf, 0x000591af, 0x000595af, + 0x000599af, 0x00059daf, 0x0005a1af, 0x0005a5af, + 0x0005a9af, 0x0005adaf, 0x0005b1af, 0x0005b5af, + 0x0005b9af, 0x0005bdaf, 0x0005c1af, 0x0005c5af, + 0x0005c9af, 0x0005cdaf, 0x0005d1af, 0x0005d5af, + 0x0005d9af, 0x0005ddaf, 0x0005e1af, 0x0005e5af, + 0x0005e9af, 0x0005edaf, 0x0005f1af, 0x0005f5af, + 0x0005f9af, 0x0005fdaf, 
0x000601af, 0x000605af, + 0x000609af, 0x00060daf, 0x000611af, 0x000615af, + 0x000619af, 0x00061daf, 0x000621af, 0x000625af, + 0x000629af, 0x00062daf, 0x000631af, 0x000635af, + 0x000639af, 0x00063daf, 0x000641af, 0x000645af, + 0x000649af, 0x00064daf, 0x000651af, 0x000655af, + 0x000659af, 0x00065daf, 0x000661af, 0x000665af, + 0x000669af, 0x00066daf, 0x000671af, 0x000675af, + 0x000679af, 0x00067daf, 0x000681af, 0x000685af, + 0x000689af, 0x00068daf, 0x000691af, 0x000695af, + 0x000699af, 0x00069daf, 0x0006a1af, 0x0006a5af, + 0x0006a9af, 0x0006adaf, 0x0006b1af, 0x0006b5af, + 0x0006b9af, 0x0006bdaf, 0x0006c1af, 0x0006c5af, + 0x0006c9af, 0x0006cdaf, 0x0006d1af, 0x0006d5af, + 0x0006d9af, 0x0006ddaf, 0x0006e1af, 0x0006e5af, + 0x0006e9af, 0x0006edaf, 0x0006f1af, 0x0006f5af, + 0x0006f9af, 0x0006fdaf, 0x000701af, 0x000705af, + 0x000709af, 0x00070daf, 0x000711af, 0x000715af, + 0x000719af, 0x00071daf, 0x000721af, 0x000725af, + 0x000729af, 0x00072daf, 0x000731af, 0x000735af, + 0x000739af, 0x00073daf, 0x000741af, 0x000745af, + 0x000749af, 0x00074daf, 0x000751af, 0x000755af, + 0x000759af, 0x00075daf, 0x000761af, 0x000765af, + 0x000769af, 0x00076daf, 0x000771af, 0x000775af, + 0x000779af, 0x00077daf, 0x000781af, 0x000785af, + 0x000789af, 0x00078daf, 0x000791af, 0x000795af, + 0x000799af, 0x00079daf, 0x0007a1af, 0x0007a5af, + 0x0007a9af, 0x0007adaf, 0x0007b1af, 0x0007b5af, + 0x0007b9af, 0x0007bdaf, 0x0007c1af, 0x0007c5af, + 0x0007c9af, 0x0007cdaf, 0x0007d1af, 0x0007d5af, + 0x0007d9af, 0x0007ddaf, 0x0007e1af, 0x0007e5af, + 0x0007e9af, 0x0007edaf, 0x0007f1af, 0x0007f5af, + 0x0007f9af, 0x0007fdaf, 0x000801af, 0x000805af, + 0x000809af, 0x00080daf, 0x000811af, 0x000815af, + 0x000819af, 0x00081daf, 0x000821af, 0x000825af, + 0x000829af, 0x00082daf, 0x000831af, 0x000835af, + 0x000839af, 0x00083daf, 0x000841af, 0x000845af, + 0x000849af, 0x00084daf, 0x000851af, 0x000855af, + 0x000859af, 0x00085daf, 0x000861af, 0x000865af, + 0x000869af, 0x00086daf, 0x000871af, 0x000875af, + 0x000879af, 0x00087daf, 
0x000881af, 0x000885af, + 0x000889af, 0x00088daf, 0x000891af, 0x000895af, + 0x000899af, 0x00089daf, 0x0008a1af, 0x0008a5af, + 0x0008a9af, 0x0008adaf, 0x0008b1af, 0x0008b5af, + 0x0008b9af, 0x0008bdaf, 0x0008c1af, 0x0008c5af, + 0x0008c9af, 0x0008cdaf, 0x0008d1af, 0x0008d5af, + 0x0008d9af, 0x0008ddaf, 0x0008e1af, 0x0008e5af, + 0x0008e9af, 0x0008edaf, 0x0008f1af, 0x0008f5af, + 0x0008f9af, 0x0008fdaf, 0x000901af, 0x000905af, + 0x000909af, 0x00090daf, 0x000911af, 0x000915af, + 0x000919af, 0x00091daf, 0x000921af, 0x000925af, + 0x000929af, 0x00092daf, 0x000931af, 0x000935af, + 0x000939af, 0x00093daf, 0x000941af, 0x000945af, + 0x000949af, 0x00094daf, 0x000951af, 0x000955af, + 0x000959af, 0x00095daf, 0x000961af, 0x000965af, + 0x000969af, 0x00096daf, 0x000971af, 0x000975af, + 0x000979af, 0x00097daf, 0x000981af, 0x000985af, + 0x000989af, 0x00098daf, 0x000991af, 0x000995af, + 0x000999af, 0x00099daf, 0x0009a1af, 0x0009a5af, + 0x0009a9af, 0x0009adaf, 0x0009b1af, 0x0009b5af, + 0x0009b9af, 0x0009bdaf, 0x0009c1af, 0x0009c5af, + 0x0009c9af, 0x0009cdaf, 0x0009d1af, 0x0009d5af, + 0x0009d9af, 0x0009ddaf, 0x0009e1af, 0x0009e5af, + 0x0009e9af, 0x0009edaf, 0x0009f1af, 0x0009f5af, + 0x0009f9af, 0x0009fdaf, 0x000a01af, 0x000a05af, + 0x000a09af, 0x000a0daf, 0x000a11af, 0x000a15af, + 0x000a19af, 0x000a1daf, 0x000a21af, 0x000a25af, + 0x000a29af, 0x000a2daf, 0x000a31af, 0x000a35af, + 0x000a39af, 0x000a3daf, 0x000a41af, 0x000a45af, + 0x000a49af, 0x000a4daf, 0x000a51af, 0x000a55af, + 0x000a59af, 0x000a5daf, 0x000a61af, 0x000a65af, + 0x000a69af, 0x000a6daf, 0x000a71af, 0x000a75af, + 0x000a79af, 0x000a7daf, 0x000a81af, 0x000a85af, + 0x000a89af, 0x000a8daf, 0x000a91af, 0x000a95af, + 0x000a99af, 0x000a9daf, 0x000aa1af, 0x000aa5af, + 0x000aa9af, 0x000aadaf, 0x000ab1af, 0x000ab5af, + 0x000ab9af, 0x000abdaf, 0x000ac1af, 0x000ac5af, + 0x000ac9af, 0x000acdaf, 0x000ad1af, 0x000ad5af, + 0x000ad9af, 0x000addaf, 0x000ae1af, 0x000ae5af, + 0x000ae9af, 0x000aedaf, 0x000af1af, 0x000af5af, + 0x000af9af, 0x000afdaf, 
0x000b01af, 0x000b05af, + 0x000b09af, 0x000b0daf, 0x000b11af, 0x000b15af, + 0x000b19af, 0x000b1daf, 0x000b21af, 0x000b25af, + 0x000b29af, 0x000b2daf, 0x000b31af, 0x000b35af, + 0x000b39af, 0x000b3daf, 0x000b41af, 0x000b45af, + 0x000b49af, 0x000b4daf, 0x000b51af, 0x000b55af, + 0x000b59af, 0x000b5daf, 0x000b61af, 0x000b65af, + 0x000b69af, 0x000b6daf, 0x000b71af, 0x000b75af, + 0x000b79af, 0x000b7daf, 0x000b81af, 0x000b85af, + 0x000b89af, 0x000b8daf, 0x000b91af, 0x000b95af, + 0x000b99af, 0x000b9daf, 0x000ba1af, 0x000ba5af, + 0x000ba9af, 0x000badaf, 0x000bb1af, 0x000bb5af, + 0x000bb9af, 0x000bbdaf, 0x000bc1af, 0x000bc5af, + 0x000bc9af, 0x000bcdaf, 0x000bd1af, 0x000bd5af, + 0x000bd9af, 0x000bddaf, 0x000be1af, 0x000be5af, + 0x000be9af, 0x000bedaf, 0x000bf1af, 0x000bf5af, + 0x000bf9af, 0x000bfdaf, 0x000c01af, 0x000c05af, + 0x000c09af, 0x000c0daf, 0x000c11af, 0x000c15af, + 0x000c19af, 0x000c1daf, 0x000c21af, 0x000c25af, + 0x000c29af, 0x000c2daf, 0x000c31af, 0x000c35af, + 0x000c39af, 0x000c3daf, 0x000c41af, 0x000c45af, + 0x000c49af, 0x000c4daf, 0x000c51af, 0x000c55af, + 0x000c59af, 0x000c5daf, 0x000c61af, 0x000c65af, + 0x000c69af, 0x000c6daf, 0x000c71af, 0x000c75af, + 0x000c79af, 0x000c7daf, 0x000c81af, 0x000c85af, + 0x000c89af, 0x000c8daf, 0x000c91af, 0x000c95af, + 0x000c99af, 0x000c9daf, 0x000ca1af, 0x000ca5af, + 0x000ca9af, 0x000cadaf, 0x000cb1af, 0x000cb5af, + 0x000cb9af, 0x000cbdaf, 0x000cc1af, 0x000cc5af, + 0x000cc9af, 0x000ccdaf, 0x000cd1af, 0x000cd5af, + 0x000cd9af, 0x000cddaf, 0x000ce1af, 0x000ce5af, + 0x000ce9af, 0x000cedaf, 0x000cf1af, 0x000cf5af, + 0x000cf9af, 0x000cfdaf, 0x000d01af, 0x000d05af, + 0x000d09af, 0x000d0daf, 0x000d11af, 0x000d15af, + 0x000d19af, 0x000d1daf, 0x000d21af, 0x000d25af, + 0x000d29af, 0x000d2daf, 0x000d31af, 0x000d35af, + 0x000d39af, 0x000d3daf, 0x000d41af, 0x000d45af, + 0x000d49af, 0x000d4daf, 0x000d51af, 0x000d55af, + 0x000d59af, 0x000d5daf, 0x000d61af, 0x000d65af, + 0x000d69af, 0x000d6daf, 0x000d71af, 0x000d75af, + 0x000d79af, 0x000d7daf, 
0x000d81af, 0x000d85af, + 0x000d89af, 0x000d8daf, 0x000d91af, 0x000d95af, + 0x000d99af, 0x000d9daf, 0x000da1af, 0x000da5af, + 0x000da9af, 0x000dadaf, 0x000db1af, 0x000db5af, + 0x000db9af, 0x000dbdaf, 0x000dc1af, 0x000dc5af, + 0x000dc9af, 0x000dcdaf, 0x000dd1af, 0x000dd5af, + 0x000dd9af, 0x000dddaf, 0x000de1af, 0x000de5af, + 0x000de9af, 0x000dedaf, 0x000df1af, 0x000df5af, + 0x000df9af, 0x000dfdaf, 0x000e01af, 0x000e05af, + 0x000e09af, 0x000e0daf, 0x000e11af, 0x000e15af, + 0x000e19af, 0x000e1daf, 0x000e21af, 0x000e25af, + 0x000e29af, 0x000e2daf, 0x000e31af, 0x000e35af, + 0x000e39af, 0x000e3daf, 0x000e41af, 0x000e45af, + 0x000e49af, 0x000e4daf, 0x000e51af, 0x000e55af, + 0x000e59af, 0x000e5daf, 0x000e61af, 0x000e65af, + 0x000e69af, 0x000e6daf, 0x000e71af, 0x000e75af, + 0x000e79af, 0x000e7daf, 0x000e81af, 0x000e85af, + 0x000e89af, 0x000e8daf, 0x000e91af, 0x000e95af, + 0x000e99af, 0x000e9daf, 0x000ea1af, 0x000ea5af, + 0x000ea9af, 0x000eadaf, 0x000eb1af, 0x000eb5af, + 0x000eb9af, 0x000ebdaf, 0x000ec1af, 0x000ec5af, + 0x000ec9af, 0x000ecdaf, 0x000ed1af, 0x000ed5af, + 0x000ed9af, 0x000eddaf, 0x000ee1af, 0x000ee5af, + 0x000ee9af, 0x000eedaf, 0x000ef1af, 0x000ef5af, + 0x000ef9af, 0x000efdaf, 0x000f01af, 0x000f05af, + 0x000f09af, 0x000f0daf, 0x000f11af, 0x000f15af, + 0x000f19af, 0x000f1daf, 0x000f21af, 0x000f25af, + 0x000f29af, 0x000f2daf, 0x000f31af, 0x000f35af, + 0x000f39af, 0x000f3daf, 0x000f41af, 0x000f45af, + 0x000f49af, 0x000f4daf, 0x000f51af, 0x000f55af, + 0x000f59af, 0x000f5daf, 0x000f61af, 0x000f65af, + 0x000f69af, 0x000f6daf, 0x000f71af, 0x000f75af, + 0x000f79af, 0x000f7daf, 0x000f81af, 0x000f85af, + 0x000f89af, 0x000f8daf, 0x000f91af, 0x000f95af, + 0x000f99af, 0x000f9daf, 0x000fa1af, 0x000fa5af, + 0x000fa9af, 0x000fadaf, 0x000fb1af, 0x000fb5af, + 0x000fb9af, 0x000fbdaf, 0x000fc1af, 0x000fc5af, + 0x000fc9af, 0x000fcdaf, 0x000fd1af, 0x000fd5af, + 0x000fd9af, 0x000fddaf, 0x000fe1af, 0x000fe5af, + 0x000fe9af, 0x000fedaf, 0x000ff1af, 0x000ff5af, + 0x000ff9af, 0x000ffdaf, 
0x000003af, 0x000007af, + 0x00000baf, 0x00000faf, 0x000013af, 0x000017af, + 0x00001baf, 0x00001faf, 0x000023af, 0x000027af, + 0x00002baf, 0x00002faf, 0x000033af, 0x000037af, + 0x00003baf, 0x00003faf, 0x000043af, 0x000047af, + 0x00004baf, 0x00004faf, 0x000053af, 0x000057af, + 0x00005baf, 0x00005faf, 0x000063af, 0x000067af, + 0x00006baf, 0x00006faf, 0x000073af, 0x000077af, + 0x00007baf, 0x00007faf, 0x000083af, 0x000087af, + 0x00008baf, 0x00008faf, 0x000093af, 0x000097af, + 0x00009baf, 0x00009faf, 0x0000a3af, 0x0000a7af, + 0x0000abaf, 0x0000afaf, 0x0000b3af, 0x0000b7af, + 0x0000bbaf, 0x0000bfaf, 0x0000c3af, 0x0000c7af, + 0x0000cbaf, 0x0000cfaf, 0x0000d3af, 0x0000d7af, + 0x0000dbaf, 0x0000dfaf, 0x0000e3af, 0x0000e7af, + 0x0000ebaf, 0x0000efaf, 0x0000f3af, 0x0000f7af, + 0x0000fbaf, 0x0000ffaf, 0x000103af, 0x000107af, + 0x00010baf, 0x00010faf, 0x000113af, 0x000117af, + 0x00011baf, 0x00011faf, 0x000123af, 0x000127af, + 0x00012baf, 0x00012faf, 0x000133af, 0x000137af, + 0x00013baf, 0x00013faf, 0x000143af, 0x000147af, + 0x00014baf, 0x00014faf, 0x000153af, 0x000157af, + 0x00015baf, 0x00015faf, 0x000163af, 0x000167af, + 0x00016baf, 0x00016faf, 0x000173af, 0x000177af, + 0x00017baf, 0x00017faf, 0x000183af, 0x000187af, + 0x00018baf, 0x00018faf, 0x000193af, 0x000197af, + 0x00019baf, 0x00019faf, 0x0001a3af, 0x0001a7af, + 0x0001abaf, 0x0001afaf, 0x0001b3af, 0x0001b7af, + 0x0001bbaf, 0x0001bfaf, 0x0001c3af, 0x0001c7af, + 0x0001cbaf, 0x0001cfaf, 0x0001d3af, 0x0001d7af, + 0x0001dbaf, 0x0001dfaf, 0x0001e3af, 0x0001e7af, + 0x0001ebaf, 0x0001efaf, 0x0001f3af, 0x0001f7af, + 0x0001fbaf, 0x0001ffaf, 0x000203af, 0x000207af, + 0x00020baf, 0x00020faf, 0x000213af, 0x000217af, + 0x00021baf, 0x00021faf, 0x000223af, 0x000227af, + 0x00022baf, 0x00022faf, 0x000233af, 0x000237af, + 0x00023baf, 0x00023faf, 0x000243af, 0x000247af, + 0x00024baf, 0x00024faf, 0x000253af, 0x000257af, + 0x00025baf, 0x00025faf, 0x000263af, 0x000267af, + 0x00026baf, 0x00026faf, 0x000273af, 0x000277af, + 0x00027baf, 0x00027faf, 
0x000283af, 0x000287af, + 0x00028baf, 0x00028faf, 0x000293af, 0x000297af, + 0x00029baf, 0x00029faf, 0x0002a3af, 0x0002a7af, + 0x0002abaf, 0x0002afaf, 0x0002b3af, 0x0002b7af, + 0x0002bbaf, 0x0002bfaf, 0x0002c3af, 0x0002c7af, + 0x0002cbaf, 0x0002cfaf, 0x0002d3af, 0x0002d7af, + 0x0002dbaf, 0x0002dfaf, 0x0002e3af, 0x0002e7af, + 0x0002ebaf, 0x0002efaf, 0x0002f3af, 0x0002f7af, + 0x0002fbaf, 0x0002ffaf, 0x000303af, 0x000307af, + 0x00030baf, 0x00030faf, 0x000313af, 0x000317af, + 0x00031baf, 0x00031faf, 0x000323af, 0x000327af, + 0x00032baf, 0x00032faf, 0x000333af, 0x000337af, + 0x00033baf, 0x00033faf, 0x000343af, 0x000347af, + 0x00034baf, 0x00034faf, 0x000353af, 0x000357af, + 0x00035baf, 0x00035faf, 0x000363af, 0x000367af, + 0x00036baf, 0x00036faf, 0x000373af, 0x000377af, + 0x00037baf, 0x00037faf, 0x000383af, 0x000387af, + 0x00038baf, 0x00038faf, 0x000393af, 0x000397af, + 0x00039baf, 0x00039faf, 0x0003a3af, 0x0003a7af, + 0x0003abaf, 0x0003afaf, 0x0003b3af, 0x0003b7af, + 0x0003bbaf, 0x0003bfaf, 0x0003c3af, 0x0003c7af, + 0x0003cbaf, 0x0003cfaf, 0x0003d3af, 0x0003d7af, + 0x0003dbaf, 0x0003dfaf, 0x0003e3af, 0x0003e7af, + 0x0003ebaf, 0x0003efaf, 0x0003f3af, 0x0003f7af, + 0x0003fbaf, 0x0003ffaf, 0x000403af, 0x000407af, + 0x00040baf, 0x00040faf, 0x000413af, 0x000417af, + 0x00041baf, 0x00041faf, 0x000423af, 0x000427af, + 0x00042baf, 0x00042faf, 0x000433af, 0x000437af, + 0x00043baf, 0x00043faf, 0x000443af, 0x000447af, + 0x00044baf, 0x00044faf, 0x000453af, 0x000457af, + 0x00045baf, 0x00045faf, 0x000463af, 0x000467af, + 0x00046baf, 0x00046faf, 0x000473af, 0x000477af, + 0x00047baf, 0x00047faf, 0x000483af, 0x000487af, + 0x00048baf, 0x00048faf, 0x000493af, 0x000497af, + 0x00049baf, 0x00049faf, 0x0004a3af, 0x0004a7af, + 0x0004abaf, 0x0004afaf, 0x0004b3af, 0x0004b7af, + 0x0004bbaf, 0x0004bfaf, 0x0004c3af, 0x0004c7af, + 0x0004cbaf, 0x0004cfaf, 0x0004d3af, 0x0004d7af, + 0x0004dbaf, 0x0004dfaf, 0x0004e3af, 0x0004e7af, + 0x0004ebaf, 0x0004efaf, 0x0004f3af, 0x0004f7af, + 0x0004fbaf, 0x0004ffaf, 
0x000503af, 0x000507af, + 0x00050baf, 0x00050faf, 0x000513af, 0x000517af, + 0x00051baf, 0x00051faf, 0x000523af, 0x000527af, + 0x00052baf, 0x00052faf, 0x000533af, 0x000537af, + 0x00053baf, 0x00053faf, 0x000543af, 0x000547af, + 0x00054baf, 0x00054faf, 0x000553af, 0x000557af, + 0x00055baf, 0x00055faf, 0x000563af, 0x000567af, + 0x00056baf, 0x00056faf, 0x000573af, 0x000577af, + 0x00057baf, 0x00057faf, 0x000583af, 0x000587af, + 0x00058baf, 0x00058faf, 0x000593af, 0x000597af, + 0x00059baf, 0x00059faf, 0x0005a3af, 0x0005a7af, + 0x0005abaf, 0x0005afaf, 0x0005b3af, 0x0005b7af, + 0x0005bbaf, 0x0005bfaf, 0x0005c3af, 0x0005c7af, + 0x0005cbaf, 0x0005cfaf, 0x0005d3af, 0x0005d7af, + 0x0005dbaf, 0x0005dfaf, 0x0005e3af, 0x0005e7af, + 0x0005ebaf, 0x0005efaf, 0x0005f3af, 0x0005f7af, + 0x0005fbaf, 0x0005ffaf, 0x000603af, 0x000607af, + 0x00060baf, 0x00060faf, 0x000613af, 0x000617af, + 0x00061baf, 0x00061faf, 0x000623af, 0x000627af, + 0x00062baf, 0x00062faf, 0x000633af, 0x000637af, + 0x00063baf, 0x00063faf, 0x000643af, 0x000647af, + 0x00064baf, 0x00064faf, 0x000653af, 0x000657af, + 0x00065baf, 0x00065faf, 0x000663af, 0x000667af, + 0x00066baf, 0x00066faf, 0x000673af, 0x000677af, + 0x00067baf, 0x00067faf, 0x000683af, 0x000687af, + 0x00068baf, 0x00068faf, 0x000693af, 0x000697af, + 0x00069baf, 0x00069faf, 0x0006a3af, 0x0006a7af, + 0x0006abaf, 0x0006afaf, 0x0006b3af, 0x0006b7af, + 0x0006bbaf, 0x0006bfaf, 0x0006c3af, 0x0006c7af, + 0x0006cbaf, 0x0006cfaf, 0x0006d3af, 0x0006d7af, + 0x0006dbaf, 0x0006dfaf, 0x0006e3af, 0x0006e7af, + 0x0006ebaf, 0x0006efaf, 0x0006f3af, 0x0006f7af, + 0x0006fbaf, 0x0006ffaf, 0x000703af, 0x000707af, + 0x00070baf, 0x00070faf, 0x000713af, 0x000717af, + 0x00071baf, 0x00071faf, 0x000723af, 0x000727af, + 0x00072baf, 0x00072faf, 0x000733af, 0x000737af, + 0x00073baf, 0x00073faf, 0x000743af, 0x000747af, + 0x00074baf, 0x00074faf, 0x000753af, 0x000757af, + 0x00075baf, 0x00075faf, 0x000763af, 0x000767af, + 0x00076baf, 0x00076faf, 0x000773af, 0x000777af, + 0x00077baf, 0x00077faf, 
0x000783af, 0x000787af, + 0x00078baf, 0x00078faf, 0x000793af, 0x000797af, + 0x00079baf, 0x00079faf, 0x0007a3af, 0x0007a7af, + 0x0007abaf, 0x0007afaf, 0x0007b3af, 0x0007b7af, + 0x0007bbaf, 0x0007bfaf, 0x0007c3af, 0x0007c7af, + 0x0007cbaf, 0x0007cfaf, 0x0007d3af, 0x0007d7af, + 0x0007dbaf, 0x0007dfaf, 0x0007e3af, 0x0007e7af, + 0x0007ebaf, 0x0007efaf, 0x0007f3af, 0x0007f7af, + 0x0007fbaf, 0x0007ffaf, 0x000803af, 0x000807af, + 0x00080baf, 0x00080faf, 0x000813af, 0x000817af, + 0x00081baf, 0x00081faf, 0x000823af, 0x000827af, + 0x00082baf, 0x00082faf, 0x000833af, 0x000837af, + 0x00083baf, 0x00083faf, 0x000843af, 0x000847af, + 0x00084baf, 0x00084faf, 0x000853af, 0x000857af, + 0x00085baf, 0x00085faf, 0x000863af, 0x000867af, + 0x00086baf, 0x00086faf, 0x000873af, 0x000877af, + 0x00087baf, 0x00087faf, 0x000883af, 0x000887af, + 0x00088baf, 0x00088faf, 0x000893af, 0x000897af, + 0x00089baf, 0x00089faf, 0x0008a3af, 0x0008a7af, + 0x0008abaf, 0x0008afaf, 0x0008b3af, 0x0008b7af, + 0x0008bbaf, 0x0008bfaf, 0x0008c3af, 0x0008c7af, + 0x0008cbaf, 0x0008cfaf, 0x0008d3af, 0x0008d7af, + 0x0008dbaf, 0x0008dfaf, 0x0008e3af, 0x0008e7af, + 0x0008ebaf, 0x0008efaf, 0x0008f3af, 0x0008f7af, + 0x0008fbaf, 0x0008ffaf, 0x000903af, 0x000907af, + 0x00090baf, 0x00090faf, 0x000913af, 0x000917af, + 0x00091baf, 0x00091faf, 0x000923af, 0x000927af, + 0x00092baf, 0x00092faf, 0x000933af, 0x000937af, + 0x00093baf, 0x00093faf, 0x000943af, 0x000947af, + 0x00094baf, 0x00094faf, 0x000953af, 0x000957af, + 0x00095baf, 0x00095faf, 0x000963af, 0x000967af, + 0x00096baf, 0x00096faf, 0x000973af, 0x000977af, + 0x00097baf, 0x00097faf, 0x000983af, 0x000987af, + 0x00098baf, 0x00098faf, 0x000993af, 0x000997af, + 0x00099baf, 0x00099faf, 0x0009a3af, 0x0009a7af, + 0x0009abaf, 0x0009afaf, 0x0009b3af, 0x0009b7af, + 0x0009bbaf, 0x0009bfaf, 0x0009c3af, 0x0009c7af, + 0x0009cbaf, 0x0009cfaf, 0x0009d3af, 0x0009d7af, + 0x0009dbaf, 0x0009dfaf, 0x0009e3af, 0x0009e7af, + 0x0009ebaf, 0x0009efaf, 0x0009f3af, 0x0009f7af, + 0x0009fbaf, 0x0009ffaf, 
0x000a03af, 0x000a07af, + 0x000a0baf, 0x000a0faf, 0x000a13af, 0x000a17af, + 0x000a1baf, 0x000a1faf, 0x000a23af, 0x000a27af, + 0x000a2baf, 0x000a2faf, 0x000a33af, 0x000a37af, + 0x000a3baf, 0x000a3faf, 0x000a43af, 0x000a47af, + 0x000a4baf, 0x000a4faf, 0x000a53af, 0x000a57af, + 0x000a5baf, 0x000a5faf, 0x000a63af, 0x000a67af, + 0x000a6baf, 0x000a6faf, 0x000a73af, 0x000a77af, + 0x000a7baf, 0x000a7faf, 0x000a83af, 0x000a87af, + 0x000a8baf, 0x000a8faf, 0x000a93af, 0x000a97af, + 0x000a9baf, 0x000a9faf, 0x000aa3af, 0x000aa7af, + 0x000aabaf, 0x000aafaf, 0x000ab3af, 0x000ab7af, + 0x000abbaf, 0x000abfaf, 0x000ac3af, 0x000ac7af, + 0x000acbaf, 0x000acfaf, 0x000ad3af, 0x000ad7af, + 0x000adbaf, 0x000adfaf, 0x000ae3af, 0x000ae7af, + 0x000aebaf, 0x000aefaf, 0x000af3af, 0x000af7af, + 0x000afbaf, 0x000affaf, 0x000b03af, 0x000b07af, + 0x000b0baf, 0x000b0faf, 0x000b13af, 0x000b17af, + 0x000b1baf, 0x000b1faf, 0x000b23af, 0x000b27af, + 0x000b2baf, 0x000b2faf, 0x000b33af, 0x000b37af, + 0x000b3baf, 0x000b3faf, 0x000b43af, 0x000b47af, + 0x000b4baf, 0x000b4faf, 0x000b53af, 0x000b57af, + 0x000b5baf, 0x000b5faf, 0x000b63af, 0x000b67af, + 0x000b6baf, 0x000b6faf, 0x000b73af, 0x000b77af, + 0x000b7baf, 0x000b7faf, 0x000b83af, 0x000b87af, + 0x000b8baf, 0x000b8faf, 0x000b93af, 0x000b97af, + 0x000b9baf, 0x000b9faf, 0x000ba3af, 0x000ba7af, + 0x000babaf, 0x000bafaf, 0x000bb3af, 0x000bb7af, + 0x000bbbaf, 0x000bbfaf, 0x000bc3af, 0x000bc7af, + 0x000bcbaf, 0x000bcfaf, 0x000bd3af, 0x000bd7af, + 0x000bdbaf, 0x000bdfaf, 0x000be3af, 0x000be7af, + 0x000bebaf, 0x000befaf, 0x000bf3af, 0x000bf7af, + 0x000bfbaf, 0x000bffaf, 0x000c03af, 0x000c07af, + 0x000c0baf, 0x000c0faf, 0x000c13af, 0x000c17af, + 0x000c1baf, 0x000c1faf, 0x000c23af, 0x000c27af, + 0x000c2baf, 0x000c2faf, 0x000c33af, 0x000c37af, + 0x000c3baf, 0x000c3faf, 0x000c43af, 0x000c47af, + 0x000c4baf, 0x000c4faf, 0x000c53af, 0x000c57af, + 0x000c5baf, 0x000c5faf, 0x000c63af, 0x000c67af, + 0x000c6baf, 0x000c6faf, 0x000c73af, 0x000c77af, + 0x000c7baf, 0x000c7faf, 
0x000c83af, 0x000c87af, + 0x000c8baf, 0x000c8faf, 0x000c93af, 0x000c97af, + 0x000c9baf, 0x000c9faf, 0x000ca3af, 0x000ca7af, + 0x000cabaf, 0x000cafaf, 0x000cb3af, 0x000cb7af, + 0x000cbbaf, 0x000cbfaf, 0x000cc3af, 0x000cc7af, + 0x000ccbaf, 0x000ccfaf, 0x000cd3af, 0x000cd7af, + 0x000cdbaf, 0x000cdfaf, 0x000ce3af, 0x000ce7af, + 0x000cebaf, 0x000cefaf, 0x000cf3af, 0x000cf7af, + 0x000cfbaf, 0x000cffaf, 0x000d03af, 0x000d07af, + 0x000d0baf, 0x000d0faf, 0x000d13af, 0x000d17af, + 0x000d1baf, 0x000d1faf, 0x000d23af, 0x000d27af, + 0x000d2baf, 0x000d2faf, 0x000d33af, 0x000d37af, + 0x000d3baf, 0x000d3faf, 0x000d43af, 0x000d47af, + 0x000d4baf, 0x000d4faf, 0x000d53af, 0x000d57af, + 0x000d5baf, 0x000d5faf, 0x000d63af, 0x000d67af, + 0x000d6baf, 0x000d6faf, 0x000d73af, 0x000d77af, + 0x000d7baf, 0x000d7faf, 0x000d83af, 0x000d87af, + 0x000d8baf, 0x000d8faf, 0x000d93af, 0x000d97af, + 0x000d9baf, 0x000d9faf, 0x000da3af, 0x000da7af, + 0x000dabaf, 0x000dafaf, 0x000db3af, 0x000db7af, + 0x000dbbaf, 0x000dbfaf, 0x000dc3af, 0x000dc7af, + 0x000dcbaf, 0x000dcfaf, 0x000dd3af, 0x000dd7af, + 0x000ddbaf, 0x000ddfaf, 0x000de3af, 0x000de7af, + 0x000debaf, 0x000defaf, 0x000df3af, 0x000df7af, + 0x000dfbaf, 0x000dffaf, 0x000e03af, 0x000e07af, + 0x000e0baf, 0x000e0faf, 0x000e13af, 0x000e17af, + 0x000e1baf, 0x000e1faf, 0x000e23af, 0x000e27af, + 0x000e2baf, 0x000e2faf, 0x000e33af, 0x000e37af, + 0x000e3baf, 0x000e3faf, 0x000e43af, 0x000e47af, + 0x000e4baf, 0x000e4faf, 0x000e53af, 0x000e57af, + 0x000e5baf, 0x000e5faf, 0x000e63af, 0x000e67af, + 0x000e6baf, 0x000e6faf, 0x000e73af, 0x000e77af, + 0x000e7baf, 0x000e7faf, 0x000e83af, 0x000e87af, + 0x000e8baf, 0x000e8faf, 0x000e93af, 0x000e97af, + 0x000e9baf, 0x000e9faf, 0x000ea3af, 0x000ea7af, + 0x000eabaf, 0x000eafaf, 0x000eb3af, 0x000eb7af, + 0x000ebbaf, 0x000ebfaf, 0x000ec3af, 0x000ec7af, + 0x000ecbaf, 0x000ecfaf, 0x000ed3af, 0x000ed7af, + 0x000edbaf, 0x000edfaf, 0x000ee3af, 0x000ee7af, + 0x000eebaf, 0x000eefaf, 0x000ef3af, 0x000ef7af, + 0x000efbaf, 0x000effaf, 
0x000f03af, 0x000f07af, + 0x000f0baf, 0x000f0faf, 0x000f13af, 0x000f17af, + 0x000f1baf, 0x000f1faf, 0x000f23af, 0x000f27af, + 0x000f2baf, 0x000f2faf, 0x000f33af, 0x000f37af, + 0x000f3baf, 0x000f3faf, 0x000f43af, 0x000f47af, + 0x000f4baf, 0x000f4faf, 0x000f53af, 0x000f57af, + 0x000f5baf, 0x000f5faf, 0x000f63af, 0x000f67af, + 0x000f6baf, 0x000f6faf, 0x000f73af, 0x000f77af, + 0x000f7baf, 0x000f7faf, 0x000f83af, 0x000f87af, + 0x000f8baf, 0x000f8faf, 0x000f93af, 0x000f97af, + 0x000f9baf, 0x000f9faf, 0x000fa3af, 0x000fa7af, + 0x000fabaf, 0x000fafaf, 0x000fb3af, 0x000fb7af, + 0x000fbbaf, 0x000fbfaf, 0x000fc3af, 0x000fc7af, + 0x000fcbaf, 0x000fcfaf, 0x000fd3af, 0x000fd7af, + 0x000fdbaf, 0x000fdfaf, 0x000fe3af, 0x000fe7af, + 0x000febaf, 0x000fefaf, 0x000ff3af, 0x000ff7af, + 0x000ffbaf, 0x000fffaf, 0x00000070, 0x00000470, + 0x00000870, 0x00000c70, 0x00001070, 0x00001470, + 0x00001870, 0x00001c70, 0x00002070, 0x00002470, + 0x00002870, 0x00002c70, 0x00003070, 0x00003470, + 0x00003870, 0x00003c70, 0x00004070, 0x00004470, + 0x00004870, 0x00004c70, 0x00005070, 0x00005470, + 0x00005870, 0x00005c70, 0x00006070, 0x00006470, + 0x00006870, 0x00006c70, 0x00007070, 0x00007470, + 0x00007870, 0x00007c70, 0x00008070, 0x00008470, + 0x00008870, 0x00008c70, 0x00009070, 0x00009470, + 0x00009870, 0x00009c70, 0x0000a070, 0x0000a470, + 0x0000a870, 0x0000ac70, 0x0000b070, 0x0000b470, + 0x0000b870, 0x0000bc70, 0x0000c070, 0x0000c470, + 0x0000c870, 0x0000cc70, 0x0000d070, 0x0000d470, + 0x0000d870, 0x0000dc70, 0x0000e070, 0x0000e470, + 0x0000e870, 0x0000ec70, 0x0000f070, 0x0000f470, + 0x0000f870, 0x0000fc70, 0x00010070, 0x00010470, + 0x00010870, 0x00010c70, 0x00011070, 0x00011470, + 0x00011870, 0x00011c70, 0x00012070, 0x00012470, + 0x00012870, 0x00012c70, 0x00013070, 0x00013470, + 0x00013870, 0x00013c70, 0x00014070, 0x00014470, + 0x00014870, 0x00014c70, 0x00015070, 0x00015470, + 0x00015870, 0x00015c70, 0x00016070, 0x00016470, + 0x00016870, 0x00016c70, 0x00017070, 0x00017470, + 0x00017870, 0x00017c70, 
0x00018070, 0x00018470, + 0x00018870, 0x00018c70, 0x00019070, 0x00019470, + 0x00019870, 0x00019c70, 0x0001a070, 0x0001a470, + 0x0001a870, 0x0001ac70, 0x0001b070, 0x0001b470, + 0x0001b870, 0x0001bc70, 0x0001c070, 0x0001c470, + 0x0001c870, 0x0001cc70, 0x0001d070, 0x0001d470, + 0x0001d870, 0x0001dc70, 0x0001e070, 0x0001e470, + 0x0001e870, 0x0001ec70, 0x0001f070, 0x0001f470, + 0x0001f870, 0x0001fc70, 0x00020070, 0x00020470, + 0x00020870, 0x00020c70, 0x00021070, 0x00021470, + 0x00021870, 0x00021c70, 0x00022070, 0x00022470, + 0x00022870, 0x00022c70, 0x00023070, 0x00023470, + 0x00023870, 0x00023c70, 0x00024070, 0x00024470, + 0x00024870, 0x00024c70, 0x00025070, 0x00025470, + 0x00025870, 0x00025c70, 0x00026070, 0x00026470, + 0x00026870, 0x00026c70, 0x00027070, 0x00027470, + 0x00027870, 0x00027c70, 0x00028070, 0x00028470, + 0x00028870, 0x00028c70, 0x00029070, 0x00029470, + 0x00029870, 0x00029c70, 0x0002a070, 0x0002a470, + 0x0002a870, 0x0002ac70, 0x0002b070, 0x0002b470, + 0x0002b870, 0x0002bc70, 0x0002c070, 0x0002c470, + 0x0002c870, 0x0002cc70, 0x0002d070, 0x0002d470, + 0x0002d870, 0x0002dc70, 0x0002e070, 0x0002e470, + 0x0002e870, 0x0002ec70, 0x0002f070, 0x0002f470, + 0x0002f870, 0x0002fc70, 0x00030070, 0x00030470, + 0x00030870, 0x00030c70, 0x00031070, 0x00031470, + 0x00031870, 0x00031c70, 0x00032070, 0x00032470, + 0x00032870, 0x00032c70, 0x00033070, 0x00033470, + 0x00033870, 0x00033c70, 0x00034070, 0x00034470, + 0x00034870, 0x00034c70, 0x00035070, 0x00035470, + 0x00035870, 0x00035c70, 0x00036070, 0x00036470, + 0x00036870, 0x00036c70, 0x00037070, 0x00037470, + 0x00037870, 0x00037c70, 0x00038070, 0x00038470, + 0x00038870, 0x00038c70, 0x00039070, 0x00039470, + 0x00039870, 0x00039c70, 0x0003a070, 0x0003a470, + 0x0003a870, 0x0003ac70, 0x0003b070, 0x0003b470, + 0x0003b870, 0x0003bc70, 0x0003c070, 0x0003c470, + 0x0003c870, 0x0003cc70, 0x0003d070, 0x0003d470, + 0x0003d870, 0x0003dc70, 0x0003e070, 0x0003e470, + 0x0003e870, 0x0003ec70, 0x0003f070, 0x0003f470, + 0x0003f870, 0x0003fc70, 
0x00040070, 0x00040470, + 0x00040870, 0x00040c70, 0x00041070, 0x00041470, + 0x00041870, 0x00041c70, 0x00042070, 0x00042470, + 0x00042870, 0x00042c70, 0x00043070, 0x00043470, + 0x00043870, 0x00043c70, 0x00044070, 0x00044470, + 0x00044870, 0x00044c70, 0x00045070, 0x00045470, + 0x00045870, 0x00045c70, 0x00046070, 0x00046470, + 0x00046870, 0x00046c70, 0x00047070, 0x00047470, + 0x00047870, 0x00047c70, 0x00048070, 0x00048470, + 0x00048870, 0x00048c70, 0x00049070, 0x00049470, + 0x00049870, 0x00049c70, 0x0004a070, 0x0004a470, + 0x0004a870, 0x0004ac70, 0x0004b070, 0x0004b470, + 0x0004b870, 0x0004bc70, 0x0004c070, 0x0004c470, + 0x0004c870, 0x0004cc70, 0x0004d070, 0x0004d470, + 0x0004d870, 0x0004dc70, 0x0004e070, 0x0004e470, + 0x0004e870, 0x0004ec70, 0x0004f070, 0x0004f470, + 0x0004f870, 0x0004fc70, 0x00050070, 0x00050470, + 0x00050870, 0x00050c70, 0x00051070, 0x00051470, + 0x00051870, 0x00051c70, 0x00052070, 0x00052470, + 0x00052870, 0x00052c70, 0x00053070, 0x00053470, + 0x00053870, 0x00053c70, 0x00054070, 0x00054470, + 0x00054870, 0x00054c70, 0x00055070, 0x00055470, + 0x00055870, 0x00055c70, 0x00056070, 0x00056470, + 0x00056870, 0x00056c70, 0x00057070, 0x00057470, + 0x00057870, 0x00057c70, 0x00058070, 0x00058470, + 0x00058870, 0x00058c70, 0x00059070, 0x00059470, + 0x00059870, 0x00059c70, 0x0005a070, 0x0005a470, + 0x0005a870, 0x0005ac70, 0x0005b070, 0x0005b470, + 0x0005b870, 0x0005bc70, 0x0005c070, 0x0005c470, + 0x0005c870, 0x0005cc70, 0x0005d070, 0x0005d470, + 0x0005d870, 0x0005dc70, 0x0005e070, 0x0005e470, + 0x0005e870, 0x0005ec70, 0x0005f070, 0x0005f470, + 0x0005f870, 0x0005fc70, 0x00060070, 0x00060470, + 0x00060870, 0x00060c70, 0x00061070, 0x00061470, + 0x00061870, 0x00061c70, 0x00062070, 0x00062470, + 0x00062870, 0x00062c70, 0x00063070, 0x00063470, + 0x00063870, 0x00063c70, 0x00064070, 0x00064470, + 0x00064870, 0x00064c70, 0x00065070, 0x00065470, + 0x00065870, 0x00065c70, 0x00066070, 0x00066470, + 0x00066870, 0x00066c70, 0x00067070, 0x00067470, + 0x00067870, 0x00067c70, 
0x00068070, 0x00068470, + 0x00068870, 0x00068c70, 0x00069070, 0x00069470, + 0x00069870, 0x00069c70, 0x0006a070, 0x0006a470, + 0x0006a870, 0x0006ac70, 0x0006b070, 0x0006b470, + 0x0006b870, 0x0006bc70, 0x0006c070, 0x0006c470, + 0x0006c870, 0x0006cc70, 0x0006d070, 0x0006d470, + 0x0006d870, 0x0006dc70, 0x0006e070, 0x0006e470, + 0x0006e870, 0x0006ec70, 0x0006f070, 0x0006f470, + 0x0006f870, 0x0006fc70, 0x00070070, 0x00070470, + 0x00070870, 0x00070c70, 0x00071070, 0x00071470, + 0x00071870, 0x00071c70, 0x00072070, 0x00072470, + 0x00072870, 0x00072c70, 0x00073070, 0x00073470, + 0x00073870, 0x00073c70, 0x00074070, 0x00074470, + 0x00074870, 0x00074c70, 0x00075070, 0x00075470, + 0x00075870, 0x00075c70, 0x00076070, 0x00076470, + 0x00076870, 0x00076c70, 0x00077070, 0x00077470, + 0x00077870, 0x00077c70, 0x00078070, 0x00078470, + 0x00078870, 0x00078c70, 0x00079070, 0x00079470, + 0x00079870, 0x00079c70, 0x0007a070, 0x0007a470, + 0x0007a870, 0x0007ac70, 0x0007b070, 0x0007b470, + 0x0007b870, 0x0007bc70, 0x0007c070, 0x0007c470, + 0x0007c870, 0x0007cc70, 0x0007d070, 0x0007d470, + 0x0007d870, 0x0007dc70, 0x0007e070, 0x0007e470, + 0x0007e870, 0x0007ec70, 0x0007f070, 0x0007f470, + 0x0007f870, 0x0007fc70, 0x00080070, 0x00080470, + 0x00080870, 0x00080c70, 0x00081070, 0x00081470, + 0x00081870, 0x00081c70, 0x00082070, 0x00082470, + 0x00082870, 0x00082c70, 0x00083070, 0x00083470, + 0x00083870, 0x00083c70, 0x00084070, 0x00084470, + 0x00084870, 0x00084c70, 0x00085070, 0x00085470, + 0x00085870, 0x00085c70, 0x00086070, 0x00086470, + 0x00086870, 0x00086c70, 0x00087070, 0x00087470, + 0x00087870, 0x00087c70, 0x00088070, 0x00088470, + 0x00088870, 0x00088c70, 0x00089070, 0x00089470, + 0x00089870, 0x00089c70, 0x0008a070, 0x0008a470, + 0x0008a870, 0x0008ac70, 0x0008b070, 0x0008b470, + 0x0008b870, 0x0008bc70, 0x0008c070, 0x0008c470, + 0x0008c870, 0x0008cc70, 0x0008d070, 0x0008d470, + 0x0008d870, 0x0008dc70, 0x0008e070, 0x0008e470, + 0x0008e870, 0x0008ec70, 0x0008f070, 0x0008f470, + 0x0008f870, 0x0008fc70, 
0x00090070, 0x00090470, + 0x00090870, 0x00090c70, 0x00091070, 0x00091470, + 0x00091870, 0x00091c70, 0x00092070, 0x00092470, + 0x00092870, 0x00092c70, 0x00093070, 0x00093470, + 0x00093870, 0x00093c70, 0x00094070, 0x00094470, + 0x00094870, 0x00094c70, 0x00095070, 0x00095470, + 0x00095870, 0x00095c70, 0x00096070, 0x00096470, + 0x00096870, 0x00096c70, 0x00097070, 0x00097470, + 0x00097870, 0x00097c70, 0x00098070, 0x00098470, + 0x00098870, 0x00098c70, 0x00099070, 0x00099470, + 0x00099870, 0x00099c70, 0x0009a070, 0x0009a470, + 0x0009a870, 0x0009ac70, 0x0009b070, 0x0009b470, + 0x0009b870, 0x0009bc70, 0x0009c070, 0x0009c470, + 0x0009c870, 0x0009cc70, 0x0009d070, 0x0009d470, + 0x0009d870, 0x0009dc70, 0x0009e070, 0x0009e470, + 0x0009e870, 0x0009ec70, 0x0009f070, 0x0009f470, + 0x0009f870, 0x0009fc70, 0x000a0070, 0x000a0470, + 0x000a0870, 0x000a0c70, 0x000a1070, 0x000a1470, + 0x000a1870, 0x000a1c70, 0x000a2070, 0x000a2470, + 0x000a2870, 0x000a2c70, 0x000a3070, 0x000a3470, + 0x000a3870, 0x000a3c70, 0x000a4070, 0x000a4470, + 0x000a4870, 0x000a4c70, 0x000a5070, 0x000a5470, + 0x000a5870, 0x000a5c70, 0x000a6070, 0x000a6470, + 0x000a6870, 0x000a6c70, 0x000a7070, 0x000a7470, + 0x000a7870, 0x000a7c70, 0x000a8070, 0x000a8470, + 0x000a8870, 0x000a8c70, 0x000a9070, 0x000a9470, + 0x000a9870, 0x000a9c70, 0x000aa070, 0x000aa470, + 0x000aa870, 0x000aac70, 0x000ab070, 0x000ab470, + 0x000ab870, 0x000abc70, 0x000ac070, 0x000ac470, + 0x000ac870, 0x000acc70, 0x000ad070, 0x000ad470, + 0x000ad870, 0x000adc70, 0x000ae070, 0x000ae470, + 0x000ae870, 0x000aec70, 0x000af070, 0x000af470, + 0x000af870, 0x000afc70, 0x000b0070, 0x000b0470, + 0x000b0870, 0x000b0c70, 0x000b1070, 0x000b1470, + 0x000b1870, 0x000b1c70, 0x000b2070, 0x000b2470, + 0x000b2870, 0x000b2c70, 0x000b3070, 0x000b3470, + 0x000b3870, 0x000b3c70, 0x000b4070, 0x000b4470, + 0x000b4870, 0x000b4c70, 0x000b5070, 0x000b5470, + 0x000b5870, 0x000b5c70, 0x000b6070, 0x000b6470, + 0x000b6870, 0x000b6c70, 0x000b7070, 0x000b7470, + 0x000b7870, 0x000b7c70, 
0x000b8070, 0x000b8470, + 0x000b8870, 0x000b8c70, 0x000b9070, 0x000b9470, + 0x000b9870, 0x000b9c70, 0x000ba070, 0x000ba470, + 0x000ba870, 0x000bac70, 0x000bb070, 0x000bb470, + 0x000bb870, 0x000bbc70, 0x000bc070, 0x000bc470, + 0x000bc870, 0x000bcc70, 0x000bd070, 0x000bd470, + 0x000bd870, 0x000bdc70, 0x000be070, 0x000be470, + 0x000be870, 0x000bec70, 0x000bf070, 0x000bf470, + 0x000bf870, 0x000bfc70, 0x000c0070, 0x000c0470, + 0x000c0870, 0x000c0c70, 0x000c1070, 0x000c1470, + 0x000c1870, 0x000c1c70, 0x000c2070, 0x000c2470, + 0x000c2870, 0x000c2c70, 0x000c3070, 0x000c3470, + 0x000c3870, 0x000c3c70, 0x000c4070, 0x000c4470, + 0x000c4870, 0x000c4c70, 0x000c5070, 0x000c5470, + 0x000c5870, 0x000c5c70, 0x000c6070, 0x000c6470, + 0x000c6870, 0x000c6c70, 0x000c7070, 0x000c7470, + 0x000c7870, 0x000c7c70, 0x000c8070, 0x000c8470, + 0x000c8870, 0x000c8c70, 0x000c9070, 0x000c9470, + 0x000c9870, 0x000c9c70, 0x000ca070, 0x000ca470, + 0x000ca870, 0x000cac70, 0x000cb070, 0x000cb470, + 0x000cb870, 0x000cbc70, 0x000cc070, 0x000cc470, + 0x000cc870, 0x000ccc70, 0x000cd070, 0x000cd470, + 0x000cd870, 0x000cdc70, 0x000ce070, 0x000ce470, + 0x000ce870, 0x000cec70, 0x000cf070, 0x000cf470, + 0x000cf870, 0x000cfc70, 0x000d0070, 0x000d0470, + 0x000d0870, 0x000d0c70, 0x000d1070, 0x000d1470, + 0x000d1870, 0x000d1c70, 0x000d2070, 0x000d2470, + 0x000d2870, 0x000d2c70, 0x000d3070, 0x000d3470, + 0x000d3870, 0x000d3c70, 0x000d4070, 0x000d4470, + 0x000d4870, 0x000d4c70, 0x000d5070, 0x000d5470, + 0x000d5870, 0x000d5c70, 0x000d6070, 0x000d6470, + 0x000d6870, 0x000d6c70, 0x000d7070, 0x000d7470, + 0x000d7870, 0x000d7c70, 0x000d8070, 0x000d8470, + 0x000d8870, 0x000d8c70, 0x000d9070, 0x000d9470, + 0x000d9870, 0x000d9c70, 0x000da070, 0x000da470, + 0x000da870, 0x000dac70, 0x000db070, 0x000db470, + 0x000db870, 0x000dbc70, 0x000dc070, 0x000dc470, + 0x000dc870, 0x000dcc70, 0x000dd070, 0x000dd470, + 0x000dd870, 0x000ddc70, 0x000de070, 0x000de470, + 0x000de870, 0x000dec70, 0x000df070, 0x000df470, + 0x000df870, 0x000dfc70, 
0x000e0070, 0x000e0470, + 0x000e0870, 0x000e0c70, 0x000e1070, 0x000e1470, + 0x000e1870, 0x000e1c70, 0x000e2070, 0x000e2470, + 0x000e2870, 0x000e2c70, 0x000e3070, 0x000e3470, + 0x000e3870, 0x000e3c70, 0x000e4070, 0x000e4470, + 0x000e4870, 0x000e4c70, 0x000e5070, 0x000e5470, + 0x000e5870, 0x000e5c70, 0x000e6070, 0x000e6470, + 0x000e6870, 0x000e6c70, 0x000e7070, 0x000e7470, + 0x000e7870, 0x000e7c70, 0x000e8070, 0x000e8470, + 0x000e8870, 0x000e8c70, 0x000e9070, 0x000e9470, + 0x000e9870, 0x000e9c70, 0x000ea070, 0x000ea470, + 0x000ea870, 0x000eac70, 0x000eb070, 0x000eb470, + 0x000eb870, 0x000ebc70, 0x000ec070, 0x000ec470, + 0x000ec870, 0x000ecc70, 0x000ed070, 0x000ed470, + 0x000ed870, 0x000edc70, 0x000ee070, 0x000ee470, + 0x000ee870, 0x000eec70, 0x000ef070, 0x000ef470, + 0x000ef870, 0x000efc70, 0x000f0070, 0x000f0470, + 0x000f0870, 0x000f0c70, 0x000f1070, 0x000f1470, + 0x000f1870, 0x000f1c70, 0x000f2070, 0x000f2470, + 0x000f2870, 0x000f2c70, 0x000f3070, 0x000f3470, + 0x000f3870, 0x000f3c70, 0x000f4070, 0x000f4470, + 0x000f4870, 0x000f4c70, 0x000f5070, 0x000f5470, + 0x000f5870, 0x000f5c70, 0x000f6070, 0x000f6470, + 0x000f6870, 0x000f6c70, 0x000f7070, 0x000f7470, + 0x000f7870, 0x000f7c70, 0x000f8070, 0x000f8470, + 0x000f8870, 0x000f8c70, 0x000f9070, 0x000f9470, + 0x000f9870, 0x000f9c70, 0x000fa070, 0x000fa470, + 0x000fa870, 0x000fac70, 0x000fb070, 0x000fb470, + 0x000fb870, 0x000fbc70, 0x000fc070, 0x000fc470, + 0x000fc870, 0x000fcc70, 0x000fd070, 0x000fd470, + 0x000fd870, 0x000fdc70, 0x000fe070, 0x000fe470, + 0x000fe870, 0x000fec70, 0x000ff070, 0x000ff470, + 0x000ff870, 0x000ffc70, 0x00100070, 0x00100470, + 0x00100870, 0x00100c70, 0x00101070, 0x00101470, + 0x00101870, 0x00101c70, 0x00102070, 0x00102470, + 0x00102870, 0x00102c70, 0x00103070, 0x00103470, + 0x00103870, 0x00103c70, 0x00104070, 0x00104470, + 0x00104870, 0x00104c70, 0x00105070, 0x00105470, + 0x00105870, 0x00105c70, 0x00106070, 0x00106470, + 0x00106870, 0x00106c70, 0x00107070, 0x00107470, + 0x00107870, 0x00107c70, 
0x00108070, 0x00108470, + 0x00108870, 0x00108c70, 0x00109070, 0x00109470, + 0x00109870, 0x00109c70, 0x0010a070, 0x0010a470, + 0x0010a870, 0x0010ac70, 0x0010b070, 0x0010b470, + 0x0010b870, 0x0010bc70, 0x0010c070, 0x0010c470, + 0x0010c870, 0x0010cc70, 0x0010d070, 0x0010d470, + 0x0010d870, 0x0010dc70, 0x0010e070, 0x0010e470, + 0x0010e870, 0x0010ec70, 0x0010f070, 0x0010f470, + 0x0010f870, 0x0010fc70, 0x00110070, 0x00110470, + 0x00110870, 0x00110c70, 0x00111070, 0x00111470, + 0x00111870, 0x00111c70, 0x00112070, 0x00112470, + 0x00112870, 0x00112c70, 0x00113070, 0x00113470, + 0x00113870, 0x00113c70, 0x00114070, 0x00114470, + 0x00114870, 0x00114c70, 0x00115070, 0x00115470, + 0x00115870, 0x00115c70, 0x00116070, 0x00116470, + 0x00116870, 0x00116c70, 0x00117070, 0x00117470, + 0x00117870, 0x00117c70, 0x00118070, 0x00118470, + 0x00118870, 0x00118c70, 0x00119070, 0x00119470, + 0x00119870, 0x00119c70, 0x0011a070, 0x0011a470, + 0x0011a870, 0x0011ac70, 0x0011b070, 0x0011b470, + 0x0011b870, 0x0011bc70, 0x0011c070, 0x0011c470, + 0x0011c870, 0x0011cc70, 0x0011d070, 0x0011d470, + 0x0011d870, 0x0011dc70, 0x0011e070, 0x0011e470, + 0x0011e870, 0x0011ec70, 0x0011f070, 0x0011f470, + 0x0011f870, 0x0011fc70, 0x00120070, 0x00120470, + 0x00120870, 0x00120c70, 0x00121070, 0x00121470, + 0x00121870, 0x00121c70, 0x00122070, 0x00122470, + 0x00122870, 0x00122c70, 0x00123070, 0x00123470, + 0x00123870, 0x00123c70, 0x00124070, 0x00124470, + 0x00124870, 0x00124c70, 0x00125070, 0x00125470, + 0x00125870, 0x00125c70, 0x00126070, 0x00126470, + 0x00126870, 0x00126c70, 0x00127070, 0x00127470, + 0x00127870, 0x00127c70, 0x00128070, 0x00128470, + 0x00128870, 0x00128c70, 0x00129070, 0x00129470, + 0x00129870, 0x00129c70, 0x0012a070, 0x0012a470, + 0x0012a870, 0x0012ac70, 0x0012b070, 0x0012b470, + 0x0012b870, 0x0012bc70, 0x0012c070, 0x0012c470, + 0x0012c870, 0x0012cc70, 0x0012d070, 0x0012d470, + 0x0012d870, 0x0012dc70, 0x0012e070, 0x0012e470, + 0x0012e870, 0x0012ec70, 0x0012f070, 0x0012f470, + 0x0012f870, 0x0012fc70, 
0x00130070, 0x00130470, + 0x00130870, 0x00130c70, 0x00131070, 0x00131470, + 0x00131870, 0x00131c70, 0x00132070, 0x00132470, + 0x00132870, 0x00132c70, 0x00133070, 0x00133470, + 0x00133870, 0x00133c70, 0x00134070, 0x00134470, + 0x00134870, 0x00134c70, 0x00135070, 0x00135470, + 0x00135870, 0x00135c70, 0x00136070, 0x00136470, + 0x00136870, 0x00136c70, 0x00137070, 0x00137470, + 0x00137870, 0x00137c70, 0x00138070, 0x00138470, + 0x00138870, 0x00138c70, 0x00139070, 0x00139470, + 0x00139870, 0x00139c70, 0x0013a070, 0x0013a470, + 0x0013a870, 0x0013ac70, 0x0013b070, 0x0013b470, + 0x0013b870, 0x0013bc70, 0x0013c070, 0x0013c470, + 0x0013c870, 0x0013cc70, 0x0013d070, 0x0013d470, + 0x0013d870, 0x0013dc70, 0x0013e070, 0x0013e470, + 0x0013e870, 0x0013ec70, 0x0013f070, 0x0013f470, + 0x0013f870, 0x0013fc70, 0x00140070, 0x00140470, + 0x00140870, 0x00140c70, 0x00141070, 0x00141470, + 0x00141870, 0x00141c70, 0x00142070, 0x00142470, + 0x00142870, 0x00142c70, 0x00143070, 0x00143470, + 0x00143870, 0x00143c70, 0x00144070, 0x00144470, + 0x00144870, 0x00144c70, 0x00145070, 0x00145470, + 0x00145870, 0x00145c70, 0x00146070, 0x00146470, + 0x00146870, 0x00146c70, 0x00147070, 0x00147470, + 0x00147870, 0x00147c70, 0x00148070, 0x00148470, + 0x00148870, 0x00148c70, 0x00149070, 0x00149470, + 0x00149870, 0x00149c70, 0x0014a070, 0x0014a470, + 0x0014a870, 0x0014ac70, 0x0014b070, 0x0014b470, + 0x0014b870, 0x0014bc70, 0x0014c070, 0x0014c470, + 0x0014c870, 0x0014cc70, 0x0014d070, 0x0014d470, + 0x0014d870, 0x0014dc70, 0x0014e070, 0x0014e470, + 0x0014e870, 0x0014ec70, 0x0014f070, 0x0014f470, + 0x0014f870, 0x0014fc70, 0x00150070, 0x00150470, + 0x00150870, 0x00150c70, 0x00151070, 0x00151470, + 0x00151870, 0x00151c70, 0x00152070, 0x00152470, + 0x00152870, 0x00152c70, 0x00153070, 0x00153470, + 0x00153870, 0x00153c70, 0x00154070, 0x00154470, + 0x00154870, 0x00154c70, 0x00155070, 0x00155470, + 0x00155870, 0x00155c70, 0x00156070, 0x00156470, + 0x00156870, 0x00156c70, 0x00157070, 0x00157470, + 0x00157870, 0x00157c70, 
0x00158070, 0x00158470, + 0x00158870, 0x00158c70, 0x00159070, 0x00159470, + 0x00159870, 0x00159c70, 0x0015a070, 0x0015a470, + 0x0015a870, 0x0015ac70, 0x0015b070, 0x0015b470, + 0x0015b870, 0x0015bc70, 0x0015c070, 0x0015c470, + 0x0015c870, 0x0015cc70, 0x0015d070, 0x0015d470, + 0x0015d870, 0x0015dc70, 0x0015e070, 0x0015e470, + 0x0015e870, 0x0015ec70, 0x0015f070, 0x0015f470, + 0x0015f870, 0x0015fc70, 0x00160070, 0x00160470, + 0x00160870, 0x00160c70, 0x00161070, 0x00161470, + 0x00161870, 0x00161c70, 0x00162070, 0x00162470, + 0x00162870, 0x00162c70, 0x00163070, 0x00163470, + 0x00163870, 0x00163c70, 0x00164070, 0x00164470, + 0x00164870, 0x00164c70, 0x00165070, 0x00165470, + 0x00165870, 0x00165c70, 0x00166070, 0x00166470, + 0x00166870, 0x00166c70, 0x00167070, 0x00167470, + 0x00167870, 0x00167c70, 0x00168070, 0x00168470, + 0x00168870, 0x00168c70, 0x00169070, 0x00169470, + 0x00169870, 0x00169c70, 0x0016a070, 0x0016a470, + 0x0016a870, 0x0016ac70, 0x0016b070, 0x0016b470, + 0x0016b870, 0x0016bc70, 0x0016c070, 0x0016c470, + 0x0016c870, 0x0016cc70, 0x0016d070, 0x0016d470, + 0x0016d870, 0x0016dc70, 0x0016e070, 0x0016e470, + 0x0016e870, 0x0016ec70, 0x0016f070, 0x0016f470, + 0x0016f870, 0x0016fc70, 0x00170070, 0x00170470, + 0x00170870, 0x00170c70, 0x00171070, 0x00171470, + 0x00171870, 0x00171c70, 0x00172070, 0x00172470, + 0x00172870, 0x00172c70, 0x00173070, 0x00173470, + 0x00173870, 0x00173c70, 0x00174070, 0x00174470, + 0x00174870, 0x00174c70, 0x00175070, 0x00175470, + 0x00175870, 0x00175c70, 0x00176070, 0x00176470, + 0x00176870, 0x00176c70, 0x00177070, 0x00177470, + 0x00177870, 0x00177c70, 0x00178070, 0x00178470, + 0x00178870, 0x00178c70, 0x00179070, 0x00179470, + 0x00179870, 0x00179c70, 0x0017a070, 0x0017a470, + 0x0017a870, 0x0017ac70, 0x0017b070, 0x0017b470, + 0x0017b870, 0x0017bc70, 0x0017c070, 0x0017c470, + 0x0017c870, 0x0017cc70, 0x0017d070, 0x0017d470, + 0x0017d870, 0x0017dc70, 0x0017e070, 0x0017e470, + 0x0017e870, 0x0017ec70, 0x0017f070, 0x0017f470, + 0x0017f870, 0x0017fc70, 
0x00180070, 0x00180470, + 0x00180870, 0x00180c70, 0x00181070, 0x00181470, + 0x00181870, 0x00181c70, 0x00182070, 0x00182470, + 0x00182870, 0x00182c70, 0x00183070, 0x00183470, + 0x00183870, 0x00183c70, 0x00184070, 0x00184470, + 0x00184870, 0x00184c70, 0x00185070, 0x00185470, + 0x00185870, 0x00185c70, 0x00186070, 0x00186470, + 0x00186870, 0x00186c70, 0x00187070, 0x00187470, + 0x00187870, 0x00187c70, 0x00188070, 0x00188470, + 0x00188870, 0x00188c70, 0x00189070, 0x00189470, + 0x00189870, 0x00189c70, 0x0018a070, 0x0018a470, + 0x0018a870, 0x0018ac70, 0x0018b070, 0x0018b470, + 0x0018b870, 0x0018bc70, 0x0018c070, 0x0018c470, + 0x0018c870, 0x0018cc70, 0x0018d070, 0x0018d470, + 0x0018d870, 0x0018dc70, 0x0018e070, 0x0018e470, + 0x0018e870, 0x0018ec70, 0x0018f070, 0x0018f470, + 0x0018f870, 0x0018fc70, 0x00190070, 0x00190470, + 0x00190870, 0x00190c70, 0x00191070, 0x00191470, + 0x00191870, 0x00191c70, 0x00192070, 0x00192470, + 0x00192870, 0x00192c70, 0x00193070, 0x00193470, + 0x00193870, 0x00193c70, 0x00194070, 0x00194470, + 0x00194870, 0x00194c70, 0x00195070, 0x00195470, + 0x00195870, 0x00195c70, 0x00196070, 0x00196470, + 0x00196870, 0x00196c70, 0x00197070, 0x00197470, + 0x00197870, 0x00197c70, 0x00198070, 0x00198470, + 0x00198870, 0x00198c70, 0x00199070, 0x00199470, + 0x00199870, 0x00199c70, 0x0019a070, 0x0019a470, + 0x0019a870, 0x0019ac70, 0x0019b070, 0x0019b470, + 0x0019b870, 0x0019bc70, 0x0019c070, 0x0019c470, + 0x0019c870, 0x0019cc70, 0x0019d070, 0x0019d470, + 0x0019d870, 0x0019dc70, 0x0019e070, 0x0019e470, + 0x0019e870, 0x0019ec70, 0x0019f070, 0x0019f470, + 0x0019f870, 0x0019fc70, 0x001a0070, 0x001a0470, + 0x001a0870, 0x001a0c70, 0x001a1070, 0x001a1470, + 0x001a1870, 0x001a1c70, 0x001a2070, 0x001a2470, + 0x001a2870, 0x001a2c70, 0x001a3070, 0x001a3470, + 0x001a3870, 0x001a3c70, 0x001a4070, 0x001a4470, + 0x001a4870, 0x001a4c70, 0x001a5070, 0x001a5470, + 0x001a5870, 0x001a5c70, 0x001a6070, 0x001a6470, + 0x001a6870, 0x001a6c70, 0x001a7070, 0x001a7470, + 0x001a7870, 0x001a7c70, 
0x001a8070, 0x001a8470, + 0x001a8870, 0x001a8c70, 0x001a9070, 0x001a9470, + 0x001a9870, 0x001a9c70, 0x001aa070, 0x001aa470, + 0x001aa870, 0x001aac70, 0x001ab070, 0x001ab470, + 0x001ab870, 0x001abc70, 0x001ac070, 0x001ac470, + 0x001ac870, 0x001acc70, 0x001ad070, 0x001ad470, + 0x001ad870, 0x001adc70, 0x001ae070, 0x001ae470, + 0x001ae870, 0x001aec70, 0x001af070, 0x001af470, + 0x001af870, 0x001afc70, 0x001b0070, 0x001b0470, + 0x001b0870, 0x001b0c70, 0x001b1070, 0x001b1470, + 0x001b1870, 0x001b1c70, 0x001b2070, 0x001b2470, + 0x001b2870, 0x001b2c70, 0x001b3070, 0x001b3470, + 0x001b3870, 0x001b3c70, 0x001b4070, 0x001b4470, + 0x001b4870, 0x001b4c70, 0x001b5070, 0x001b5470, + 0x001b5870, 0x001b5c70, 0x001b6070, 0x001b6470, + 0x001b6870, 0x001b6c70, 0x001b7070, 0x001b7470, + 0x001b7870, 0x001b7c70, 0x001b8070, 0x001b8470, + 0x001b8870, 0x001b8c70, 0x001b9070, 0x001b9470, + 0x001b9870, 0x001b9c70, 0x001ba070, 0x001ba470, + 0x001ba870, 0x001bac70, 0x001bb070, 0x001bb470, + 0x001bb870, 0x001bbc70, 0x001bc070, 0x001bc470, + 0x001bc870, 0x001bcc70, 0x001bd070, 0x001bd470, + 0x001bd870, 0x001bdc70, 0x001be070, 0x001be470, + 0x001be870, 0x001bec70, 0x001bf070, 0x001bf470, + 0x001bf870, 0x001bfc70, 0x001c0070, 0x001c0470, + 0x001c0870, 0x001c0c70, 0x001c1070, 0x001c1470, + 0x001c1870, 0x001c1c70, 0x001c2070, 0x001c2470, + 0x001c2870, 0x001c2c70, 0x001c3070, 0x001c3470, + 0x001c3870, 0x001c3c70, 0x001c4070, 0x001c4470, + 0x001c4870, 0x001c4c70, 0x001c5070, 0x001c5470, + 0x001c5870, 0x001c5c70, 0x001c6070, 0x001c6470, + 0x001c6870, 0x001c6c70, 0x001c7070, 0x001c7470, + 0x001c7870, 0x001c7c70, 0x001c8070, 0x001c8470, + 0x001c8870, 0x001c8c70, 0x001c9070, 0x001c9470, + 0x001c9870, 0x001c9c70, 0x001ca070, 0x001ca470, + 0x001ca870, 0x001cac70, 0x001cb070, 0x001cb470, + 0x001cb870, 0x001cbc70, 0x001cc070, 0x001cc470, + 0x001cc870, 0x001ccc70, 0x001cd070, 0x001cd470, + 0x001cd870, 0x001cdc70, 0x001ce070, 0x001ce470, + 0x001ce870, 0x001cec70, 0x001cf070, 0x001cf470, + 0x001cf870, 0x001cfc70, 
0x001d0070, 0x001d0470, + 0x001d0870, 0x001d0c70, 0x001d1070, 0x001d1470, + 0x001d1870, 0x001d1c70, 0x001d2070, 0x001d2470, + 0x001d2870, 0x001d2c70, 0x001d3070, 0x001d3470, + 0x001d3870, 0x001d3c70, 0x001d4070, 0x001d4470, + 0x001d4870, 0x001d4c70, 0x001d5070, 0x001d5470, + 0x001d5870, 0x001d5c70, 0x001d6070, 0x001d6470, + 0x001d6870, 0x001d6c70, 0x001d7070, 0x001d7470, + 0x001d7870, 0x001d7c70, 0x001d8070, 0x001d8470, + 0x001d8870, 0x001d8c70, 0x001d9070, 0x001d9470, + 0x001d9870, 0x001d9c70, 0x001da070, 0x001da470, + 0x001da870, 0x001dac70, 0x001db070, 0x001db470, + 0x001db870, 0x001dbc70, 0x001dc070, 0x001dc470, + 0x001dc870, 0x001dcc70, 0x001dd070, 0x001dd470, + 0x001dd870, 0x001ddc70, 0x001de070, 0x001de470, + 0x001de870, 0x001dec70, 0x001df070, 0x001df470, + 0x001df870, 0x001dfc70, 0x001e0070, 0x001e0470, + 0x001e0870, 0x001e0c70, 0x001e1070, 0x001e1470, + 0x001e1870, 0x001e1c70, 0x001e2070, 0x001e2470, + 0x001e2870, 0x001e2c70, 0x001e3070, 0x001e3470, + 0x001e3870, 0x001e3c70, 0x001e4070, 0x001e4470, + 0x001e4870, 0x001e4c70, 0x001e5070, 0x001e5470, + 0x001e5870, 0x001e5c70, 0x001e6070, 0x001e6470, + 0x001e6870, 0x001e6c70, 0x001e7070, 0x001e7470, + 0x001e7870, 0x001e7c70, 0x001e8070, 0x001e8470, + 0x001e8870, 0x001e8c70, 0x001e9070, 0x001e9470, + 0x001e9870, 0x001e9c70, 0x001ea070, 0x001ea470, + 0x001ea870, 0x001eac70, 0x001eb070, 0x001eb470, + 0x001eb870, 0x001ebc70, 0x001ec070, 0x001ec470, + 0x001ec870, 0x001ecc70, 0x001ed070, 0x001ed470, + 0x001ed870, 0x001edc70, 0x001ee070, 0x001ee470, + 0x001ee870, 0x001eec70, 0x001ef070, 0x001ef470, + 0x001ef870, 0x001efc70, 0x001f0070, 0x001f0470, + 0x001f0870, 0x001f0c70, 0x001f1070, 0x001f1470, + 0x001f1870, 0x001f1c70, 0x001f2070, 0x001f2470, + 0x001f2870, 0x001f2c70, 0x001f3070, 0x001f3470, + 0x001f3870, 0x001f3c70, 0x001f4070, 0x001f4470, + 0x001f4870, 0x001f4c70, 0x001f5070, 0x001f5470, + 0x001f5870, 0x001f5c70, 0x001f6070, 0x001f6470, + 0x001f6870, 0x001f6c70, 0x001f7070, 0x001f7470, + 0x001f7870, 0x001f7c70, 
0x001f8070, 0x001f8470, + 0x001f8870, 0x001f8c70, 0x001f9070, 0x001f9470, + 0x001f9870, 0x001f9c70, 0x001fa070, 0x001fa470, + 0x001fa870, 0x001fac70, 0x001fb070, 0x001fb470, + 0x001fb870, 0x001fbc70, 0x001fc070, 0x001fc470, + 0x001fc870, 0x001fcc70, 0x001fd070, 0x001fd470, + 0x001fd870, 0x001fdc70, 0x001fe070, 0x001fe470, + 0x001fe870, 0x001fec70, 0x001ff070, 0x001ff470, + 0x001ff870, 0x001ffc70, 0x00000270, 0x00000670, + 0x00000a70, 0x00000e70, 0x00001270, 0x00001670, + 0x00001a70, 0x00001e70, 0x00002270, 0x00002670, + 0x00002a70, 0x00002e70, 0x00003270, 0x00003670, + 0x00003a70, 0x00003e70, 0x00004270, 0x00004670, + 0x00004a70, 0x00004e70, 0x00005270, 0x00005670, + 0x00005a70, 0x00005e70, 0x00006270, 0x00006670, + 0x00006a70, 0x00006e70, 0x00007270, 0x00007670, + 0x00007a70, 0x00007e70, 0x00008270, 0x00008670, + 0x00008a70, 0x00008e70, 0x00009270, 0x00009670, + 0x00009a70, 0x00009e70, 0x0000a270, 0x0000a670, + 0x0000aa70, 0x0000ae70, 0x0000b270, 0x0000b670, + 0x0000ba70, 0x0000be70, 0x0000c270, 0x0000c670, + 0x0000ca70, 0x0000ce70, 0x0000d270, 0x0000d670, + 0x0000da70, 0x0000de70, 0x0000e270, 0x0000e670, + 0x0000ea70, 0x0000ee70, 0x0000f270, 0x0000f670, + 0x0000fa70, 0x0000fe70, 0x00010270, 0x00010670, + 0x00010a70, 0x00010e70, 0x00011270, 0x00011670, + 0x00011a70, 0x00011e70, 0x00012270, 0x00012670, + 0x00012a70, 0x00012e70, 0x00013270, 0x00013670, + 0x00013a70, 0x00013e70, 0x00014270, 0x00014670, + 0x00014a70, 0x00014e70, 0x00015270, 0x00015670, + 0x00015a70, 0x00015e70, 0x00016270, 0x00016670, + 0x00016a70, 0x00016e70, 0x00017270, 0x00017670, + 0x00017a70, 0x00017e70, 0x00018270, 0x00018670, + 0x00018a70, 0x00018e70, 0x00019270, 0x00019670, + 0x00019a70, 0x00019e70, 0x0001a270, 0x0001a670, + 0x0001aa70, 0x0001ae70, 0x0001b270, 0x0001b670, + 0x0001ba70, 0x0001be70, 0x0001c270, 0x0001c670, + 0x0001ca70, 0x0001ce70, 0x0001d270, 0x0001d670, + 0x0001da70, 0x0001de70, 0x0001e270, 0x0001e670, + 0x0001ea70, 0x0001ee70, 0x0001f270, 0x0001f670, + 0x0001fa70, 0x0001fe70, 
0x00020270, 0x00020670, + 0x00020a70, 0x00020e70, 0x00021270, 0x00021670, + 0x00021a70, 0x00021e70, 0x00022270, 0x00022670, + 0x00022a70, 0x00022e70, 0x00023270, 0x00023670, + 0x00023a70, 0x00023e70, 0x00024270, 0x00024670, + 0x00024a70, 0x00024e70, 0x00025270, 0x00025670, + 0x00025a70, 0x00025e70, 0x00026270, 0x00026670, + 0x00026a70, 0x00026e70, 0x00027270, 0x00027670, + 0x00027a70, 0x00027e70, 0x00028270, 0x00028670, + 0x00028a70, 0x00028e70, 0x00029270, 0x00029670, + 0x00029a70, 0x00029e70, 0x0002a270, 0x0002a670, + 0x0002aa70, 0x0002ae70, 0x0002b270, 0x0002b670, + 0x0002ba70, 0x0002be70, 0x0002c270, 0x0002c670, + 0x0002ca70, 0x0002ce70, 0x0002d270, 0x0002d670, + 0x0002da70, 0x0002de70, 0x0002e270, 0x0002e670, + 0x0002ea70, 0x0002ee70, 0x0002f270, 0x0002f670, + 0x0002fa70, 0x0002fe70, 0x00030270, 0x00030670, + 0x00030a70, 0x00030e70, 0x00031270, 0x00031670, + 0x00031a70, 0x00031e70, 0x00032270, 0x00032670, + 0x00032a70, 0x00032e70, 0x00033270, 0x00033670, + 0x00033a70, 0x00033e70, 0x00034270, 0x00034670, + 0x00034a70, 0x00034e70, 0x00035270, 0x00035670, + 0x00035a70, 0x00035e70, 0x00036270, 0x00036670, + 0x00036a70, 0x00036e70, 0x00037270, 0x00037670, + 0x00037a70, 0x00037e70, 0x00038270, 0x00038670, + 0x00038a70, 0x00038e70, 0x00039270, 0x00039670, + 0x00039a70, 0x00039e70, 0x0003a270, 0x0003a670, + 0x0003aa70, 0x0003ae70, 0x0003b270, 0x0003b670, + 0x0003ba70, 0x0003be70, 0x0003c270, 0x0003c670, + 0x0003ca70, 0x0003ce70, 0x0003d270, 0x0003d670, + 0x0003da70, 0x0003de70, 0x0003e270, 0x0003e670, + 0x0003ea70, 0x0003ee70, 0x0003f270, 0x0003f670, + 0x0003fa70, 0x0003fe70, 0x00040270, 0x00040670, + 0x00040a70, 0x00040e70, 0x00041270, 0x00041670, + 0x00041a70, 0x00041e70, 0x00042270, 0x00042670, + 0x00042a70, 0x00042e70, 0x00043270, 0x00043670, + 0x00043a70, 0x00043e70, 0x00044270, 0x00044670, + 0x00044a70, 0x00044e70, 0x00045270, 0x00045670, + 0x00045a70, 0x00045e70, 0x00046270, 0x00046670, + 0x00046a70, 0x00046e70, 0x00047270, 0x00047670, + 0x00047a70, 0x00047e70, 
0x00048270, 0x00048670, + 0x00048a70, 0x00048e70, 0x00049270, 0x00049670, + 0x00049a70, 0x00049e70, 0x0004a270, 0x0004a670, + 0x0004aa70, 0x0004ae70, 0x0004b270, 0x0004b670, + 0x0004ba70, 0x0004be70, 0x0004c270, 0x0004c670, + 0x0004ca70, 0x0004ce70, 0x0004d270, 0x0004d670, + 0x0004da70, 0x0004de70, 0x0004e270, 0x0004e670, + 0x0004ea70, 0x0004ee70, 0x0004f270, 0x0004f670, + 0x0004fa70, 0x0004fe70, 0x00050270, 0x00050670, + 0x00050a70, 0x00050e70, 0x00051270, 0x00051670, + 0x00051a70, 0x00051e70, 0x00052270, 0x00052670, + 0x00052a70, 0x00052e70, 0x00053270, 0x00053670, + 0x00053a70, 0x00053e70, 0x00054270, 0x00054670, + 0x00054a70, 0x00054e70, 0x00055270, 0x00055670, + 0x00055a70, 0x00055e70, 0x00056270, 0x00056670, + 0x00056a70, 0x00056e70, 0x00057270, 0x00057670, + 0x00057a70, 0x00057e70, 0x00058270, 0x00058670, + 0x00058a70, 0x00058e70, 0x00059270, 0x00059670, + 0x00059a70, 0x00059e70, 0x0005a270, 0x0005a670, + 0x0005aa70, 0x0005ae70, 0x0005b270, 0x0005b670, + 0x0005ba70, 0x0005be70, 0x0005c270, 0x0005c670, + 0x0005ca70, 0x0005ce70, 0x0005d270, 0x0005d670, + 0x0005da70, 0x0005de70, 0x0005e270, 0x0005e670, + 0x0005ea70, 0x0005ee70, 0x0005f270, 0x0005f670, + 0x0005fa70, 0x0005fe70, 0x00060270, 0x00060670, + 0x00060a70, 0x00060e70, 0x00061270, 0x00061670, + 0x00061a70, 0x00061e70, 0x00062270, 0x00062670, + 0x00062a70, 0x00062e70, 0x00063270, 0x00063670, + 0x00063a70, 0x00063e70, 0x00064270, 0x00064670, + 0x00064a70, 0x00064e70, 0x00065270, 0x00065670, + 0x00065a70, 0x00065e70, 0x00066270, 0x00066670, + 0x00066a70, 0x00066e70, 0x00067270, 0x00067670, + 0x00067a70, 0x00067e70, 0x00068270, 0x00068670, + 0x00068a70, 0x00068e70, 0x00069270, 0x00069670, + 0x00069a70, 0x00069e70, 0x0006a270, 0x0006a670, + 0x0006aa70, 0x0006ae70, 0x0006b270, 0x0006b670, + 0x0006ba70, 0x0006be70, 0x0006c270, 0x0006c670, + 0x0006ca70, 0x0006ce70, 0x0006d270, 0x0006d670, + 0x0006da70, 0x0006de70, 0x0006e270, 0x0006e670, + 0x0006ea70, 0x0006ee70, 0x0006f270, 0x0006f670, + 0x0006fa70, 0x0006fe70, 
0x00070270, 0x00070670, + 0x00070a70, 0x00070e70, 0x00071270, 0x00071670, + 0x00071a70, 0x00071e70, 0x00072270, 0x00072670, + 0x00072a70, 0x00072e70, 0x00073270, 0x00073670, + 0x00073a70, 0x00073e70, 0x00074270, 0x00074670, + 0x00074a70, 0x00074e70, 0x00075270, 0x00075670, + 0x00075a70, 0x00075e70, 0x00076270, 0x00076670, + 0x00076a70, 0x00076e70, 0x00077270, 0x00077670, + 0x00077a70, 0x00077e70, 0x00078270, 0x00078670, + 0x00078a70, 0x00078e70, 0x00079270, 0x00079670, + 0x00079a70, 0x00079e70, 0x0007a270, 0x0007a670, + 0x0007aa70, 0x0007ae70, 0x0007b270, 0x0007b670, + 0x0007ba70, 0x0007be70, 0x0007c270, 0x0007c670, + 0x0007ca70, 0x0007ce70, 0x0007d270, 0x0007d670, + 0x0007da70, 0x0007de70, 0x0007e270, 0x0007e670, + 0x0007ea70, 0x0007ee70, 0x0007f270, 0x0007f670, + 0x0007fa70, 0x0007fe70, 0x00080270, 0x00080670, + 0x00080a70, 0x00080e70, 0x00081270, 0x00081670, + 0x00081a70, 0x00081e70, 0x00082270, 0x00082670, + 0x00082a70, 0x00082e70, 0x00083270, 0x00083670, + 0x00083a70, 0x00083e70, 0x00084270, 0x00084670, + 0x00084a70, 0x00084e70, 0x00085270, 0x00085670, + 0x00085a70, 0x00085e70, 0x00086270, 0x00086670, + 0x00086a70, 0x00086e70, 0x00087270, 0x00087670, + 0x00087a70, 0x00087e70, 0x00088270, 0x00088670, + 0x00088a70, 0x00088e70, 0x00089270, 0x00089670, + 0x00089a70, 0x00089e70, 0x0008a270, 0x0008a670, + 0x0008aa70, 0x0008ae70, 0x0008b270, 0x0008b670, + 0x0008ba70, 0x0008be70, 0x0008c270, 0x0008c670, + 0x0008ca70, 0x0008ce70, 0x0008d270, 0x0008d670, + 0x0008da70, 0x0008de70, 0x0008e270, 0x0008e670, + 0x0008ea70, 0x0008ee70, 0x0008f270, 0x0008f670, + 0x0008fa70, 0x0008fe70, 0x00090270, 0x00090670, + 0x00090a70, 0x00090e70, 0x00091270, 0x00091670, + 0x00091a70, 0x00091e70, 0x00092270, 0x00092670, + 0x00092a70, 0x00092e70, 0x00093270, 0x00093670, + 0x00093a70, 0x00093e70, 0x00094270, 0x00094670, + 0x00094a70, 0x00094e70, 0x00095270, 0x00095670, + 0x00095a70, 0x00095e70, 0x00096270, 0x00096670, + 0x00096a70, 0x00096e70, 0x00097270, 0x00097670, + 0x00097a70, 0x00097e70, 
0x00098270, 0x00098670, + 0x00098a70, 0x00098e70, 0x00099270, 0x00099670, + 0x00099a70, 0x00099e70, 0x0009a270, 0x0009a670, + 0x0009aa70, 0x0009ae70, 0x0009b270, 0x0009b670, + 0x0009ba70, 0x0009be70, 0x0009c270, 0x0009c670, + 0x0009ca70, 0x0009ce70, 0x0009d270, 0x0009d670, + 0x0009da70, 0x0009de70, 0x0009e270, 0x0009e670, + 0x0009ea70, 0x0009ee70, 0x0009f270, 0x0009f670, + 0x0009fa70, 0x0009fe70, 0x000a0270, 0x000a0670, + 0x000a0a70, 0x000a0e70, 0x000a1270, 0x000a1670, + 0x000a1a70, 0x000a1e70, 0x000a2270, 0x000a2670, + 0x000a2a70, 0x000a2e70, 0x000a3270, 0x000a3670, + 0x000a3a70, 0x000a3e70, 0x000a4270, 0x000a4670, + 0x000a4a70, 0x000a4e70, 0x000a5270, 0x000a5670, + 0x000a5a70, 0x000a5e70, 0x000a6270, 0x000a6670, + 0x000a6a70, 0x000a6e70, 0x000a7270, 0x000a7670, + 0x000a7a70, 0x000a7e70, 0x000a8270, 0x000a8670, + 0x000a8a70, 0x000a8e70, 0x000a9270, 0x000a9670, + 0x000a9a70, 0x000a9e70, 0x000aa270, 0x000aa670, + 0x000aaa70, 0x000aae70, 0x000ab270, 0x000ab670, + 0x000aba70, 0x000abe70, 0x000ac270, 0x000ac670, + 0x000aca70, 0x000ace70, 0x000ad270, 0x000ad670, + 0x000ada70, 0x000ade70, 0x000ae270, 0x000ae670, + 0x000aea70, 0x000aee70, 0x000af270, 0x000af670, + 0x000afa70, 0x000afe70, 0x000b0270, 0x000b0670, + 0x000b0a70, 0x000b0e70, 0x000b1270, 0x000b1670, + 0x000b1a70, 0x000b1e70, 0x000b2270, 0x000b2670, + 0x000b2a70, 0x000b2e70, 0x000b3270, 0x000b3670, + 0x000b3a70, 0x000b3e70, 0x000b4270, 0x000b4670, + 0x000b4a70, 0x000b4e70, 0x000b5270, 0x000b5670, + 0x000b5a70, 0x000b5e70, 0x000b6270, 0x000b6670, + 0x000b6a70, 0x000b6e70, 0x000b7270, 0x000b7670, + 0x000b7a70, 0x000b7e70, 0x000b8270, 0x000b8670, + 0x000b8a70, 0x000b8e70, 0x000b9270, 0x000b9670, + 0x000b9a70, 0x000b9e70, 0x000ba270, 0x000ba670, + 0x000baa70, 0x000bae70, 0x000bb270, 0x000bb670, + 0x000bba70, 0x000bbe70, 0x000bc270, 0x000bc670, + 0x000bca70, 0x000bce70, 0x000bd270, 0x000bd670, + 0x000bda70, 0x000bde70, 0x000be270, 0x000be670, + 0x000bea70, 0x000bee70, 0x000bf270, 0x000bf670, + 0x000bfa70, 0x000bfe70, 
0x000c0270, 0x000c0670, + 0x000c0a70, 0x000c0e70, 0x000c1270, 0x000c1670, + 0x000c1a70, 0x000c1e70, 0x000c2270, 0x000c2670, + 0x000c2a70, 0x000c2e70, 0x000c3270, 0x000c3670, + 0x000c3a70, 0x000c3e70, 0x000c4270, 0x000c4670, + 0x000c4a70, 0x000c4e70, 0x000c5270, 0x000c5670, + 0x000c5a70, 0x000c5e70, 0x000c6270, 0x000c6670, + 0x000c6a70, 0x000c6e70, 0x000c7270, 0x000c7670, + 0x000c7a70, 0x000c7e70, 0x000c8270, 0x000c8670, + 0x000c8a70, 0x000c8e70, 0x000c9270, 0x000c9670, + 0x000c9a70, 0x000c9e70, 0x000ca270, 0x000ca670, + 0x000caa70, 0x000cae70, 0x000cb270, 0x000cb670, + 0x000cba70, 0x000cbe70, 0x000cc270, 0x000cc670, + 0x000cca70, 0x000cce70, 0x000cd270, 0x000cd670, + 0x000cda70, 0x000cde70, 0x000ce270, 0x000ce670, + 0x000cea70, 0x000cee70, 0x000cf270, 0x000cf670, + 0x000cfa70, 0x000cfe70, 0x000d0270, 0x000d0670, + 0x000d0a70, 0x000d0e70, 0x000d1270, 0x000d1670, + 0x000d1a70, 0x000d1e70, 0x000d2270, 0x000d2670, + 0x000d2a70, 0x000d2e70, 0x000d3270, 0x000d3670, + 0x000d3a70, 0x000d3e70, 0x000d4270, 0x000d4670, + 0x000d4a70, 0x000d4e70, 0x000d5270, 0x000d5670, + 0x000d5a70, 0x000d5e70, 0x000d6270, 0x000d6670, + 0x000d6a70, 0x000d6e70, 0x000d7270, 0x000d7670, + 0x000d7a70, 0x000d7e70, 0x000d8270, 0x000d8670, + 0x000d8a70, 0x000d8e70, 0x000d9270, 0x000d9670, + 0x000d9a70, 0x000d9e70, 0x000da270, 0x000da670, + 0x000daa70, 0x000dae70, 0x000db270, 0x000db670, + 0x000dba70, 0x000dbe70, 0x000dc270, 0x000dc670, + 0x000dca70, 0x000dce70, 0x000dd270, 0x000dd670, + 0x000dda70, 0x000dde70, 0x000de270, 0x000de670, + 0x000dea70, 0x000dee70, 0x000df270, 0x000df670, + 0x000dfa70, 0x000dfe70, 0x000e0270, 0x000e0670, + 0x000e0a70, 0x000e0e70, 0x000e1270, 0x000e1670, + 0x000e1a70, 0x000e1e70, 0x000e2270, 0x000e2670, + 0x000e2a70, 0x000e2e70, 0x000e3270, 0x000e3670, + 0x000e3a70, 0x000e3e70, 0x000e4270, 0x000e4670, + 0x000e4a70, 0x000e4e70, 0x000e5270, 0x000e5670, + 0x000e5a70, 0x000e5e70, 0x000e6270, 0x000e6670, + 0x000e6a70, 0x000e6e70, 0x000e7270, 0x000e7670, + 0x000e7a70, 0x000e7e70, 
0x000e8270, 0x000e8670, + 0x000e8a70, 0x000e8e70, 0x000e9270, 0x000e9670, + 0x000e9a70, 0x000e9e70, 0x000ea270, 0x000ea670, + 0x000eaa70, 0x000eae70, 0x000eb270, 0x000eb670, + 0x000eba70, 0x000ebe70, 0x000ec270, 0x000ec670, + 0x000eca70, 0x000ece70, 0x000ed270, 0x000ed670, + 0x000eda70, 0x000ede70, 0x000ee270, 0x000ee670, + 0x000eea70, 0x000eee70, 0x000ef270, 0x000ef670, + 0x000efa70, 0x000efe70, 0x000f0270, 0x000f0670, + 0x000f0a70, 0x000f0e70, 0x000f1270, 0x000f1670, + 0x000f1a70, 0x000f1e70, 0x000f2270, 0x000f2670, + 0x000f2a70, 0x000f2e70, 0x000f3270, 0x000f3670, + 0x000f3a70, 0x000f3e70, 0x000f4270, 0x000f4670, + 0x000f4a70, 0x000f4e70, 0x000f5270, 0x000f5670, + 0x000f5a70, 0x000f5e70, 0x000f6270, 0x000f6670, + 0x000f6a70, 0x000f6e70, 0x000f7270, 0x000f7670, + 0x000f7a70, 0x000f7e70, 0x000f8270, 0x000f8670, + 0x000f8a70, 0x000f8e70, 0x000f9270, 0x000f9670, + 0x000f9a70, 0x000f9e70, 0x000fa270, 0x000fa670, + 0x000faa70, 0x000fae70, 0x000fb270, 0x000fb670, + 0x000fba70, 0x000fbe70, 0x000fc270, 0x000fc670, + 0x000fca70, 0x000fce70, 0x000fd270, 0x000fd670, + 0x000fda70, 0x000fde70, 0x000fe270, 0x000fe670, + 0x000fea70, 0x000fee70, 0x000ff270, 0x000ff670, + 0x000ffa70, 0x000ffe70, 0x00100270, 0x00100670, + 0x00100a70, 0x00100e70, 0x00101270, 0x00101670, + 0x00101a70, 0x00101e70, 0x00102270, 0x00102670, + 0x00102a70, 0x00102e70, 0x00103270, 0x00103670, + 0x00103a70, 0x00103e70, 0x00104270, 0x00104670, + 0x00104a70, 0x00104e70, 0x00105270, 0x00105670, + 0x00105a70, 0x00105e70, 0x00106270, 0x00106670, + 0x00106a70, 0x00106e70, 0x00107270, 0x00107670, + 0x00107a70, 0x00107e70, 0x00108270, 0x00108670, + 0x00108a70, 0x00108e70, 0x00109270, 0x00109670, + 0x00109a70, 0x00109e70, 0x0010a270, 0x0010a670, + 0x0010aa70, 0x0010ae70, 0x0010b270, 0x0010b670, + 0x0010ba70, 0x0010be70, 0x0010c270, 0x0010c670, + 0x0010ca70, 0x0010ce70, 0x0010d270, 0x0010d670, + 0x0010da70, 0x0010de70, 0x0010e270, 0x0010e670, + 0x0010ea70, 0x0010ee70, 0x0010f270, 0x0010f670, + 0x0010fa70, 0x0010fe70, 
0x00110270, 0x00110670, + 0x00110a70, 0x00110e70, 0x00111270, 0x00111670, + 0x00111a70, 0x00111e70, 0x00112270, 0x00112670, + 0x00112a70, 0x00112e70, 0x00113270, 0x00113670, + 0x00113a70, 0x00113e70, 0x00114270, 0x00114670, + 0x00114a70, 0x00114e70, 0x00115270, 0x00115670, + 0x00115a70, 0x00115e70, 0x00116270, 0x00116670, + 0x00116a70, 0x00116e70, 0x00117270, 0x00117670, + 0x00117a70, 0x00117e70, 0x00118270, 0x00118670, + 0x00118a70, 0x00118e70, 0x00119270, 0x00119670, + 0x00119a70, 0x00119e70, 0x0011a270, 0x0011a670, + 0x0011aa70, 0x0011ae70, 0x0011b270, 0x0011b670, + 0x0011ba70, 0x0011be70, 0x0011c270, 0x0011c670, + 0x0011ca70, 0x0011ce70, 0x0011d270, 0x0011d670, + 0x0011da70, 0x0011de70, 0x0011e270, 0x0011e670, + 0x0011ea70, 0x0011ee70, 0x0011f270, 0x0011f670, + 0x0011fa70, 0x0011fe70, 0x00120270, 0x00120670, + 0x00120a70, 0x00120e70, 0x00121270, 0x00121670, + 0x00121a70, 0x00121e70, 0x00122270, 0x00122670, + 0x00122a70, 0x00122e70, 0x00123270, 0x00123670, + 0x00123a70, 0x00123e70, 0x00124270, 0x00124670, + 0x00124a70, 0x00124e70, 0x00125270, 0x00125670, + 0x00125a70, 0x00125e70, 0x00126270, 0x00126670, + 0x00126a70, 0x00126e70, 0x00127270, 0x00127670, + 0x00127a70, 0x00127e70, 0x00128270, 0x00128670, + 0x00128a70, 0x00128e70, 0x00129270, 0x00129670, + 0x00129a70, 0x00129e70, 0x0012a270, 0x0012a670, + 0x0012aa70, 0x0012ae70, 0x0012b270, 0x0012b670, + 0x0012ba70, 0x0012be70, 0x0012c270, 0x0012c670, + 0x0012ca70, 0x0012ce70, 0x0012d270, 0x0012d670, + 0x0012da70, 0x0012de70, 0x0012e270, 0x0012e670, + 0x0012ea70, 0x0012ee70, 0x0012f270, 0x0012f670, + 0x0012fa70, 0x0012fe70, 0x00130270, 0x00130670, + 0x00130a70, 0x00130e70, 0x00131270, 0x00131670, + 0x00131a70, 0x00131e70, 0x00132270, 0x00132670, + 0x00132a70, 0x00132e70, 0x00133270, 0x00133670, + 0x00133a70, 0x00133e70, 0x00134270, 0x00134670, + 0x00134a70, 0x00134e70, 0x00135270, 0x00135670, + 0x00135a70, 0x00135e70, 0x00136270, 0x00136670, + 0x00136a70, 0x00136e70, 0x00137270, 0x00137670, + 0x00137a70, 0x00137e70, 
0x00138270, 0x00138670, + 0x00138a70, 0x00138e70, 0x00139270, 0x00139670, + 0x00139a70, 0x00139e70, 0x0013a270, 0x0013a670, + 0x0013aa70, 0x0013ae70, 0x0013b270, 0x0013b670, + 0x0013ba70, 0x0013be70, 0x0013c270, 0x0013c670, + 0x0013ca70, 0x0013ce70, 0x0013d270, 0x0013d670, + 0x0013da70, 0x0013de70, 0x0013e270, 0x0013e670, + 0x0013ea70, 0x0013ee70, 0x0013f270, 0x0013f670, + 0x0013fa70, 0x0013fe70, 0x00140270, 0x00140670, + 0x00140a70, 0x00140e70, 0x00141270, 0x00141670, + 0x00141a70, 0x00141e70, 0x00142270, 0x00142670, + 0x00142a70, 0x00142e70, 0x00143270, 0x00143670, + 0x00143a70, 0x00143e70, 0x00144270, 0x00144670, + 0x00144a70, 0x00144e70, 0x00145270, 0x00145670, + 0x00145a70, 0x00145e70, 0x00146270, 0x00146670, + 0x00146a70, 0x00146e70, 0x00147270, 0x00147670, + 0x00147a70, 0x00147e70, 0x00148270, 0x00148670, + 0x00148a70, 0x00148e70, 0x00149270, 0x00149670, + 0x00149a70, 0x00149e70, 0x0014a270, 0x0014a670, + 0x0014aa70, 0x0014ae70, 0x0014b270, 0x0014b670, + 0x0014ba70, 0x0014be70, 0x0014c270, 0x0014c670, + 0x0014ca70, 0x0014ce70, 0x0014d270, 0x0014d670, + 0x0014da70, 0x0014de70, 0x0014e270, 0x0014e670, + 0x0014ea70, 0x0014ee70, 0x0014f270, 0x0014f670, + 0x0014fa70, 0x0014fe70, 0x00150270, 0x00150670, + 0x00150a70, 0x00150e70, 0x00151270, 0x00151670, + 0x00151a70, 0x00151e70, 0x00152270, 0x00152670, + 0x00152a70, 0x00152e70, 0x00153270, 0x00153670, + 0x00153a70, 0x00153e70, 0x00154270, 0x00154670, + 0x00154a70, 0x00154e70, 0x00155270, 0x00155670, + 0x00155a70, 0x00155e70, 0x00156270, 0x00156670, + 0x00156a70, 0x00156e70, 0x00157270, 0x00157670, + 0x00157a70, 0x00157e70, 0x00158270, 0x00158670, + 0x00158a70, 0x00158e70, 0x00159270, 0x00159670, + 0x00159a70, 0x00159e70, 0x0015a270, 0x0015a670, + 0x0015aa70, 0x0015ae70, 0x0015b270, 0x0015b670, + 0x0015ba70, 0x0015be70, 0x0015c270, 0x0015c670, + 0x0015ca70, 0x0015ce70, 0x0015d270, 0x0015d670, + 0x0015da70, 0x0015de70, 0x0015e270, 0x0015e670, + 0x0015ea70, 0x0015ee70, 0x0015f270, 0x0015f670, + 0x0015fa70, 0x0015fe70, 
0x00160270, 0x00160670, + 0x00160a70, 0x00160e70, 0x00161270, 0x00161670, + 0x00161a70, 0x00161e70, 0x00162270, 0x00162670, + 0x00162a70, 0x00162e70, 0x00163270, 0x00163670, + 0x00163a70, 0x00163e70, 0x00164270, 0x00164670, + 0x00164a70, 0x00164e70, 0x00165270, 0x00165670, + 0x00165a70, 0x00165e70, 0x00166270, 0x00166670, + 0x00166a70, 0x00166e70, 0x00167270, 0x00167670, + 0x00167a70, 0x00167e70, 0x00168270, 0x00168670, + 0x00168a70, 0x00168e70, 0x00169270, 0x00169670, + 0x00169a70, 0x00169e70, 0x0016a270, 0x0016a670, + 0x0016aa70, 0x0016ae70, 0x0016b270, 0x0016b670, + 0x0016ba70, 0x0016be70, 0x0016c270, 0x0016c670, + 0x0016ca70, 0x0016ce70, 0x0016d270, 0x0016d670, + 0x0016da70, 0x0016de70, 0x0016e270, 0x0016e670, + 0x0016ea70, 0x0016ee70, 0x0016f270, 0x0016f670, + 0x0016fa70, 0x0016fe70, 0x00170270, 0x00170670, + 0x00170a70, 0x00170e70, 0x00171270, 0x00171670, + 0x00171a70, 0x00171e70, 0x00172270, 0x00172670, + 0x00172a70, 0x00172e70, 0x00173270, 0x00173670, + 0x00173a70, 0x00173e70, 0x00174270, 0x00174670, + 0x00174a70, 0x00174e70, 0x00175270, 0x00175670, + 0x00175a70, 0x00175e70, 0x00176270, 0x00176670, + 0x00176a70, 0x00176e70, 0x00177270, 0x00177670, + 0x00177a70, 0x00177e70, 0x00178270, 0x00178670, + 0x00178a70, 0x00178e70, 0x00179270, 0x00179670, + 0x00179a70, 0x00179e70, 0x0017a270, 0x0017a670, + 0x0017aa70, 0x0017ae70, 0x0017b270, 0x0017b670, + 0x0017ba70, 0x0017be70, 0x0017c270, 0x0017c670, + 0x0017ca70, 0x0017ce70, 0x0017d270, 0x0017d670, + 0x0017da70, 0x0017de70, 0x0017e270, 0x0017e670, + 0x0017ea70, 0x0017ee70, 0x0017f270, 0x0017f670, + 0x0017fa70, 0x0017fe70, 0x00180270, 0x00180670, + 0x00180a70, 0x00180e70, 0x00181270, 0x00181670, + 0x00181a70, 0x00181e70, 0x00182270, 0x00182670, + 0x00182a70, 0x00182e70, 0x00183270, 0x00183670, + 0x00183a70, 0x00183e70, 0x00184270, 0x00184670, + 0x00184a70, 0x00184e70, 0x00185270, 0x00185670, + 0x00185a70, 0x00185e70, 0x00186270, 0x00186670, + 0x00186a70, 0x00186e70, 0x00187270, 0x00187670, + 0x00187a70, 0x00187e70, 
0x00188270, 0x00188670, + 0x00188a70, 0x00188e70, 0x00189270, 0x00189670, + 0x00189a70, 0x00189e70, 0x0018a270, 0x0018a670, + 0x0018aa70, 0x0018ae70, 0x0018b270, 0x0018b670, + 0x0018ba70, 0x0018be70, 0x0018c270, 0x0018c670, + 0x0018ca70, 0x0018ce70, 0x0018d270, 0x0018d670, + 0x0018da70, 0x0018de70, 0x0018e270, 0x0018e670, + 0x0018ea70, 0x0018ee70, 0x0018f270, 0x0018f670, + 0x0018fa70, 0x0018fe70, 0x00190270, 0x00190670, + 0x00190a70, 0x00190e70, 0x00191270, 0x00191670, + 0x00191a70, 0x00191e70, 0x00192270, 0x00192670, + 0x00192a70, 0x00192e70, 0x00193270, 0x00193670, + 0x00193a70, 0x00193e70, 0x00194270, 0x00194670, + 0x00194a70, 0x00194e70, 0x00195270, 0x00195670, + 0x00195a70, 0x00195e70, 0x00196270, 0x00196670, + 0x00196a70, 0x00196e70, 0x00197270, 0x00197670, + 0x00197a70, 0x00197e70, 0x00198270, 0x00198670, + 0x00198a70, 0x00198e70, 0x00199270, 0x00199670, + 0x00199a70, 0x00199e70, 0x0019a270, 0x0019a670, + 0x0019aa70, 0x0019ae70, 0x0019b270, 0x0019b670, + 0x0019ba70, 0x0019be70, 0x0019c270, 0x0019c670, + 0x0019ca70, 0x0019ce70, 0x0019d270, 0x0019d670, + 0x0019da70, 0x0019de70, 0x0019e270, 0x0019e670, + 0x0019ea70, 0x0019ee70, 0x0019f270, 0x0019f670, + 0x0019fa70, 0x0019fe70, 0x001a0270, 0x001a0670, + 0x001a0a70, 0x001a0e70, 0x001a1270, 0x001a1670, + 0x001a1a70, 0x001a1e70, 0x001a2270, 0x001a2670, + 0x001a2a70, 0x001a2e70, 0x001a3270, 0x001a3670, + 0x001a3a70, 0x001a3e70, 0x001a4270, 0x001a4670, + 0x001a4a70, 0x001a4e70, 0x001a5270, 0x001a5670, + 0x001a5a70, 0x001a5e70, 0x001a6270, 0x001a6670, + 0x001a6a70, 0x001a6e70, 0x001a7270, 0x001a7670, + 0x001a7a70, 0x001a7e70, 0x001a8270, 0x001a8670, + 0x001a8a70, 0x001a8e70, 0x001a9270, 0x001a9670, + 0x001a9a70, 0x001a9e70, 0x001aa270, 0x001aa670, + 0x001aaa70, 0x001aae70, 0x001ab270, 0x001ab670, + 0x001aba70, 0x001abe70, 0x001ac270, 0x001ac670, + 0x001aca70, 0x001ace70, 0x001ad270, 0x001ad670, + 0x001ada70, 0x001ade70, 0x001ae270, 0x001ae670, + 0x001aea70, 0x001aee70, 0x001af270, 0x001af670, + 0x001afa70, 0x001afe70, 
0x001b0270, 0x001b0670, + 0x001b0a70, 0x001b0e70, 0x001b1270, 0x001b1670, + 0x001b1a70, 0x001b1e70, 0x001b2270, 0x001b2670, + 0x001b2a70, 0x001b2e70, 0x001b3270, 0x001b3670, + 0x001b3a70, 0x001b3e70, 0x001b4270, 0x001b4670, + 0x001b4a70, 0x001b4e70, 0x001b5270, 0x001b5670, + 0x001b5a70, 0x001b5e70, 0x001b6270, 0x001b6670, + 0x001b6a70, 0x001b6e70, 0x001b7270, 0x001b7670, + 0x001b7a70, 0x001b7e70, 0x001b8270, 0x001b8670, + 0x001b8a70, 0x001b8e70, 0x001b9270, 0x001b9670, + 0x001b9a70, 0x001b9e70, 0x001ba270, 0x001ba670, + 0x001baa70, 0x001bae70, 0x001bb270, 0x001bb670, + 0x001bba70, 0x001bbe70, 0x001bc270, 0x001bc670, + 0x001bca70, 0x001bce70, 0x001bd270, 0x001bd670, + 0x001bda70, 0x001bde70, 0x001be270, 0x001be670, + 0x001bea70, 0x001bee70, 0x001bf270, 0x001bf670, + 0x001bfa70, 0x001bfe70, 0x001c0270, 0x001c0670, + 0x001c0a70, 0x001c0e70, 0x001c1270, 0x001c1670, + 0x001c1a70, 0x001c1e70, 0x001c2270, 0x001c2670, + 0x001c2a70, 0x001c2e70, 0x001c3270, 0x001c3670, + 0x001c3a70, 0x001c3e70, 0x001c4270, 0x001c4670, + 0x001c4a70, 0x001c4e70, 0x001c5270, 0x001c5670, + 0x001c5a70, 0x001c5e70, 0x001c6270, 0x001c6670, + 0x001c6a70, 0x001c6e70, 0x001c7270, 0x001c7670, + 0x001c7a70, 0x001c7e70, 0x001c8270, 0x001c8670, + 0x001c8a70, 0x001c8e70, 0x001c9270, 0x001c9670, + 0x001c9a70, 0x001c9e70, 0x001ca270, 0x001ca670, + 0x001caa70, 0x001cae70, 0x001cb270, 0x001cb670, + 0x001cba70, 0x001cbe70, 0x001cc270, 0x001cc670, + 0x001cca70, 0x001cce70, 0x001cd270, 0x001cd670, + 0x001cda70, 0x001cde70, 0x001ce270, 0x001ce670, + 0x001cea70, 0x001cee70, 0x001cf270, 0x001cf670, + 0x001cfa70, 0x001cfe70, 0x001d0270, 0x001d0670, + 0x001d0a70, 0x001d0e70, 0x001d1270, 0x001d1670, + 0x001d1a70, 0x001d1e70, 0x001d2270, 0x001d2670, + 0x001d2a70, 0x001d2e70, 0x001d3270, 0x001d3670, + 0x001d3a70, 0x001d3e70, 0x001d4270, 0x001d4670, + 0x001d4a70, 0x001d4e70, 0x001d5270, 0x001d5670, + 0x001d5a70, 0x001d5e70, 0x001d6270, 0x001d6670, + 0x001d6a70, 0x001d6e70, 0x001d7270, 0x001d7670, + 0x001d7a70, 0x001d7e70, 
0x001d8270, 0x001d8670, + 0x001d8a70, 0x001d8e70, 0x001d9270, 0x001d9670, + 0x001d9a70, 0x001d9e70, 0x001da270, 0x001da670, + 0x001daa70, 0x001dae70, 0x001db270, 0x001db670, + 0x001dba70, 0x001dbe70, 0x001dc270, 0x001dc670, + 0x001dca70, 0x001dce70, 0x001dd270, 0x001dd670, + 0x001dda70, 0x001dde70, 0x001de270, 0x001de670, + 0x001dea70, 0x001dee70, 0x001df270, 0x001df670, + 0x001dfa70, 0x001dfe70, 0x001e0270, 0x001e0670, + 0x001e0a70, 0x001e0e70, 0x001e1270, 0x001e1670, + 0x001e1a70, 0x001e1e70, 0x001e2270, 0x001e2670, + 0x001e2a70, 0x001e2e70, 0x001e3270, 0x001e3670, + 0x001e3a70, 0x001e3e70, 0x001e4270, 0x001e4670, + 0x001e4a70, 0x001e4e70, 0x001e5270, 0x001e5670, + 0x001e5a70, 0x001e5e70, 0x001e6270, 0x001e6670, + 0x001e6a70, 0x001e6e70, 0x001e7270, 0x001e7670, + 0x001e7a70, 0x001e7e70, 0x001e8270, 0x001e8670, + 0x001e8a70, 0x001e8e70, 0x001e9270, 0x001e9670, + 0x001e9a70, 0x001e9e70, 0x001ea270, 0x001ea670, + 0x001eaa70, 0x001eae70, 0x001eb270, 0x001eb670, + 0x001eba70, 0x001ebe70, 0x001ec270, 0x001ec670, + 0x001eca70, 0x001ece70, 0x001ed270, 0x001ed670, + 0x001eda70, 0x001ede70, 0x001ee270, 0x001ee670, + 0x001eea70, 0x001eee70, 0x001ef270, 0x001ef670, + 0x001efa70, 0x001efe70, 0x001f0270, 0x001f0670, + 0x001f0a70, 0x001f0e70, 0x001f1270, 0x001f1670, + 0x001f1a70, 0x001f1e70, 0x001f2270, 0x001f2670, + 0x001f2a70, 0x001f2e70, 0x001f3270, 0x001f3670, + 0x001f3a70, 0x001f3e70, 0x001f4270, 0x001f4670, + 0x001f4a70, 0x001f4e70, 0x001f5270, 0x001f5670, + 0x001f5a70, 0x001f5e70, 0x001f6270, 0x001f6670, + 0x001f6a70, 0x001f6e70, 0x001f7270, 0x001f7670, + 0x001f7a70, 0x001f7e70, 0x001f8270, 0x001f8670, + 0x001f8a70, 0x001f8e70, 0x001f9270, 0x001f9670, + 0x001f9a70, 0x001f9e70, 0x001fa270, 0x001fa670, + 0x001faa70, 0x001fae70, 0x001fb270, 0x001fb670, + 0x001fba70, 0x001fbe70, 0x001fc270, 0x001fc670, + 0x001fca70, 0x001fce70, 0x001fd270, 0x001fd670, + 0x001fda70, 0x001fde70, 0x001fe270, 0x001fe670, + 0x001fea70, 0x001fee70, 0x001ff270, 0x001ff670, + 0x001ffa70, 0x001ffe70 
+#endif /* LONGER_HUFFTABLE */ + }, + + .len_table = { + 0x00000807, 0x00000407, 0x00000c07, 0x00000207, + 0x00000a07, 0x00000607, 0x00000e07, 0x00000107, + 0x00000908, 0x00001908, 0x00000508, 0x00001508, + 0x00000d08, 0x00001d08, 0x00000308, 0x00001308, + 0x00000b09, 0x00001b09, 0x00002b09, 0x00003b09, + 0x00000709, 0x00001709, 0x00002709, 0x00003709, + 0x00000f09, 0x00001f09, 0x00002f09, 0x00003f09, + 0x00000089, 0x00001089, 0x00002089, 0x00003089, + 0x0000088a, 0x0000188a, 0x0000288a, 0x0000388a, + 0x0000488a, 0x0000588a, 0x0000688a, 0x0000788a, + 0x0000048a, 0x0000148a, 0x0000248a, 0x0000348a, + 0x0000448a, 0x0000548a, 0x0000648a, 0x0000748a, + 0x00000c8a, 0x00001c8a, 0x00002c8a, 0x00003c8a, + 0x00004c8a, 0x00005c8a, 0x00006c8a, 0x00007c8a, + 0x0000028a, 0x0000128a, 0x0000228a, 0x0000328a, + 0x0000428a, 0x0000528a, 0x0000628a, 0x0000728a, + 0x00000a8b, 0x00001a8b, 0x00002a8b, 0x00003a8b, + 0x00004a8b, 0x00005a8b, 0x00006a8b, 0x00007a8b, + 0x00008a8b, 0x00009a8b, 0x0000aa8b, 0x0000ba8b, + 0x0000ca8b, 0x0000da8b, 0x0000ea8b, 0x0000fa8b, + 0x0000068b, 0x0000168b, 0x0000268b, 0x0000368b, + 0x0000468b, 0x0000568b, 0x0000668b, 0x0000768b, + 0x0000868b, 0x0000968b, 0x0000a68b, 0x0000b68b, + 0x0000c68b, 0x0000d68b, 0x0000e68b, 0x0000f68b, + 0x00000e8b, 0x00001e8b, 0x00002e8b, 0x00003e8b, + 0x00004e8b, 0x00005e8b, 0x00006e8b, 0x00007e8b, + 0x00008e8b, 0x00009e8b, 0x0000ae8b, 0x0000be8b, + 0x0000ce8b, 0x0000de8b, 0x0000ee8b, 0x0000fe8b, + 0x0000006c, 0x0000206c, 0x0000406c, 0x0000606c, + 0x0000806c, 0x0000a06c, 0x0000c06c, 0x0000e06c, + 0x0001006c, 0x0001206c, 0x0001406c, 0x0001606c, + 0x0001806c, 0x0001a06c, 0x0001c06c, 0x0001e06c, + 0x0000106d, 0x0000306d, 0x0000506d, 0x0000706d, + 0x0000906d, 0x0000b06d, 0x0000d06d, 0x0000f06d, + 0x0001106d, 0x0001306d, 0x0001506d, 0x0001706d, + 0x0001906d, 0x0001b06d, 0x0001d06d, 0x0001f06d, + 0x0002106d, 0x0002306d, 0x0002506d, 0x0002706d, + 0x0002906d, 0x0002b06d, 0x0002d06d, 0x0002f06d, + 0x0003106d, 0x0003306d, 0x0003506d, 
0x0003706d, + 0x0003906d, 0x0003b06d, 0x0003d06d, 0x0003f06d, + 0x0000086d, 0x0000286d, 0x0000486d, 0x0000686d, + 0x0000886d, 0x0000a86d, 0x0000c86d, 0x0000e86d, + 0x0001086d, 0x0001286d, 0x0001486d, 0x0001686d, + 0x0001886d, 0x0001a86d, 0x0001c86d, 0x0001e86d, + 0x0002086d, 0x0002286d, 0x0002486d, 0x0002686d, + 0x0002886d, 0x0002a86d, 0x0002c86d, 0x0002e86d, + 0x0003086d, 0x0003286d, 0x0003486d, 0x0003686d, + 0x0003886d, 0x0003a86d, 0x0003c86d, 0x0003e86d, + 0x0000186d, 0x0000386d, 0x0000586d, 0x0000786d, + 0x0000986d, 0x0000b86d, 0x0000d86d, 0x0000f86d, + 0x0001186d, 0x0001386d, 0x0001586d, 0x0001786d, + 0x0001986d, 0x0001b86d, 0x0001d86d, 0x0001f86d, + 0x0002186d, 0x0002386d, 0x0002586d, 0x0002786d, + 0x0002986d, 0x0002b86d, 0x0002d86d, 0x0002f86d, + 0x0003186d, 0x0003386d, 0x0003586d, 0x0003786d, + 0x0003986d, 0x0003b86d, 0x0003d86d, 0x0003f86d, + 0x0000046d, 0x0000246d, 0x0000446d, 0x0000646d, + 0x0000846d, 0x0000a46d, 0x0000c46d, 0x0000e46d, + 0x0001046d, 0x0001246d, 0x0001446d, 0x0001646d, + 0x0001846d, 0x0001a46d, 0x0001c46d, 0x0001e46d, + 0x0002046d, 0x0002246d, 0x0002446d, 0x0002646d, + 0x0002846d, 0x0002a46d, 0x0002c46d, 0x0002e46d, + 0x0003046d, 0x0003246d, 0x0003446d, 0x0003646d, + 0x0003846d, 0x0003a46d, 0x0003c46d, 0x00001468}, + + .lit_table = { + 0x000c, 0x008c, 0x004c, 0x00cc, 0x002c, 0x00ac, 0x006c, 0x00ec, + 0x001c, 0x009c, 0x005c, 0x00dc, 0x003c, 0x00bc, 0x007c, 0x00fc, + 0x0002, 0x0082, 0x0042, 0x00c2, 0x0022, 0x00a2, 0x0062, 0x00e2, + 0x0012, 0x0092, 0x0052, 0x00d2, 0x0032, 0x00b2, 0x0072, 0x00f2, + 0x000a, 0x008a, 0x004a, 0x00ca, 0x002a, 0x00aa, 0x006a, 0x00ea, + 0x001a, 0x009a, 0x005a, 0x00da, 0x003a, 0x00ba, 0x007a, 0x00fa, + 0x0006, 0x0086, 0x0046, 0x00c6, 0x0026, 0x00a6, 0x0066, 0x00e6, + 0x0016, 0x0096, 0x0056, 0x00d6, 0x0036, 0x00b6, 0x0076, 0x00f6, + 0x000e, 0x008e, 0x004e, 0x00ce, 0x002e, 0x00ae, 0x006e, 0x00ee, + 0x001e, 0x009e, 0x005e, 0x00de, 0x003e, 0x00be, 0x007e, 0x00fe, + 0x0001, 0x0081, 0x0041, 0x00c1, 0x0021, 0x00a1, 0x0061, 
0x00e1, + 0x0011, 0x0091, 0x0051, 0x00d1, 0x0031, 0x00b1, 0x0071, 0x00f1, + 0x0009, 0x0089, 0x0049, 0x00c9, 0x0029, 0x00a9, 0x0069, 0x00e9, + 0x0019, 0x0099, 0x0059, 0x00d9, 0x0039, 0x00b9, 0x0079, 0x00f9, + 0x0005, 0x0085, 0x0045, 0x00c5, 0x0025, 0x00a5, 0x0065, 0x00e5, + 0x0015, 0x0095, 0x0055, 0x00d5, 0x0035, 0x00b5, 0x0075, 0x00f5, + 0x000d, 0x008d, 0x004d, 0x00cd, 0x002d, 0x00ad, 0x006d, 0x00ed, + 0x001d, 0x009d, 0x005d, 0x00dd, 0x003d, 0x00bd, 0x007d, 0x00fd, + 0x0013, 0x0113, 0x0093, 0x0193, 0x0053, 0x0153, 0x00d3, 0x01d3, + 0x0033, 0x0133, 0x00b3, 0x01b3, 0x0073, 0x0173, 0x00f3, 0x01f3, + 0x000b, 0x010b, 0x008b, 0x018b, 0x004b, 0x014b, 0x00cb, 0x01cb, + 0x002b, 0x012b, 0x00ab, 0x01ab, 0x006b, 0x016b, 0x00eb, 0x01eb, + 0x001b, 0x011b, 0x009b, 0x019b, 0x005b, 0x015b, 0x00db, 0x01db, + 0x003b, 0x013b, 0x00bb, 0x01bb, 0x007b, 0x017b, 0x00fb, 0x01fb, + 0x0007, 0x0107, 0x0087, 0x0187, 0x0047, 0x0147, 0x00c7, 0x01c7, + 0x0027, 0x0127, 0x00a7, 0x01a7, 0x0067, 0x0167, 0x00e7, 0x01e7, + 0x0017, 0x0117, 0x0097, 0x0197, 0x0057, 0x0157, 0x00d7, 0x01d7, + 0x0037, 0x0137, 0x00b7, 0x01b7, 0x0077, 0x0177, 0x00f7, 0x01f7, + 0x000f, 0x010f, 0x008f, 0x018f, 0x004f, 0x014f, 0x00cf, 0x01cf, + 0x002f, 0x012f, 0x00af, 0x01af, 0x006f, 0x016f, 0x00ef, 0x01ef, + 0x001f, 0x011f, 0x009f, 0x019f, 0x005f, 0x015f, 0x00df, 0x01df, + 0x003f, 0x013f, 0x00bf, 0x01bf, 0x007f, 0x017f, 0x00ff, 0x01ff, + 0x0000}, + + .lit_table_sizes = { + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 
0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x07}, + +#ifndef LONGER_HUFFTABLE + .dcodes = { + 0x0000, 0x0010, 0x0008, 0x0018, 0x0004, 0x0014, 0x000c, 0x001c, + 0x0002, 0x0012, 0x000a, 0x001a, 0x0006, 0x0016, 0x000e, 0x001e, + 0x0001, 0x0011, 0x0009, 0x0019, 0x0005, 0x0015, 0x000d, 0x001d, + 0x0003, 0x0013, 0x000b, 0x001b, 0x0007, 0x0017}, + + .dcodes_sizes = { + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05} +#else + .dcodes = { + 0x000b, 0x001b, 0x0007, 0x0017}, + + .dcodes_sizes = { + 0x05, 0x05, 0x05, 0x05} +#endif +}; diff --git a/src/spdk/isa-l/igzip/igzip.c b/src/spdk/isa-l/igzip/igzip.c new file mode 100644 index 000000000..52a5dc470 --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip.c @@ -0,0 +1,1931 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#define ASM + +#include +#include +#include +#ifdef _WIN32 +# include +#endif + +#define MAX_WRITE_BITS_SIZE 8 +#define FORCE_FLUSH 64 +#define MIN_OBUF_SIZE 224 +#define NON_EMPTY_BLOCK_SIZE 6 +#define MAX_SYNC_FLUSH_SIZE (NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE) /* replacement list is parenthesized so the sum stays a single operand wherever the macro is expanded */ + +#include "huffman.h" +#include "bitbuf2.h" +#include "igzip_lib.h" +#include "crc.h" +#include "repeated_char_result.h" +#include "huff_codes.h" +#include "encode_df.h" +#include "igzip_level_buf_structs.h" +#include "igzip_checksums.h" +#include "igzip_wrapper.h" +#include "unaligned.h" + +#ifdef __FreeBSD__ +#include +#include +# define to_be32(x) bswap32(x) +#elif defined (__APPLE__) +#include +# define to_be32(x) OSSwapInt32(x) +#elif defined (__GNUC__) && !defined (__MINGW32__) +# include +# define to_be32(x) bswap_32(x) +#elif defined _WIN64 +# define to_be32(x) _byteswap_ulong(x) +#endif + +/* Hash routines, gzip/zlib header data and Huffman tables declared extern: their definitions are not in this part of the file */ extern void isal_deflate_hash_lvl0(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern void isal_deflate_hash_lvl1(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern void isal_deflate_hash_lvl2(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern void isal_deflate_hash_lvl3(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern const uint8_t gzip_hdr[]; +extern const uint32_t gzip_hdr_bytes; +extern const uint32_t gzip_trl_bytes; +extern const uint8_t zlib_hdr[]; +extern const uint32_t zlib_hdr_bytes; +extern const uint32_t zlib_trl_bytes; +extern const struct isal_hufftables hufftables_default; +extern const struct isal_hufftables hufftables_static; + +/* Prototypes for static helpers that the routines below call */ static uint32_t write_stored_block(struct isal_zstream *stream); + +static int write_stream_header_stateless(struct isal_zstream *stream); +static void write_stream_header(struct isal_zstream *stream); +static int write_deflate_header_stateless(struct isal_zstream *stream); +static int write_deflate_header_unaligned_stateless(struct isal_zstream 
*stream); + +#define TYPE0_HDR_LEN 4 +#define TYPE0_BLK_HDR_LEN 5 +#define TYPE0_MAX_BLK_LEN 65535 /* size constants for stored (type 0) blocks; create_icf_block_hdr charges TYPE0_BLK_HDR_LEN bytes for every TYPE0_MAX_BLK_LEN bytes of block input */ + +/* Body and finish routines for level 0 and for the ICF levels 1-3; only their declarations appear in this part of the file */ void isal_deflate_body(struct isal_zstream *stream); +void isal_deflate_finish(struct isal_zstream *stream); + +void isal_deflate_icf_body(struct isal_zstream *stream); +void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream); +void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream); +void isal_deflate_icf_finish_lvl3(struct isal_zstream *stream); +/*****************************************************************/ + +/* Forward declarations */ +static inline void reset_match_history(struct isal_zstream *stream); +static void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr, + uint32_t deflate_hdr_count, uint32_t extra_bits_count, + uint32_t next_state, uint32_t toggle_end_of_stream); +static void write_trailer(struct isal_zstream *stream); + +/* Version record: a 16-bit snum followed by one-byte ver and core fields */ struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +/* Version info: the hex suffix of each *_slver_XXXXXXXX symbol spells the core, ver and snum values of the initialised record declared right after it */ +struct slver isal_deflate_init_slver_01030081; +struct slver isal_deflate_init_slver = { 0x0081, 0x03, 0x01 }; + +struct slver isal_deflate_reset_slver_0001008e; +struct slver isal_deflate_reset_slver = { 0x008e, 0x01, 0x00 }; + +struct slver isal_deflate_stateless_init_slver_00010084; +struct slver isal_deflate_stateless_init_slver = { 0x0084, 0x01, 0x00 }; + +struct slver isal_deflate_slver_01030082; +struct slver isal_deflate_slver = { 0x0082, 0x03, 0x01 }; + +struct slver isal_deflate_stateless_slver_01010083; +struct slver isal_deflate_stateless_slver = { 0x0083, 0x01, 0x01 }; + +struct slver isal_deflate_set_hufftables_slver_0001008b; +struct slver isal_deflate_set_hufftables_slver = { 0x008b, 0x01, 0x00 }; + +struct slver isal_deflate_set_dict_slver_0001008c; +struct slver isal_deflate_set_dict_slver = { 0x008c, 0x01, 0x00 }; + +/*****************************************************************/ + +// isal_adler32_bam1 - adler with (B | A minus 1) storage + +uint32_t isal_adler32_bam1(uint32_t adler32, 
const unsigned char *start, uint64_t length) +{ + uint64_t a; + + /* Internally the checksum is being stored as B | (A-1) so crc and + * adler have same init value */ + a = adler32 & 0xffff; + a = (a == ADLER_MOD - 1) ? 0 : a + 1; /* stored A-1 back to the real A, wrapping at ADLER_MOD */ + adler32 = isal_adler32((adler32 & 0xffff0000) | a, start, length); + a = (adler32 & 0xffff); + a = (a == 0) ? ADLER_MOD - 1 : a - 1; /* real A to the stored A-1 form again */ + + return (adler32 & 0xffff0000) | a; +} + +/* Fold length bytes starting at start_in into state->crc: the two gzip modes use crc32_gzip_refl, the two zlib modes use isal_adler32_bam1, and any other gzip_flag value leaves state->crc untouched. */ static void update_checksum(struct isal_zstream *stream, uint8_t * start_in, uint64_t length) +{ + struct isal_zstate *state = &stream->internal_state; + switch (stream->gzip_flag) { + case IGZIP_GZIP: + case IGZIP_GZIP_NO_HDR: + state->crc = crc32_gzip_refl(state->crc, start_in, length); + break; + case IGZIP_ZLIB: + case IGZIP_ZLIB_NO_HDR: + state->crc = isal_adler32_bam1(state->crc, start_in, length); + break; + } +} + +/* Emit an empty stored block through the bit buffer (3 zero header bits, zero padding, then the 32-bit value 0xFFFF0000, i.e. LEN 0x0000 and NLEN 0xFFFF), account the bytes written, clear has_eob and move to ZSTATE_NEW_HDR; FULL_FLUSH additionally drops the match history. Does nothing, state included, when fewer than 8 output bytes are available. */ static +void sync_flush(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + uint64_t bits_to_write = 0xFFFF0000, bits_len; + uint64_t bytes; + int flush_size; + + if (stream->avail_out >= 8) { + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + /* padding bits that bring m_bit_count + 3 up to a multiple of 8; NOTE(review): this relies on m_bit_count being an unsigned type so the negation wraps instead of producing a negative remainder -- confirm against bitbuf2.h */ flush_size = (-(state->bitbuf.m_bit_count + 3)) % 8; + + bits_to_write <<= flush_size + 3; + bits_len = 32 + flush_size + 3; + + state->state = ZSTATE_NEW_HDR; + state->has_eob = 0; + + write_bits(&state->bitbuf, bits_to_write, bits_len); + + bytes = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= bytes; + stream->total_out += bytes; + + if (stream->flush == FULL_FLUSH) { + /* Clear match history so there are no cross + * block length distance pairs */ + state->has_hist = IGZIP_NO_HIST; + } + } +} + +/* When at least 8 output bytes are available, flush the bit buffer into the output, account the bytes produced and move to ZSTATE_NEW_HDR; otherwise leave stream and state unchanged. */ static void flush_write_buffer(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + int bytes = 0; + if (stream->avail_out >= 8) { + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + flush(&state->bitbuf); + stream->next_out = 
buffer_ptr(&state->bitbuf); + bytes = buffer_used(&state->bitbuf); + stream->avail_out -= bytes; + stream->total_out += bytes; + state->state = ZSTATE_NEW_HDR; + } +} + +/* Encode the buffered ICF entries from icf_buf_start + state->count up to icf_buf_next into the output buffer and update the output accounting. Once the encoder has reached icf_buf_next, reset state->count and pick the next state: ZSTATE_TRL when input is exhausted at end of stream, ZSTATE_SYNC_FLUSH when input is exhausted and a flush was requested, ZSTATE_NEW_HDR otherwise. */ static void flush_icf_block(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + struct BitBuf2 *write_buf = &state->bitbuf; + struct deflate_icf *icf_buf_encoded_next; + + set_buf(write_buf, stream->next_out, stream->avail_out); + + icf_buf_encoded_next = encode_deflate_icf(level_buf->icf_buf_start + state->count, + level_buf->icf_buf_next, write_buf, + &level_buf->encode_tables); + + /* remember how far encoding got so that a later call resumes from there */ state->count = icf_buf_encoded_next - level_buf->icf_buf_start; + stream->next_out = buffer_ptr(write_buf); + stream->total_out += buffer_used(write_buf); + stream->avail_out -= buffer_used(write_buf); + + if (level_buf->icf_buf_next <= icf_buf_encoded_next) { + state->count = 0; + if (stream->avail_in == 0 && stream->end_of_stream) + state->state = ZSTATE_TRL; + else if (stream->avail_in == 0 && stream->flush != NO_FLUSH) + state->state = ZSTATE_SYNC_FLUSH; + else + state->state = ZSTATE_NEW_HDR; + } +} + +/* Validate the level buffer for the requested level. Returns 0 for level 0 or for levels 1-3 whose level_buf_size meets the matching ISAL_DEF_LVLn_MIN, ISAL_INVALID_LEVEL_BUF when level_buf is NULL, and ISAL_INVALID_LEVEL when the buffer is too small or the level is not 1, 2 or 3. */ static int check_level_req(struct isal_zstream *stream) +{ + if (stream->level == 0) + return 0; + + if (stream->level_buf == NULL) + return ISAL_INVALID_LEVEL_BUF; + + switch (stream->level) { + case 3: + if (stream->level_buf_size < ISAL_DEF_LVL3_MIN) + return ISAL_INVALID_LEVEL; + break; + + case 2: + if (stream->level_buf_size < ISAL_DEF_LVL2_MIN) + return ISAL_INVALID_LEVEL; + break; + case 1: + if (stream->level_buf_size < ISAL_DEF_LVL1_MIN) + return ISAL_INVALID_LEVEL; + break; + default: + return ISAL_INVALID_LEVEL; + } + + return 0; +} + +/* Mark the level buffer as initialised and return sizeof(struct level_buf) with MAX_LVL_BUF_SIZE replaced by the size of the hash8k member. */ static int init_hash8k_buf(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + state->has_level_buf_init = 1; + return sizeof(struct level_buf) - 
MAX_LVL_BUF_SIZE + sizeof(level_buf->hash8k); +} + +/* Mark the level buffer as initialised and return sizeof(struct level_buf) with MAX_LVL_BUF_SIZE replaced by the size of the hash_hist member. */ static int init_hash_hist_buf(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + state->has_level_buf_init = 1; + return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash_hist); +} + +/* On first use (has_level_buf_init still clear) point matches_next and matches_end at the start of the matches array, then mark the level buffer as initialised and return sizeof(struct level_buf) with MAX_LVL_BUF_SIZE replaced by the size of the hash_map member. */ static int init_hash_map_buf(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + if (!state->has_level_buf_init) { + level_buf->hash_map.matches_next = level_buf->hash_map.matches; + level_buf->hash_map.matches_end = level_buf->hash_map.matches; + } + state->has_level_buf_init = 1; + + return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash_map); + +} + +/* Initialises the level specific part of level_buf (hash_map for level 3, hash_hist for level 2, hash8k for anything else) and returns the size of the level specific buffer */ +static int init_lvlX_buf(struct isal_zstream *stream) +{ + switch (stream->level) { + case 3: + return init_hash_map_buf(stream); + case 2: + return init_hash_hist_buf(stream); + default: + return init_hash8k_buf(stream); + } + +} + +/* Start a new ICF block: the block begins where the previous one ended, the ICF buffer is placed in level_buf directly after the level specific structure, the symbol histogram is zeroed and the state becomes ZSTATE_BODY. */ static void init_new_icf_block(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + int level_struct_size; + + level_struct_size = init_lvlX_buf(stream); + + state->block_next = state->block_end; + level_buf->icf_buf_start = + (struct deflate_icf *)(stream->level_buf + level_struct_size); + + level_buf->icf_buf_next = level_buf->icf_buf_start; + /* one struct deflate_icf is held back from the advertised space; NOTE(review): presumably room for the end-of-block entry that create_icf_block_hdr appends -- confirm */ level_buf->icf_buf_avail_out = + stream->level_buf_size - level_struct_size - sizeof(struct deflate_icf); + + memset(&level_buf->hist, 0, sizeof(struct isal_mod_hist)); + state->state = ZSTATE_BODY; +} + +/* Non-zero when no input bytes remain. */ static int are_buffers_empty_hashX(struct isal_zstream *stream) +{ + return !stream->avail_in; +} + +/* Non-zero when no input bytes remain and hash_map.matches_next has reached hash_map.matches_end. */ static int are_buffers_empty_hash_map(struct isal_zstream *stream) +{ + struct level_buf *level_buf = 
(struct level_buf *)stream->level_buf; + + return (!stream->avail_in + && level_buf->hash_map.matches_next >= level_buf->hash_map.matches_end); +} + +/* Level dispatch for the emptiness check: level 3 also requires the hash_map match buffer to be drained, every other level only looks at avail_in. */ static int are_buffers_empty(struct isal_zstream *stream) +{ + + switch (stream->level) { + case 3: + return are_buffers_empty_hash_map(stream); + case 2: + return are_buffers_empty_hashX(stream); + default: + return are_buffers_empty_hashX(stream); + } +} + +/* Close the current ICF block: append the end-of-block entry, build the Huffman header for the block, then select the next state -- ZSTATE_TYPE0_HDR to emit a stored block instead, ZSTATE_HDR when the header had to be built in level_buf->deflate_hdr, or ZSTATE_FLUSH_ICF_BUFFER when the header went straight into the output. start_in is the address stream->next_in is measured against to learn how much input has been consumed. */ static void create_icf_block_hdr(struct isal_zstream *stream, uint8_t * start_in) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + struct BitBuf2 *write_buf = &state->bitbuf; + struct BitBuf2 write_buf_tmp; + uint32_t out_size = stream->avail_out; + uint32_t avail_output, block_start_offset; + uint8_t *end_out = stream->next_out + out_size; + uint64_t cur_in_processed; + uint64_t bit_count; + uint64_t block_in_size = state->block_end - state->block_next; + uint64_t block_size; + int buffer_header = 0; + + /* snapshot of the bit buffer so it can be restored if the header written below is discarded or redirected */ memcpy(&write_buf_tmp, write_buf, sizeof(struct BitBuf2)); + + /* Calculate the bytes required to store a type 0 block. Need to account + * for bits stored in the bitbuf. Since 3 bits correspond to the deflate + * type 0 header, we need to add one byte more when the number of bits + * is at least 6 mod 8. */ + block_size = (TYPE0_BLK_HDR_LEN) * ((block_in_size + TYPE0_MAX_BLK_LEN - 1) / + TYPE0_MAX_BLK_LEN) + block_in_size; + block_size = block_size ? block_size : TYPE0_BLK_HDR_LEN; /* an empty block still costs one stored-block header */ + block_size += (write_buf->m_bit_count + 2) / 8; + + /* Write EOB in icf_buf */ + level_buf->hist.ll_hist[256] = 1; + level_buf->icf_buf_next->lit_len = 0x100; + level_buf->icf_buf_next->lit_dist = NULL_DIST_SYM; + level_buf->icf_buf_next->dist_extra = 0; + level_buf->icf_buf_next++; + + /* the header is flagged as the final one only when the stream is ending and nothing is left to compress */ state->has_eob_hdr = (stream->end_of_stream && are_buffers_empty(stream)) ? 
1 : 0; + + if (end_out - stream->next_out >= ISAL_DEF_MAX_HDR_SIZE) { + /* Assumes ISAL_DEF_MAX_HDR_SIZE is large enough to contain a + * max length header and a gzip header */ + if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB) + write_stream_header_stateless(stream); + set_buf(write_buf, stream->next_out, stream->avail_out); + buffer_header = 0; + + } else { + /* Start writing into temporary buffer */ + set_buf(write_buf, level_buf->deflate_hdr, ISAL_DEF_MAX_HDR_SIZE); + buffer_header = 1; + } + + /* NOTE(review): bit_count is presumably the size in bits of the encoded block including its header; it is compared against the stored-block size below -- confirm against create_hufftables_icf */ bit_count = create_hufftables_icf(write_buf, &level_buf->encode_tables, + &level_buf->hist, state->has_eob_hdr); + + /* Assumes that type 0 block has size less than 4G */ + block_start_offset = (stream->total_in - state->block_next); + cur_in_processed = stream->next_in - start_in; + avail_output = stream->avail_out + sizeof(state->buffer) - + (stream->total_in - state->block_end); + + /* choose a stored block when the encoded form is not smaller, the block starts at or after start_in, and the stored form fits in avail_output */ if (bit_count / 8 >= block_size && cur_in_processed >= block_start_offset + && block_size <= avail_output) { + /* Reset stream for writing out a type0 block */ + state->has_eob_hdr = 0; + memcpy(write_buf, &write_buf_tmp, sizeof(struct BitBuf2)); + state->state = ZSTATE_TYPE0_HDR; + + } else if (buffer_header) { + /* Setup stream to write out a buffered header */ + level_buf->deflate_hdr_count = buffer_used(write_buf); + level_buf->deflate_hdr_extra_bits = write_buf->m_bit_count; + flush(write_buf); + memcpy(write_buf, &write_buf_tmp, sizeof(struct BitBuf2)); + write_buf->m_bits = 0; + write_buf->m_bit_count = 0; + state->state = ZSTATE_HDR; + + } else { + /* header was written straight into the output buffer: account for it and go on to encode the block */ stream->next_out = buffer_ptr(write_buf); + stream->total_out += buffer_used(write_buf); + stream->avail_out -= buffer_used(write_buf); + state->state = ZSTATE_FLUSH_ICF_BUFFER; + } +} + +/* One level 0 pass over the state machine: write the header taken from stream->hufftables when in ZSTATE_NEW_HDR or ZSTATE_HDR, then run the body, read-buffer finish, sync flush and write-buffer flush stages in that order as the state allows, update the checksum over the input consumed by this call when gzip_flag is set, and write the trailer in ZSTATE_TRL. */ static void isal_deflate_pass(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct isal_hufftables *hufftables = stream->hufftables; + uint8_t *start_in = stream->next_in; + + if 
(state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) { + if (state->count == 0) + /* Assume the final header is being written since the header + * stored in hufftables is the final header. */ + state->has_eob_hdr = 1; + write_header(stream, hufftables->deflate_hdr, hufftables->deflate_hdr_count, + hufftables->deflate_hdr_extra_bits, ZSTATE_BODY, + !stream->end_of_stream); + } + + if (state->state == ZSTATE_BODY) + isal_deflate_body(stream); + + if (state->state == ZSTATE_FLUSH_READ_BUFFER) + isal_deflate_finish(stream); + if (state->state == ZSTATE_SYNC_FLUSH) + sync_flush(stream); + + if (state->state == ZSTATE_FLUSH_WRITE_BUFFER) + flush_write_buffer(stream); + + if (stream->gzip_flag) + update_checksum(stream, start_in, stream->next_in - start_in); + + if (state->state == ZSTATE_TRL) + write_trailer(stream); +} + +static void isal_deflate_icf_finish(struct isal_zstream *stream) +{ + switch (stream->level) { + case 3: + isal_deflate_icf_finish_lvl3(stream); + break; + case 2: + isal_deflate_icf_finish_lvl2(stream); + break; + default: + isal_deflate_icf_finish_lvl1(stream); + } +} + +static void isal_deflate_icf_pass(struct isal_zstream *stream, uint8_t * inbuf_start) +{ + uint8_t *start_in = stream->next_in; + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + + do { + if (state->state == ZSTATE_NEW_HDR) + init_new_icf_block(stream); + + if (state->state == ZSTATE_BODY) + isal_deflate_icf_body(stream); + + if (state->state == ZSTATE_FLUSH_READ_BUFFER) + isal_deflate_icf_finish(stream); + + if (state->state == ZSTATE_CREATE_HDR) + create_icf_block_hdr(stream, inbuf_start); + + if (state->state == ZSTATE_HDR) + /* Note that the header may be prepended by the + * remaining bits in the previous block, as such the + * toggle header flag cannot be used */ + write_header(stream, level_buf->deflate_hdr, + level_buf->deflate_hdr_count, + level_buf->deflate_hdr_extra_bits, + 
ZSTATE_FLUSH_ICF_BUFFER, 0); + + if (state->state == ZSTATE_FLUSH_ICF_BUFFER) + flush_icf_block(stream); + + if (state->state == ZSTATE_TYPE0_HDR || state->state == ZSTATE_TYPE0_BODY) { + if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB) + write_stream_header(stream); + write_stored_block(stream); + } + + } + while (state->state == ZSTATE_NEW_HDR); + + if (state->state == ZSTATE_SYNC_FLUSH) + sync_flush(stream); + + if (state->state == ZSTATE_FLUSH_WRITE_BUFFER) + flush_write_buffer(stream); + + if (stream->gzip_flag) + update_checksum(stream, start_in, stream->next_in - start_in); + + if (state->state == ZSTATE_TRL) + write_trailer(stream); +} + +static void isal_deflate_int(struct isal_zstream *stream, uint8_t * start_in) +{ + struct isal_zstate *state = &stream->internal_state; + uint32_t size; + + /* Move data from temporary output buffer to output buffer */ + if (state->state >= ZSTATE_TMP_OFFSET) { + size = state->tmp_out_end - state->tmp_out_start; + if (size > stream->avail_out) + size = stream->avail_out; + memcpy(stream->next_out, state->tmp_out_buff + state->tmp_out_start, size); + stream->next_out += size; + stream->avail_out -= size; + stream->total_out += size; + state->tmp_out_start += size; + + if (state->tmp_out_start == state->tmp_out_end) + state->state -= ZSTATE_TMP_OFFSET; + + if (stream->avail_out == 0 || state->state == ZSTATE_END + // or do not write out empty blocks since the outbuffer was processed + || (state->state == ZSTATE_NEW_HDR && stream->avail_out == 0)) + return; + } + assert(state->tmp_out_start == state->tmp_out_end); + + if (stream->level == 0) + isal_deflate_pass(stream); + else + isal_deflate_icf_pass(stream, start_in); + + /* Fill temporary output buffer then complete filling output buffer */ + if (stream->avail_out > 0 && stream->avail_out < 8 && state->state != ZSTATE_NEW_HDR) { + uint8_t *next_out; + uint32_t avail_out; + uint32_t total_out; + + next_out = stream->next_out; + avail_out = 
stream->avail_out; + total_out = stream->total_out; + + stream->next_out = state->tmp_out_buff; + stream->avail_out = sizeof(state->tmp_out_buff); + stream->total_out = 0; + + if (stream->level == 0) + isal_deflate_pass(stream); + else + isal_deflate_icf_pass(stream, start_in); + + state->tmp_out_start = 0; + state->tmp_out_end = stream->total_out; + + stream->next_out = next_out; + stream->avail_out = avail_out; + stream->total_out = total_out; + if (state->tmp_out_end) { + size = state->tmp_out_end; + if (size > stream->avail_out) + size = stream->avail_out; + memcpy(stream->next_out, state->tmp_out_buff, size); + stream->next_out += size; + stream->avail_out -= size; + stream->total_out += size; + state->tmp_out_start += size; + if (state->tmp_out_start != state->tmp_out_end) + state->state += ZSTATE_TMP_OFFSET; + + } + } + +} + +static void write_constant_compressed_stateless(struct isal_zstream *stream, + uint32_t repeated_length) +{ + /* Assumes repeated_length is at least 1. + * Assumes the input end_of_stream is either 0 or 1. */ + struct isal_zstate *state = &stream->internal_state; + uint32_t rep_bits = ((repeated_length - 1) / 258) * 2; + uint32_t rep_bytes = rep_bits / 8; + uint32_t rep_extra = (repeated_length - 1) % 258; + uint32_t bytes; + uint32_t repeated_char = *stream->next_in; + uint8_t *start_in = stream->next_in; + + /* Guarantee there is enough space for the header even in the worst case */ + if (stream->avail_out < HEADER_LENGTH + MAX_FIXUP_CODE_LENGTH + rep_bytes + 8) + return; + + /* Assumes the repeated char is either 0 or 0xFF. 
*/ + memcpy(stream->next_out, repeated_char_header[repeated_char & 1], HEADER_LENGTH); + + if (stream->avail_in == repeated_length && stream->end_of_stream > 0) { + stream->next_out[0] |= 1; + state->has_eob_hdr = 1; + state->has_eob = 1; + state->state = ZSTATE_TRL; + } else { + state->state = ZSTATE_NEW_HDR; + } + + memset(stream->next_out + HEADER_LENGTH, 0, rep_bytes); + stream->avail_out -= HEADER_LENGTH + rep_bytes; + stream->next_out += HEADER_LENGTH + rep_bytes; + stream->total_out += HEADER_LENGTH + rep_bytes; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + /* These two lines are basically a modified version of init. */ + state->bitbuf.m_bits = 0; + state->bitbuf.m_bit_count = rep_bits % 8; + + /* Add smaller repeat codes as necessary. Code280 can describe repeat + * lengths of 115-130 bits. Code10 can describe repeat lengths of 10 + * bits. If more than 230 bits, fill code with two code280s. Else if + * more than 115 repeates, fill with code10s until one code280 can + * finish the rest of the repeats. 
/*
 * Count how many leading bytes of `in` repeat the same value.
 *
 * The scan first compares whole 8-byte words against the first word of the
 * buffer, then finishes byte by byte. As before, the result is only
 * meaningful as a "repeated character" length when the first 8 bytes are
 * all the same character.
 *
 * Words are loaded with memcpy() instead of dereferencing a casted
 * uint64_t pointer, so the function is safe for unaligned input and does
 * not violate the effective-type rules. Inputs shorter than 8 bytes are
 * handled with the byte loop only, so nothing is read past `in + length`.
 *
 * @param in      start of the buffer to scan
 * @param length  number of valid bytes at `in`
 * @return number of leading bytes matching the repeated pattern (0 when
 *         length is 0)
 */
static int detect_repeated_char_length(uint8_t * in, uint32_t length)
{
	uint8_t *cur = in, *end = in + length;
	uint64_t first_word, word;
	uint8_t c;

	if (length == 0)
		return 0;

	c = in[0];

	if (length >= sizeof(uint64_t)) {
		memcpy(&first_word, in, sizeof(first_word));
		/* Keep the original choice of reference byte: the low byte of
		 * the first word as loaded in native byte order. */
		c = (uint8_t) first_word;

		while ((size_t)(end - cur) >= sizeof(word)) {
			memcpy(&word, cur, sizeof(word));
			if (word != first_word)
				break;
			cur += sizeof(word);
		}
	}

	/* Finish (or, for short inputs, do all of) the scan one byte at a time */
	while (cur < end && *cur == c)
		cur++;

	return (int)(cur - in);
}
&stream->internal_state.bitbuf; + + if (block_in_size > TYPE0_MAX_BLK_LEN) { + stored_blk_hdr = 0xFFFF; + copy_size = TYPE0_MAX_BLK_LEN; + } else { + stored_blk_hdr = ~block_in_size; + stored_blk_hdr <<= 16; + stored_blk_hdr |= (block_in_size & 0xFFFF); + copy_size = block_in_size; + + /* Handle BFINAL bit */ + block_next_offset = stream->total_in - state->block_next; + avail_in = stream->avail_in + block_next_offset; + if (stream->end_of_stream && avail_in == block_in_size) + stream->internal_state.has_eob_hdr = 1; + } + + if (bitbuf->m_bit_count == 0 && stream->avail_out >= TYPE0_HDR_LEN + 1) { + stored_blk_hdr = stored_blk_hdr << 8; + stored_blk_hdr |= stream->internal_state.has_eob_hdr; + memcpy_len = TYPE0_HDR_LEN + 1; + memcpy(stream->next_out, &stored_blk_hdr, memcpy_len); + } else if (stream->avail_out >= 8) { + set_buf(bitbuf, stream->next_out, stream->avail_out); + write_bits_flush(bitbuf, stream->internal_state.has_eob_hdr, 3); + stream->next_out = buffer_ptr(bitbuf); + stream->total_out += buffer_used(bitbuf); + stream->avail_out -= buffer_used(bitbuf); + memcpy_len = TYPE0_HDR_LEN; + memcpy(stream->next_out, &stored_blk_hdr, memcpy_len); + } else { + stream->internal_state.has_eob_hdr = 0; + return; + } + + stream->next_out += memcpy_len; + stream->avail_out -= memcpy_len; + stream->total_out += memcpy_len; + stream->internal_state.state = ZSTATE_TYPE0_BODY; + + stream->internal_state.count = copy_size; +} + +static uint32_t write_stored_block(struct isal_zstream *stream) +{ + uint32_t copy_size, avail_in, block_next_offset; + uint8_t *next_in; + struct isal_zstate *state = &stream->internal_state; + + do { + if (state->state == ZSTATE_TYPE0_HDR) { + write_type0_header(stream); + if (state->state == ZSTATE_TYPE0_HDR) + break; + } + + assert(state->count <= state->block_end - state->block_next); + copy_size = state->count; + + block_next_offset = stream->total_in - state->block_next; + next_in = stream->next_in - block_next_offset; + avail_in = 
stream->avail_in + block_next_offset; + + if (copy_size > stream->avail_out || copy_size > avail_in) { + state->count = copy_size; + copy_size = (stream->avail_out <= avail_in) ? + stream->avail_out : avail_in; + + memcpy(stream->next_out, next_in, copy_size); + state->count -= copy_size; + } else { + memcpy(stream->next_out, next_in, copy_size); + + state->count = 0; + state->state = ZSTATE_TYPE0_HDR; + } + + state->block_next += copy_size; + stream->next_out += copy_size; + stream->avail_out -= copy_size; + stream->total_out += copy_size; + + if (state->block_next == state->block_end) { + state->state = state->has_eob_hdr ? ZSTATE_TRL : ZSTATE_NEW_HDR; + if (stream->flush == FULL_FLUSH && state->state == ZSTATE_NEW_HDR + && are_buffers_empty(stream)) { + /* Clear match history so there are no cross + * block length distance pairs */ + reset_match_history(stream); + } + } + } while (state->state == ZSTATE_TYPE0_HDR); + + return state->block_end - state->block_next; +} + +static inline void reset_match_history(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *hash_table; + uint32_t hash_table_size; + + hash_table_size = 2 * (state->hash_mask + 1); + + switch (stream->level) { + case 3: + hash_table = level_buf->lvl3.hash_table; + break; + case 2: + hash_table = level_buf->lvl2.hash_table; + break; + case 1: + hash_table = level_buf->lvl1.hash_table; + break; + default: + hash_table = state->head; + } + + state->has_hist = IGZIP_NO_HIST; + + /* There is definitely more than 16 bytes in the hash table. 
Set this + * minimum to avoid a wmemset of size 0 */ + if (hash_table_size <= sizeof(wchar_t)) + hash_table_size = sizeof(wchar_t); + + if (sizeof(wchar_t) == 2) { + uint16_t hash_init_val; + + hash_init_val = stream->total_in & 0xffff; + wmemset((wchar_t *)hash_table, hash_init_val, + hash_table_size / sizeof(wchar_t)); + + } else if (sizeof(wchar_t) == 4) { + uint32_t hash_init_val; + int rep_bits; + + hash_init_val = stream->total_in & 0xffff; + for (rep_bits = sizeof(uint16_t) * 8; rep_bits < sizeof(wchar_t) * 8; + rep_bits *= 2) + hash_init_val |= hash_init_val << rep_bits; + + wmemset((wchar_t *)hash_table, hash_init_val, + hash_table_size / sizeof(wchar_t)); + } else { + if ((stream->total_in & 0xFFFF) == 0) + memset(hash_table, 0, hash_table_size); + else { + int i; + for (i = 0; i < hash_table_size / 2; i++) { + hash_table[i] = (uint16_t) (stream->total_in); + } + } + } + +} + +static void inline set_dist_mask(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + uint32_t hist_size; + + if (stream->hist_bits > ISAL_DEF_MAX_HIST_BITS || stream->hist_bits == 0) + stream->hist_bits = ISAL_DEF_MAX_HIST_BITS; + + hist_size = (1 << (stream->hist_bits)); + state->dist_mask = hist_size - 1; + + if (IGZIP_HIST_SIZE < ISAL_DEF_HIST_SIZE && state->dist_mask > IGZIP_HIST_SIZE - 1) + state->dist_mask = IGZIP_HIST_SIZE - 1; +} + +static void inline set_hash_mask(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + + switch (stream->level) { + case 3: + state->hash_mask = LVL3_HASH_MASK; + break; + case 2: + state->hash_mask = LVL2_HASH_MASK; + break; + case 1: + state->hash_mask = LVL1_HASH_MASK; + break; + case 0: + state->hash_mask = LVL0_HASH_MASK; + } +} + +void isal_deflate_init(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + + stream->total_in = 0; + stream->total_out = 0; + stream->hufftables = (struct isal_hufftables *)&hufftables_default; + 
stream->level = 0; + stream->level_buf = NULL; + stream->level_buf_size = 0; + stream->end_of_stream = 0; + stream->flush = NO_FLUSH; + stream->gzip_flag = 0; + stream->hist_bits = 0; + + state->block_next = 0; + state->block_end = 0; + state->b_bytes_valid = 0; + state->b_bytes_processed = 0; + state->total_in_start = 0; + state->has_wrap_hdr = 0; + state->has_eob = 0; + state->has_eob_hdr = 0; + state->has_hist = IGZIP_NO_HIST; + state->has_level_buf_init = 0; + state->state = ZSTATE_NEW_HDR; + state->count = 0; + + state->tmp_out_start = 0; + state->tmp_out_end = 0; + + init(&state->bitbuf); + + state->crc = 0; + + return; +} + +void isal_deflate_reset(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + + stream->total_in = 0; + stream->total_out = 0; + + state->block_next = 0; + state->block_end = 0; + state->b_bytes_valid = 0; + state->b_bytes_processed = 0; + state->total_in_start = 0; + state->has_wrap_hdr = 0; + state->has_eob = 0; + state->has_level_buf_init = 0; + state->has_eob_hdr = 0; + state->has_hist = IGZIP_NO_HIST; + state->state = ZSTATE_NEW_HDR; + state->count = 0; + + state->tmp_out_start = 0; + state->tmp_out_end = 0; + + init(&state->bitbuf); + + state->crc = 0; + +} + +void isal_gzip_header_init(struct isal_gzip_header *gz_hdr) +{ + gz_hdr->text = 0; + gz_hdr->time = 0; + gz_hdr->xflags = 0; + gz_hdr->os = 0xff; + gz_hdr->extra = NULL; + gz_hdr->extra_buf_len = 0; + gz_hdr->extra_len = 0; + gz_hdr->name = NULL; + gz_hdr->name_buf_len = 0; + gz_hdr->comment = NULL; + gz_hdr->comment_buf_len = 0; + gz_hdr->hcrc = 0; +}; + +uint32_t isal_write_gzip_header(struct isal_zstream *stream, struct isal_gzip_header *gz_hdr) +{ + uint32_t flags = 0, hcrc, hdr_size = GZIP_HDR_BASE; + uint8_t *out_buf = stream->next_out, *out_buf_start = stream->next_out; + uint32_t name_len = 0, comment_len = 0; + + if (gz_hdr->text) + flags |= TEXT_FLAG; + if (gz_hdr->extra) { + flags |= EXTRA_FLAG; + hdr_size += GZIP_EXTRA_LEN + 
gz_hdr->extra_len; + } + if (gz_hdr->name) { + flags |= NAME_FLAG; + name_len = strnlen(gz_hdr->name, gz_hdr->name_buf_len); + if (name_len < gz_hdr->name_buf_len) + name_len++; + hdr_size += name_len; + } + if (gz_hdr->comment) { + flags |= COMMENT_FLAG; + comment_len = strnlen(gz_hdr->comment, gz_hdr->comment_buf_len); + if (comment_len < gz_hdr->comment_buf_len) + comment_len++; + hdr_size += comment_len; + } + if (gz_hdr->hcrc) { + flags |= HCRC_FLAG; + hdr_size += GZIP_HCRC_LEN; + } + + if (stream->avail_out < hdr_size) + return hdr_size; + + out_buf[0] = 0x1f; + out_buf[1] = 0x8b; + out_buf[2] = DEFLATE_METHOD; + out_buf[3] = flags; + store_u32(out_buf + 4, gz_hdr->time); + out_buf[8] = gz_hdr->xflags; + out_buf[9] = gz_hdr->os; + + out_buf += GZIP_HDR_BASE; + if (flags & EXTRA_FLAG) { + store_u16(out_buf, gz_hdr->extra_len); + out_buf += GZIP_EXTRA_LEN; + + memcpy(out_buf, gz_hdr->extra, gz_hdr->extra_len); + out_buf += gz_hdr->extra_len; + } + + if (flags & NAME_FLAG) { + memcpy(out_buf, gz_hdr->name, name_len); + out_buf += name_len; + } + + if (flags & COMMENT_FLAG) { + memcpy(out_buf, gz_hdr->comment, comment_len); + out_buf += comment_len; + } + + if (flags & HCRC_FLAG) { + hcrc = crc32_gzip_refl(0, out_buf_start, out_buf - out_buf_start); + store_u16(out_buf, hcrc); + out_buf += GZIP_HCRC_LEN; + } + + stream->next_out += hdr_size; + stream->total_out += hdr_size; + stream->avail_out -= hdr_size; + + return ISAL_DECOMP_OK; +} + +uint32_t isal_write_zlib_header(struct isal_zstream *stream, struct isal_zlib_header *z_hdr) +{ + uint32_t cmf, flg, dict_flag = 0, hdr_size = ZLIB_HDR_BASE; + uint8_t *out_buf = stream->next_out; + + if (z_hdr->dict_flag) { + dict_flag = ZLIB_DICT_FLAG; + hdr_size = ZLIB_HDR_BASE + ZLIB_DICT_LEN; + } + + if (stream->avail_out < hdr_size) + return hdr_size; + + cmf = DEFLATE_METHOD | (z_hdr->info << 4); + flg = (z_hdr->level << 6) | dict_flag; + + flg += 31 - ((256 * cmf + flg) % 31); + + out_buf[0] = cmf; + out_buf[1] = flg; + 
+ if (dict_flag) + store_u32(out_buf + 2, z_hdr->dict_id); + + stream->next_out += hdr_size; + stream->total_out += hdr_size; + stream->avail_out -= hdr_size; + + return ISAL_DECOMP_OK; +} + +int isal_deflate_set_hufftables(struct isal_zstream *stream, + struct isal_hufftables *hufftables, int type) +{ + if (stream->internal_state.state != ZSTATE_NEW_HDR) + return ISAL_INVALID_OPERATION; + + switch (type) { + case IGZIP_HUFFTABLE_DEFAULT: + stream->hufftables = (struct isal_hufftables *)&hufftables_default; + break; + case IGZIP_HUFFTABLE_STATIC: + stream->hufftables = (struct isal_hufftables *)&hufftables_static; + break; + case IGZIP_HUFFTABLE_CUSTOM: + if (hufftables != NULL) { + stream->hufftables = hufftables; + break; + } + default: + return ISAL_INVALID_OPERATION; + } + + return COMP_OK; +} + +void isal_deflate_stateless_init(struct isal_zstream *stream) +{ + stream->total_in = 0; + stream->total_out = 0; + stream->hufftables = (struct isal_hufftables *)&hufftables_default; + stream->level = 0; + stream->level_buf = NULL; + stream->level_buf_size = 0; + stream->end_of_stream = 0; + stream->flush = NO_FLUSH; + stream->gzip_flag = 0; + stream->internal_state.has_wrap_hdr = 0; + stream->internal_state.state = ZSTATE_NEW_HDR; + return; +} + +void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len) +{ + /* Reset history to prevent out of bounds matches this works because + * dictionary must set at least 1 element in the history */ + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint32_t hash_mask = stream->internal_state.hash_mask; + + switch (stream->level) { + case 3: + memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table)); + isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, hash_mask, + stream->total_in, dict, dict_len); + break; + + case 2: + memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table)); + isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, hash_mask, + 
stream->total_in, dict, dict_len); + break; + case 1: + memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table)); + isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, hash_mask, + stream->total_in, dict, dict_len); + break; + default: + memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head)); + isal_deflate_hash_lvl0(stream->internal_state.head, hash_mask, + stream->total_in, dict, dict_len); + } + + stream->internal_state.has_hist = IGZIP_HIST; +} + +int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len) +{ + struct isal_zstate *state = &stream->internal_state; + + if (state->state != ZSTATE_NEW_HDR || state->b_bytes_processed != state->b_bytes_valid) + return ISAL_INVALID_STATE; + + if (dict_len <= 0) + return COMP_OK; + + if (dict_len > IGZIP_HIST_SIZE) { + dict = dict + dict_len - IGZIP_HIST_SIZE; + dict_len = IGZIP_HIST_SIZE; + } + + memcpy(state->buffer, dict, dict_len); + state->b_bytes_processed = dict_len; + state->b_bytes_valid = dict_len; + + state->has_hist = IGZIP_DICT_HIST; + + return COMP_OK; +} + +int isal_deflate_stateless(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + uint8_t *next_in = stream->next_in; + const uint32_t avail_in = stream->avail_in; + const uint32_t total_in = stream->total_in; + + uint8_t *next_out = stream->next_out; + const uint32_t avail_out = stream->avail_out; + const uint32_t total_out = stream->total_out; + const uint32_t gzip_flag = stream->gzip_flag; + const uint32_t has_wrap_hdr = state->has_wrap_hdr; + + int level_check; + uint64_t stored_len; + + /* Final block has already been written */ + state->block_next = stream->total_in; + state->block_end = stream->total_in; + state->has_eob_hdr = 0; + init(&state->bitbuf); + state->state = ZSTATE_NEW_HDR; + state->crc = 0; + state->has_level_buf_init = 0; + set_dist_mask(stream); + + if (stream->flush == NO_FLUSH) + stream->end_of_stream = 1; + + if 
(stream->flush != NO_FLUSH && stream->flush != FULL_FLUSH) + return INVALID_FLUSH; + + level_check = check_level_req(stream); + if (level_check) { + if (stream->level == 1 && stream->level_buf == NULL) { + /* Default to internal buffer if invalid size is supplied */ + stream->level_buf = state->buffer; + stream->level_buf_size = sizeof(state->buffer) + sizeof(state->head); + } else + return level_check; + } + + set_hash_mask(stream); + + if (state->hash_mask > 2 * avail_in) + state->hash_mask = (1 << bsr(avail_in)) - 1; + + if (avail_in == 0) + stored_len = TYPE0_BLK_HDR_LEN; + else { + stored_len = TYPE0_BLK_HDR_LEN * ((avail_in + TYPE0_MAX_BLK_LEN - 1) / + TYPE0_MAX_BLK_LEN); + stored_len += avail_in; + } + + /* + at least 1 byte compressed data in the case of empty dynamic block which only + contains the EOB + */ + if (stream->gzip_flag == IGZIP_GZIP) + stored_len += gzip_hdr_bytes + gzip_trl_bytes; + else if (stream->gzip_flag == IGZIP_GZIP_NO_HDR) + stored_len += gzip_trl_bytes; + + else if (stream->gzip_flag == IGZIP_ZLIB) + stored_len += zlib_hdr_bytes + zlib_trl_bytes; + + else if (stream->gzip_flag == IGZIP_ZLIB_NO_HDR) + stored_len += zlib_trl_bytes; + + if (avail_out >= stored_len) + stream->avail_out = stored_len; + + if (isal_deflate_int_stateless(stream) == COMP_OK) { + if (avail_out >= stored_len) + stream->avail_out += avail_out - stored_len; + return COMP_OK; + } else { + if (avail_out >= stored_len) + stream->avail_out += avail_out - stored_len; + if (stream->flush == FULL_FLUSH) { + reset_match_history(stream); + } + stream->internal_state.has_eob_hdr = 0; + } + + if (avail_out < stored_len) + return STATELESS_OVERFLOW; + + stream->next_in = next_in + avail_in; + stream->avail_in = 0; + stream->total_in = avail_in; + + state->block_next = stream->total_in - avail_in; + state->block_end = stream->total_in; + + stream->next_out = next_out; + stream->avail_out = avail_out; + stream->total_out = total_out; + + stream->gzip_flag = gzip_flag; + 
state->has_wrap_hdr = has_wrap_hdr; + init(&stream->internal_state.bitbuf); + stream->internal_state.count = 0; + + if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB) + write_stream_header_stateless(stream); + + stream->internal_state.state = ZSTATE_TYPE0_HDR; + + write_stored_block(stream); + + stream->total_in = total_in + avail_in; + + if (stream->gzip_flag) { + stream->internal_state.crc = 0; + update_checksum(stream, next_in, avail_in); + } + + if (stream->end_of_stream) + write_trailer(stream); + + return COMP_OK; + +} + +static inline uint32_t get_hist_size(struct isal_zstream *stream, uint8_t * start_in, + int32_t buf_hist_start) +{ + struct isal_zstate *state = &stream->internal_state; + uint32_t history_size; + uint32_t buffered_history; + uint32_t buffered_size = state->b_bytes_valid - state->b_bytes_processed; + uint32_t input_history; + + buffered_history = (state->has_hist) ? state->b_bytes_processed - buf_hist_start : 0; + input_history = stream->next_in - start_in; + + /* Calculate history required for deflate window */ + history_size = (buffered_history >= input_history) ? 
buffered_history : input_history; + if (history_size > IGZIP_HIST_SIZE) + history_size = IGZIP_HIST_SIZE; + + /* Calculate history required based on internal state */ + if (state->state == ZSTATE_TYPE0_HDR + || state->state == ZSTATE_TYPE0_BODY + || state->state == ZSTATE_TMP_TYPE0_HDR || state->state == ZSTATE_TMP_TYPE0_BODY) { + if (stream->total_in - state->block_next > history_size) { + history_size = (stream->total_in - state->block_next); + } + } else if (stream->avail_in + buffered_size == 0 + && (stream->end_of_stream || stream->flush == FULL_FLUSH)) { + history_size = 0; + } + return history_size; +} + +int isal_deflate(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + int ret = COMP_OK; + uint8_t *next_in, *start_in, *buf_start_in, *next_in_pre; + uint32_t avail_in, total_start, hist_size, future_size; + uint32_t in_size, in_size_initial, out_size, out_size_initial; + uint32_t processed, buffered_size = state->b_bytes_valid - state->b_bytes_processed; + uint32_t flush_type = stream->flush; + uint32_t end_of_stream = stream->end_of_stream; + uint32_t size = 0; + int32_t buf_hist_start = 0; + uint8_t *copy_down_src = NULL; + uint64_t copy_down_size = 0, copy_start_offset; + int internal; + + if (stream->flush >= 3) + return INVALID_FLUSH; + + ret = check_level_req(stream); + if (ret) + return ret; + + start_in = stream->next_in; + total_start = stream->total_in; + + hist_size = get_hist_size(stream, start_in, buf_hist_start); + + if (state->has_hist == IGZIP_NO_HIST) { + set_dist_mask(stream); + set_hash_mask(stream); + if (state->hash_mask > 2 * stream->avail_in + && (stream->flush == FULL_FLUSH || stream->end_of_stream)) + state->hash_mask = (1 << bsr(2 * stream->avail_in)) - 1; + stream->total_in -= buffered_size; + reset_match_history(stream); + stream->total_in += buffered_size; + buf_hist_start = state->b_bytes_processed; + + } else if (state->has_hist == IGZIP_DICT_HIST) { + set_dist_mask(stream); + 
set_hash_mask(stream); + isal_deflate_hash(stream, state->buffer, state->b_bytes_processed); + } + + in_size = stream->avail_in + buffered_size; + out_size = stream->total_out; + do { + in_size_initial = in_size; + out_size_initial = out_size; + buf_start_in = start_in; + internal = 0; + + /* Setup to compress from internal buffer if insufficient history */ + if (stream->total_in - total_start < hist_size + buffered_size) { + /* On entry there should always be sufficient history bufferd */ + /* assert(state->b_bytes_processed >= hist_size); */ + + internal = 1; + /* Shift down internal buffer if it contains more data + * than required */ + if (state->b_bytes_processed > hist_size) { + copy_start_offset = state->b_bytes_processed - hist_size; + + copy_down_src = &state->buffer[copy_start_offset]; + copy_down_size = state->b_bytes_valid - copy_start_offset; + memmove(state->buffer, copy_down_src, copy_down_size); + + state->b_bytes_valid -= copy_down_src - state->buffer; + state->b_bytes_processed -= copy_down_src - state->buffer; + buf_hist_start -= copy_down_src - state->buffer; + if (buf_hist_start < 0) + buf_hist_start = 0; + } + + size = stream->avail_in; + if (size > sizeof(state->buffer) - state->b_bytes_valid) + size = sizeof(state->buffer) - state->b_bytes_valid; + + memcpy(&state->buffer[state->b_bytes_valid], stream->next_in, size); + + stream->next_in += size; + stream->avail_in -= size; + stream->total_in += size; + state->b_bytes_valid += size; + buffered_size += size; + + /* Save off next_in and avail_in if compression is + * performed in internal buffer, total_in can be + * recovered from knowledge of the size of the buffered + * input */ + next_in = stream->next_in; + avail_in = stream->avail_in; + + /* If not much data is buffered and there is no need to + * flush the buffer, just continue rather than attempt + * to compress */ + if (avail_in == 0 && buffered_size <= IGZIP_HIST_SIZE + && stream->total_in - buffered_size - state->block_next <= + 
IGZIP_HIST_SIZE && !stream->end_of_stream + && stream->flush == NO_FLUSH) + continue; + + if (avail_in) { + stream->flush = NO_FLUSH; + stream->end_of_stream = 0; + } + + stream->next_in = &state->buffer[state->b_bytes_processed]; + stream->avail_in = buffered_size; + stream->total_in -= buffered_size; + + buf_start_in = state->buffer; + + } else if (buffered_size) { + /* The user provided buffer has sufficient data, reset + * the user supplied buffer to included any data already + * buffered */ + stream->next_in -= buffered_size; + stream->avail_in += buffered_size; + stream->total_in -= buffered_size; + state->b_bytes_valid = 0; + state->b_bytes_processed = 0; + buffered_size = 0; + } + + next_in_pre = stream->next_in; + isal_deflate_int(stream, buf_start_in); + processed = stream->next_in - next_in_pre; + hist_size = get_hist_size(stream, buf_start_in, buf_hist_start); + + /* Restore compression to unbuffered input when compressing to internal buffer */ + if (internal) { + state->b_bytes_processed += processed; + buffered_size -= processed; + + stream->flush = flush_type; + stream->end_of_stream = end_of_stream; + stream->total_in += buffered_size; + + stream->next_in = next_in; + stream->avail_in = avail_in; + } + + in_size = stream->avail_in + buffered_size; + out_size = stream->total_out; + + } while (internal && stream->avail_in > 0 && stream->avail_out > 0 + && (in_size_initial != in_size || out_size_initial != out_size)); + + /* Buffer history if data was pulled from the external buffer and future + * calls to deflate will be required */ + if (!internal && (state->state != ZSTATE_END || state->state != ZSTATE_TRL)) { + /* If the external buffer was used, sufficient history must + * exist in the user input buffer */ + /* assert(stream->total_in - total_start >= */ + /* hist_size + buffered_size); */ + + stream->next_in -= buffered_size; + stream->avail_in += buffered_size; + stream->total_in -= buffered_size; + + memmove(state->buffer, stream->next_in - 
hist_size, hist_size); + state->b_bytes_processed = hist_size; + state->b_bytes_valid = hist_size; + buffered_size = 0; + } + + /* Buffer input data if it is necessary for continued execution */ + if (stream->avail_in > 0 && (stream->avail_out > 0 || stream->level == 3)) { + /* Determine how much data to buffer */ + future_size = sizeof(state->buffer) - state->b_bytes_valid; + if (stream->avail_in < future_size) + /* Buffer all data if it fits as it will need to be buffered + * on the next call anyways*/ + future_size = stream->avail_in; + else if (ISAL_LOOK_AHEAD < future_size) + /* Buffer a minimum look ahead required for level 3 */ + future_size = ISAL_LOOK_AHEAD; + + memcpy(&state->buffer[state->b_bytes_valid], stream->next_in, future_size); + + state->b_bytes_valid += future_size; + buffered_size += future_size; + stream->next_in += future_size; + stream->total_in += future_size; + stream->avail_in -= future_size; + + } + + return ret; +} + +static int write_stream_header_stateless(struct isal_zstream *stream) +{ + uint32_t hdr_bytes; + const uint8_t *hdr; + uint32_t next_flag; + + if (stream->internal_state.has_wrap_hdr) + return COMP_OK; + + if (stream->gzip_flag == IGZIP_ZLIB) { + hdr_bytes = zlib_hdr_bytes; + hdr = zlib_hdr; + next_flag = IGZIP_ZLIB_NO_HDR; + + } else { + hdr_bytes = gzip_hdr_bytes; + hdr = gzip_hdr; + next_flag = IGZIP_GZIP_NO_HDR; + } + + if (hdr_bytes >= stream->avail_out) + return STATELESS_OVERFLOW; + + stream->avail_out -= hdr_bytes; + stream->total_out += hdr_bytes; + + memcpy(stream->next_out, hdr, hdr_bytes); + + stream->next_out += hdr_bytes; + stream->internal_state.has_wrap_hdr = 1; + stream->gzip_flag = next_flag; + + return COMP_OK; +} + +static void write_stream_header(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + int bytes_to_write; + uint32_t hdr_bytes; + const uint8_t *hdr; + + if (stream->internal_state.has_wrap_hdr) + return; + + if (stream->gzip_flag == IGZIP_ZLIB) { + 
hdr_bytes = zlib_hdr_bytes; + hdr = zlib_hdr; + } else { + hdr_bytes = gzip_hdr_bytes; + hdr = gzip_hdr; + } + + bytes_to_write = hdr_bytes; + bytes_to_write -= state->count; + + if (bytes_to_write > stream->avail_out) + bytes_to_write = stream->avail_out; + + memcpy(stream->next_out, hdr + state->count, bytes_to_write); + state->count += bytes_to_write; + + if (state->count == hdr_bytes) { + state->count = 0; + state->has_wrap_hdr = 1; + } + + stream->avail_out -= bytes_to_write; + stream->total_out += bytes_to_write; + stream->next_out += bytes_to_write; + +} + +static int write_deflate_header_stateless(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct isal_hufftables *hufftables = stream->hufftables; + uint64_t hdr_extra_bits = hufftables->deflate_hdr[hufftables->deflate_hdr_count]; + uint32_t count; + + if (hufftables->deflate_hdr_count + 8 >= stream->avail_out) + return STATELESS_OVERFLOW; + + memcpy(stream->next_out, hufftables->deflate_hdr, hufftables->deflate_hdr_count); + + if (stream->end_of_stream == 0) { + if (hufftables->deflate_hdr_count > 0) + *stream->next_out -= 1; + else + hdr_extra_bits -= 1; + } else + state->has_eob_hdr = 1; + + stream->avail_out -= hufftables->deflate_hdr_count; + stream->total_out += hufftables->deflate_hdr_count; + stream->next_out += hufftables->deflate_hdr_count; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + write_bits(&state->bitbuf, hdr_extra_bits, hufftables->deflate_hdr_extra_bits); + + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + + state->state = ZSTATE_BODY; + + return COMP_OK; +} + +static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct isal_hufftables *hufftables = stream->hufftables; + unsigned int count; + uint64_t bit_count; + uint8_t *header_next; + 
uint8_t *header_end; + uint64_t header_bits; + + if (state->bitbuf.m_bit_count == 0) + return write_deflate_header_stateless(stream); + + if (hufftables->deflate_hdr_count + 16 >= stream->avail_out) + return STATELESS_OVERFLOW; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + header_next = hufftables->deflate_hdr; + header_end = header_next + + (hufftables->deflate_hdr_count / sizeof(header_bits)) * sizeof(header_bits); + + header_bits = load_u64(header_next); + + if (stream->end_of_stream == 0) + header_bits--; + else + state->has_eob_hdr = 1; + + header_next += sizeof(header_bits); + + /* Write out Complete Header bits */ + for (; header_next <= header_end; header_next += sizeof(header_bits)) { + write_bits(&state->bitbuf, header_bits, 32); + header_bits >>= 32; + write_bits(&state->bitbuf, header_bits, 32); + header_bits = load_u64(header_next); + } + bit_count = + (hufftables->deflate_hdr_count & 0x7) * 8 + hufftables->deflate_hdr_extra_bits; + + if (bit_count > MAX_BITBUF_BIT_WRITE) { + write_bits(&state->bitbuf, header_bits, MAX_BITBUF_BIT_WRITE); + header_bits >>= MAX_BITBUF_BIT_WRITE; + bit_count -= MAX_BITBUF_BIT_WRITE; + + } + + write_bits(&state->bitbuf, header_bits, bit_count); + + /* check_space flushes extra bytes in bitbuf. 
Required because + * write_bits_always fails when the next commit makes the buffer + * length exceed 64 bits */ + check_space(&state->bitbuf, FORCE_FLUSH); + + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + + state->state = ZSTATE_BODY; + + return COMP_OK; +} + +/* Toggle end of stream only works when deflate header is aligned */ +static void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr, + uint32_t deflate_hdr_count, uint32_t extra_bits_count, + uint32_t next_state, uint32_t toggle_end_of_stream) +{ + struct isal_zstate *state = &stream->internal_state; + uint32_t hdr_extra_bits = deflate_hdr[deflate_hdr_count]; + uint32_t count; + state->state = ZSTATE_HDR; + + if (state->bitbuf.m_bit_count != 0) { + if (stream->avail_out < 8) + return; + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + flush(&state->bitbuf); + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + } + + if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB) + write_stream_header(stream); + + count = deflate_hdr_count - state->count; + + if (count != 0) { + if (count > stream->avail_out) + count = stream->avail_out; + + memcpy(stream->next_out, deflate_hdr + state->count, count); + + if (toggle_end_of_stream && state->count == 0 && count > 0) { + /* Assumes the final block bit is the first bit */ + *stream->next_out ^= 1; + state->has_eob_hdr = !state->has_eob_hdr; + } + + stream->next_out += count; + stream->avail_out -= count; + stream->total_out += count; + state->count += count; + + count = deflate_hdr_count - state->count; + } else if (toggle_end_of_stream && deflate_hdr_count == 0) { + /* Assumes the final block bit is the first bit */ + hdr_extra_bits ^= 1; + state->has_eob_hdr = !state->has_eob_hdr; + } + + if ((count == 0) && (stream->avail_out >= 
8)) { + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + write_bits(&state->bitbuf, hdr_extra_bits, extra_bits_count); + + state->state = next_state; + state->count = 0; + + count = buffer_used(&state->bitbuf); + stream->next_out = buffer_ptr(&state->bitbuf); + stream->avail_out -= count; + stream->total_out += count; + } + +} + +static void write_trailer(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + unsigned int bytes = 0; + uint32_t crc = state->crc; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + if (!state->has_eob_hdr) { + /* If the final header has not been written, write a + * final block. This block is a static huffman block + * which only contains the end of block symbol. The code + * that happens to do this is the fist 10 bits of + * 0x003 */ + if (stream->avail_out < 8) + return; + + state->has_eob_hdr = 1; + write_bits(&state->bitbuf, 0x003, 10); + if (is_full(&state->bitbuf)) { + stream->next_out = buffer_ptr(&state->bitbuf); + bytes = buffer_used(&state->bitbuf); + stream->avail_out -= bytes; + stream->total_out += bytes; + return; + } + } + + if (state->bitbuf.m_bit_count) { + /* the flush() will pad to the next byte and write up to 8 bytes + * to the output stream/buffer. 
+ */ + if (stream->avail_out < 8) + return; + + flush(&state->bitbuf); + } + + stream->next_out = buffer_ptr(&state->bitbuf); + bytes = buffer_used(&state->bitbuf); + + switch (stream->gzip_flag) { + case IGZIP_GZIP: + case IGZIP_GZIP_NO_HDR: + if (stream->avail_out - bytes >= gzip_trl_bytes) { + store_u64(stream->next_out, ((uint64_t) stream->total_in << 32) | crc); + stream->next_out += gzip_trl_bytes; + bytes += gzip_trl_bytes; + state->state = ZSTATE_END; + } + break; + + case IGZIP_ZLIB: + case IGZIP_ZLIB_NO_HDR: + if (stream->avail_out - bytes >= zlib_trl_bytes) { + store_u32(stream->next_out, + to_be32((crc & 0xFFFF0000) | ((crc & 0xFFFF) + 1) % + ADLER_MOD)); + stream->next_out += zlib_trl_bytes; + bytes += zlib_trl_bytes; + state->state = ZSTATE_END; + } + break; + + default: + state->state = ZSTATE_END; + } + + stream->avail_out -= bytes; + stream->total_out += bytes; +} diff --git a/src/spdk/isa-l/igzip/igzip_base.c b/src/spdk/isa-l/igzip/igzip_base.c new file mode 100644 index 000000000..bcc965f6d --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_base.c @@ -0,0 +1,236 @@ +#include +#include "igzip_lib.h" +#include "huffman.h" +#include "huff_codes.h" +#include "bitbuf2.h" + +extern const struct isal_hufftables hufftables_default; + +static inline void update_state(struct isal_zstream *stream, uint8_t * start_in, + uint8_t * next_in, uint8_t * end_in) +{ + struct isal_zstate *state = &stream->internal_state; + uint32_t bytes_written; + + if (next_in - start_in > 0) + state->has_hist = IGZIP_HIST; + + stream->next_in = next_in; + stream->total_in += next_in - start_in; + stream->avail_in = end_in - next_in; + + bytes_written = buffer_used(&state->bitbuf); + stream->total_out += bytes_written; + stream->next_out += bytes_written; + stream->avail_out -= bytes_written; + +} + +void isal_deflate_body_base(struct isal_zstream *stream) +{ + uint32_t literal, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + uint16_t match_length; + uint32_t dist; + 
uint64_t code, code_len, code2, code_len2; + struct isal_zstate *state = &stream->internal_state; + uint16_t *last_seen = state->head; + uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in); + uint32_t hist_size = state->dist_mask; + uint32_t hash_mask = state->hash_mask; + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + return; + } + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + start_in = stream->next_in; + end_in = start_in + stream->avail_in; + next_in = start_in; + + while (next_in + ISAL_LOOK_AHEAD < end_in) { + + if (is_full(&state->bitbuf)) { + update_state(stream, start_in, next_in, end_in); + return; + } + + literal = load_u32(next_in); + hash = compute_hash(literal) & hash_mask; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); + + /* The -1 are to handle the case when dist = 0 */ + if (dist - 1 < hist_size) { + assert(dist != 0); + + match_length = compare258(next_in - dist, next_in, 258); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end; next_hash++) { + literal = load_u32(next_hash); + hash = compute_hash(literal) & hash_mask; + last_seen[hash] = (uint64_t) (next_hash - file_start); + } + + get_len_code(stream->hufftables, match_length, &code, + &code_len); + get_dist_code(stream->hufftables, dist, &code2, &code_len2); + + code |= code2 << code_len; + code_len += code_len2; + + write_bits(&state->bitbuf, code, code_len); + + next_in += match_length; + + continue; + } + } + + get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + next_in++; + } + + update_state(stream, start_in, next_in, end_in); + + assert(stream->avail_in <= 
ISAL_LOOK_AHEAD); + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + + return; + +} + +void isal_deflate_finish_base(struct isal_zstream *stream) +{ + uint32_t literal = 0, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + uint16_t match_length; + uint32_t dist; + uint64_t code, code_len, code2, code_len2; + struct isal_zstate *state = &stream->internal_state; + uint16_t *last_seen = state->head; + uint8_t *file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in); + uint32_t hist_size = state->dist_mask; + uint32_t hash_mask = state->hash_mask; + + set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + + start_in = stream->next_in; + end_in = start_in + stream->avail_in; + next_in = start_in; + + if (stream->avail_in != 0) { + while (next_in + 3 < end_in) { + if (is_full(&state->bitbuf)) { + update_state(stream, start_in, next_in, end_in); + return; + } + + literal = load_u32(next_in); + hash = compute_hash(literal) & hash_mask; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); + + if (dist - 1 < hist_size) { /* The -1 are to handle the case when dist = 0 */ + match_length = + compare258(next_in - dist, next_in, end_in - next_in); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end - 3; next_hash++) { + literal = load_u32(next_hash); + hash = compute_hash(literal) & hash_mask; + last_seen[hash] = + (uint64_t) (next_hash - file_start); + } + + get_len_code(stream->hufftables, match_length, &code, + &code_len); + get_dist_code(stream->hufftables, dist, &code2, + &code_len2); + + code |= code2 << code_len; + code_len += code_len2; + + write_bits(&state->bitbuf, code, code_len); + + next_in += match_length; + + continue; + } + } + + 
get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + next_in++; + + } + + while (next_in < end_in) { + if (is_full(&state->bitbuf)) { + update_state(stream, start_in, next_in, end_in); + return; + } + + literal = *next_in; + get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + next_in++; + + } + } + + if (!is_full(&state->bitbuf)) { + get_lit_code(stream->hufftables, 256, &code, &code_len); + write_bits(&state->bitbuf, code, code_len); + state->has_eob = 1; + + if (stream->end_of_stream == 1) + state->state = ZSTATE_TRL; + else + state->state = ZSTATE_SYNC_FLUSH; + } + + update_state(stream, start_in, next_in, end_in); + + return; +} + +void isal_deflate_hash_base(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + uint8_t *next_in = dict; + uint8_t *end_in = dict + dict_len - SHORTEST_MATCH; + uint32_t literal; + uint32_t hash; + uint16_t index = current_index - dict_len; + + while (next_in <= end_in) { + literal = load_u32(next_in); + hash = compute_hash(literal) & hash_mask; + hash_table[hash] = index; + index++; + next_in++; + } +} diff --git a/src/spdk/isa-l/igzip/igzip_base_aliases.c b/src/spdk/isa-l/igzip/igzip_base_aliases.c new file mode 100644 index 000000000..486ed8e3e --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_base_aliases.c @@ -0,0 +1,153 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include "igzip_lib.h" +#include "encode_df.h" +#include "igzip_level_buf_structs.h" + +void isal_deflate_body_base(struct isal_zstream *stream); +void isal_deflate_finish_base(struct isal_zstream *stream); +void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream); +void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream); +void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream); +void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream); +void isal_update_histogram_base(uint8_t * start_stream, int length, + struct isal_huff_histogram *histogram); +struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in, + struct deflate_icf *end_in, struct BitBuf2 *bb, + struct hufftables_icf *hufftables); +uint32_t adler32_base(uint32_t init, const unsigned char *buf, uint64_t len); +int decode_huffman_code_block_stateless_base(struct inflate_state *s, uint8_t * start_out); + +extern void isal_deflate_hash_base(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); + +void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in, + struct deflate_icf *match_lookup, struct level_buf *level_buf); +void gen_icf_map_h1_base(struct isal_zstream *stream, + struct deflate_icf *matches_icf_lookup, uint64_t input_size); + +void isal_deflate_body(struct isal_zstream *stream) +{ + isal_deflate_body_base(stream); +} + +void isal_deflate_finish(struct isal_zstream *stream) +{ + isal_deflate_finish_base(stream); +} + +void isal_deflate_icf_body_lvl1(struct isal_zstream *stream) +{ + isal_deflate_icf_body_hash_hist_base(stream); +} + +void isal_deflate_icf_body_lvl2(struct isal_zstream *stream) +{ + isal_deflate_icf_body_hash_hist_base(stream); +} + +void isal_deflate_icf_body_lvl3(struct isal_zstream *stream) +{ + icf_body_hash1_fillgreedy_lazy(stream); +} + +void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream) +{ + 
isal_deflate_icf_finish_hash_hist_base(stream); +} + +void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream) +{ + isal_deflate_icf_finish_hash_hist_base(stream); +} + +void isal_deflate_icf_finish_lvl3(struct isal_zstream *stream) +{ + isal_deflate_icf_finish_hash_map_base(stream); +} + +void isal_update_histogram(uint8_t * start_stream, int length, + struct isal_huff_histogram *histogram) +{ + isal_update_histogram_base(start_stream, length, histogram); +} + +struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, + struct deflate_icf *end_in, struct BitBuf2 *bb, + struct hufftables_icf *hufftables) +{ + return encode_deflate_icf_base(next_in, end_in, bb, hufftables); +} + +uint32_t isal_adler32(uint32_t init, const unsigned char *buf, uint64_t len) +{ + return adler32_base(init, buf, len); +} + +int decode_huffman_code_block_stateless(struct inflate_state *s, uint8_t * start_out) +{ + return decode_huffman_code_block_stateless_base(s, start_out); +} + +void isal_deflate_hash_lvl0(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void isal_deflate_hash_lvl1(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void isal_deflate_hash_lvl2(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void isal_deflate_hash_lvl3(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void set_long_icf_fg(uint8_t * next_in, uint8_t * end_in, + struct deflate_icf *match_lookup, struct level_buf *level_buf) +{ 
+ set_long_icf_fg_base(next_in, end_in, match_lookup, level_buf); +} + +void gen_icf_map_lh1(struct isal_zstream *stream, + struct deflate_icf *matches_icf_lookup, uint64_t input_size) +{ + gen_icf_map_h1_base(stream, matches_icf_lookup, input_size); +} diff --git a/src/spdk/isa-l/igzip/igzip_body.asm b/src/spdk/isa-l/igzip/igzip_body.asm new file mode 100644 index 000000000..43de23479 --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_body.asm @@ -0,0 +1,786 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" + +%include "lz0a_const.asm" +%include "data_struct2.asm" +%include "bitbuf2.asm" +%include "huffman.asm" +%include "igzip_compare_types.asm" +%include "reg_sizes.asm" + +%include "stdmac.asm" + +%define LARGE_MATCH_HASH_REP 1 ; Hash 4 * LARGE_MATCH_HASH_REP elements +%define LARGE_MATCH_MIN 264 ; Minimum match size to enter large match emit loop +%define MIN_INBUF_PADDING 16 +%define MAX_EMIT_SIZE 258 * 16 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%define tmp2 rcx +%define hash2 rcx + +%define curr_data rax +%define code rax +%define tmp5 rax + +%define tmp4 rbx +%define dist rbx +%define code2 rbx +%define hmask1 rbx + +%define hash rdx +%define len rdx +%define code_len3 rdx +%define tmp8 rdx + +%define tmp1 rsi +%define code_len2 rsi + +%define file_start rdi + +%define m_bit_count rbp + +%define curr_data2 r8 +%define len2 r8 +%define tmp6 r8 +%define f_end_i r8 + +%define m_bits r9 + +%define f_i r10 + +%define m_out_buf r11 + +%define dist2 r12 +%define tmp7 r12 +%define code4 r12 + +%define tmp3 r13 +%define code3 r13 + +%define stream r14 + +%define hufftables r15 + +;; GPR r8 & r15 can be used + +%define xtmp0 xmm0 ; tmp +%define xtmp1 xmm1 ; tmp 
+%define xhash xmm2 +%define xmask xmm3 +%define xdata xmm4 + +%define ytmp0 ymm0 ; tmp +%define ytmp1 ymm1 ; tmp + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +blen_mem_offset equ 0 ; local variable (8 bytes) +f_end_i_mem_offset equ 8 +inbuf_slop_offset equ 16 +gpr_save_mem_offset equ 32 ; gpr save area (8*8 bytes) +xmm_save_mem_offset equ 32 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) +stack_size equ 4*8 + 8*8 + 4*16 + 8 +;;; 8 because stack address is odd multiple of 8 after a function call and +;;; we want it aligned to 16 bytes + +;; Defines to generate functions for different architecture +%xdefine ARCH 01 +%xdefine ARCH1 02 +%xdefine ARCH2 04 + +%ifndef COMPARE_TYPE +%xdefine COMPARE_TYPE_NOT_DEF +%xdefine COMPARE_TYPE 1 +%xdefine COMPARE_TYPE1 2 +%xdefine COMPARE_TYPE2 3 +%endif + +%rep 3 +%if ARCH == 04 +%define USE_HSWNI +%endif +; void isal_deflate_body ( isal_zstream *stream ) +; arg 1: rcx: addr of stream +global isal_deflate_body_ %+ ARCH +isal_deflate_body_ %+ ARCH %+ : +%ifidn __OUTPUT_FORMAT__, elf64 + mov rcx, rdi +%endif + + ;; do nothing if (avail_in == 0) + cmp dword [rcx + _avail_in], 0 + jne .skip1 + + ;; Set stream's next state + mov rdx, ZSTATE_FLUSH_READ_BUFFER + mov rax, ZSTATE_BODY + cmp word [rcx + _end_of_stream], 0 + cmovne rax, rdx + cmp word [rcx + _flush], _NO_FLUSH + cmovne rax, rdx + mov dword [rcx + _internal_state_state], eax + ret +.skip1: + +%ifdef ALIGN_STACK + push rbp + mov rbp, rsp + sub rsp, stack_size + and rsp, ~15 +%else + sub rsp, stack_size +%endif + + mov [rsp + gpr_save_mem_offset + 0*8], rbx + mov [rsp + gpr_save_mem_offset + 1*8], rsi + mov [rsp + gpr_save_mem_offset + 2*8], rdi + mov [rsp + gpr_save_mem_offset + 3*8], rbp + mov [rsp + gpr_save_mem_offset + 4*8], r12 + mov [rsp + gpr_save_mem_offset + 5*8], r13 + mov [rsp 
+ gpr_save_mem_offset + 6*8], r14 + mov [rsp + gpr_save_mem_offset + 7*8], r15 + + mov stream, rcx + mov byte [stream + _internal_state_has_eob], 0 + + MOVD xmask, [stream + _internal_state_hash_mask] + PSHUFD xmask, xmask, 0 + + ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); + mov m_out_buf, [stream + _next_out] + mov [stream + _internal_state_bitbuf_m_out_start], m_out_buf + mov tmp1 %+ d, [stream + _avail_out] + add tmp1, m_out_buf + sub tmp1, SLOP + + mov [stream + _internal_state_bitbuf_m_out_end], tmp1 + + mov m_bits, [stream + _internal_state_bitbuf_m_bits] + mov m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count] + mov hufftables, [stream + _hufftables] + + mov file_start, [stream + _next_in] + + mov f_i %+ d, dword [stream + _total_in] + sub file_start, f_i + + mov f_end_i %+ d, [stream + _avail_in] + add f_end_i, f_i + + mov qword [rsp + inbuf_slop_offset], MIN_INBUF_PADDING + cmp byte [stream + _end_of_stream], 0 + jnz .default_inbuf_padding + cmp byte [stream + _flush], 0 + jnz .default_inbuf_padding + mov qword [rsp + inbuf_slop_offset], LA +.default_inbuf_padding: + + ; f_end_i -= INBUF_PADDING; + sub f_end_i, [rsp + inbuf_slop_offset] + mov [rsp + f_end_i_mem_offset], f_end_i + ; if (f_end_i <= 0) continue; + + cmp f_end_i, f_i + jle .input_end + + MOVD hmask1 %+ d, xmask + ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { + MOVDQU xdata, [file_start + f_i] + mov curr_data, [file_start + f_i] + mov tmp3, curr_data + mov tmp6, curr_data + + compute_hash hash, curr_data + + shr tmp3, 8 + compute_hash hash2, tmp3 + + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d + + cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST + je .write_first_byte + + jmp .loop2 + align 16 + +.loop2: + mov tmp3 %+ d, dword [stream + _internal_state_dist_mask] + + ; if (state->bitbuf.is_full()) { + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja .output_end + + xor dist, dist + xor dist2, dist2 + + lea tmp1, [file_start 
+ f_i] + + mov dist %+ w, f_i %+ w + dec dist + sub dist %+ w, word [stream + _internal_state_head + 2 * hash] + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + + inc f_i + + MOVQ tmp6, xdata + shr tmp5, 16 + mov tmp8, tmp5 + compute_hash tmp6, tmp5 + + mov dist2 %+ w, f_i %+ w + dec dist2 + sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2] + mov [stream + _internal_state_head + 2 * hash2], f_i %+ w + + ; if ((dist-1) < (D-1)) { + and dist, tmp3 + neg dist + + shr tmp8, 8 + compute_hash tmp2, tmp8 + + and dist2, tmp3 + neg dist2 + + ;; Check for long len/dist match (>7) with first literal + MOVQ len, xdata + mov curr_data, len + PSRLDQ xdata, 1 + xor len, [tmp1 + dist - 1] + jz .compare_loop + + MOVD xhash, tmp6 %+ d + PINSRD xhash, tmp2 %+ d, 1 + PAND xhash, xhash, xmask + + ;; Check for len/dist match (>7) with second literal + MOVQ len2, xdata + xor len2, [tmp1 + dist2] + jz .compare_loop2 + + ;; Specutively load the code for the first literal + movzx tmp1, curr_data %+ b + get_lit_code tmp1, code3, rcx, hufftables + + ;; Check for len/dist match for first literal + test len %+ d, 0xFFFFFFFF + jz .len_dist_huffman_pre + + ;; Specutively load the code for the second literal + shr curr_data, 8 + and curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + + SHLX code2, code2, rcx + or code2, code3 + add code_len2, rcx + + ;; Check for len/dist match for second literal + test len2 %+ d, 0xFFFFFFFF + jnz .write_lit_bits + +.len_dist_lit_huffman_pre: + mov code_len3, rcx + bsf len2, len2 + shr len2, 3 + +.len_dist_lit_huffman: + neg dist2 + +%ifndef LONGER_HUFFTABLE + mov tmp4, dist2 + get_dist_code tmp4, code4, code_len2, hufftables ;; clobbers dist, rcx +%else + get_dist_code dist2, code4, code_len2, hufftables +%endif + get_len_code len2, code, rcx, hufftables ;; rcx is code_len + + MOVD hmask1 %+ d, xmask + + SHLX code4, code4, rcx + or code4, code + add code_len2, rcx + + add f_i, len2 + neg len2 + + SHLX code4, code4, 
code_len3 + + MOVQ tmp5, xdata + shr tmp5, 24 + compute_hash hash2, tmp5 + and hash2 %+ d, hmask1 %+ d + + or code4, code3 + add code_len2, code_len3 + + ;; Setup for updating hash + lea tmp3, [f_i + len2 + 1] ; tmp3 <= k + + mov tmp6, [rsp + f_end_i_mem_offset] + cmp f_i, tmp6 + jge .len_dist_lit_huffman_finish + + MOVDQU xdata, [file_start + f_i] + mov curr_data, [file_start + f_i] + + MOVD hash %+ d, xhash + PEXTRD tmp6 %+ d, xhash, 1 + mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + + compute_hash hash, curr_data + + add tmp3,1 + mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w + + add tmp3, 1 + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + + write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf + + mov curr_data2, curr_data + shr curr_data2, 8 + compute_hash hash2, curr_data2 + +%ifdef NO_LIMIT_HASH_UPDATE +.loop3: + add tmp3,1 + cmp tmp3, f_i + jae .loop3_done + mov tmp6, [file_start + tmp3] + compute_hash tmp1, tmp6 + and tmp1 %+ d, hmask1 %+ d + ; state->head[hash] = k; + mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w + jmp .loop3 +.loop3_done: +%endif + ; hash = compute_hash(state->file_start + f_i) & hash_mask; + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d + + ; continue + jmp .loop2 + ;; encode as dist/len +.len_dist_lit_huffman_finish: + MOVD hash %+ d, xhash + PEXTRD tmp6 %+ d, xhash, 1 + mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + add tmp3,1 + mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w + add tmp3, 1 + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + + write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf + jmp .input_end + +align 16 +.len_dist_huffman_pre: + bsf len, len + shr len, 3 + +.len_dist_huffman: + dec f_i + neg dist + + ; get_dist_code(dist, &code2, &code_len2); +%ifndef LONGER_HUFFTABLE + mov tmp3, dist ; since code2 and dist are rbx + get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx +%else + get_dist_code 
dist, code2, code_len2, hufftables +%endif + ; get_len_code(len, &code, &code_len); + get_len_code len, code, rcx, hufftables ;; rcx is code_len + + ; code2 <<= code_len + ; code2 |= code + ; code_len2 += code_len + SHLX code4, code2, rcx + or code4, code + add code_len2, rcx + + ;; Setup for updateing hash + lea tmp3, [f_i + 2] ; tmp3 <= k + add f_i, len + + MOVD hash %+ d, xhash + PEXTRD hash2 %+ d, xhash, 1 + mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + add tmp3,1 + mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + + MOVD hmask1 %+ d, xmask + + cmp f_i, [rsp + f_end_i_mem_offset] + jge .len_dist_huffman_finish + + MOVDQU xdata, [file_start + f_i] + mov curr_data, [file_start + f_i] + compute_hash hash, curr_data + + write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf + + mov curr_data2, curr_data + shr curr_data2, 8 + compute_hash hash2, curr_data2 + +%ifdef NO_LIMIT_HASH_UPDATE +.loop4: + add tmp3,1 + cmp tmp3, f_i + jae .loop4_done + mov tmp6, [file_start + tmp3] + compute_hash tmp1, tmp6 + and tmp1 %+ d, hmask1 %+ d + mov [stream + _internal_state_head + 2 * tmp1], tmp3 %+ w + jmp .loop4 +.loop4_done: +%endif + + ; hash = compute_hash(state->file_start + f_i) & hash_mask; + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d + + ; continue + jmp .loop2 + +.len_dist_huffman_finish: + write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf + jmp .input_end + +align 16 +.write_lit_bits: + PSRLDQ xdata, 1 + + add f_i, 1 + cmp f_i, [rsp + f_end_i_mem_offset] + jge .write_lit_bits_finish + + MOVQ curr_data, xdata + MOVDQU xdata, [file_start + f_i] + + MOVD hash %+ d, xhash + + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf + + PEXTRD hash2 %+ d, xhash, 1 + jmp .loop2 + +.write_lit_bits_finish: + write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf + +.input_end: + mov tmp1, ZSTATE_FLUSH_READ_BUFFER + mov tmp5, ZSTATE_BODY + cmp word [stream + _end_of_stream], 0 + cmovne tmp5, tmp1 + cmp word [stream + 
_flush], _NO_FLUSH + cmovne tmp5, tmp1 + mov dword [stream + _internal_state_state], tmp5 %+ d + +.output_end: + ;; update input buffer + mov f_end_i, [rsp + f_end_i_mem_offset] + add f_end_i, [rsp + inbuf_slop_offset] + mov [stream + _total_in], f_i %+ d + add file_start, f_i + mov [stream + _next_in], file_start + sub f_end_i, f_i + mov [stream + _avail_in], f_end_i %+ d + + ;; update output buffer + mov [stream + _next_out], m_out_buf + sub m_out_buf, [stream + _internal_state_bitbuf_m_out_start] + sub [stream + _avail_out], m_out_buf %+ d + add [stream + _total_out], m_out_buf %+ d + + mov [stream + _internal_state_bitbuf_m_bits], m_bits + mov [stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d + + mov rbx, [rsp + gpr_save_mem_offset + 0*8] + mov rsi, [rsp + gpr_save_mem_offset + 1*8] + mov rdi, [rsp + gpr_save_mem_offset + 2*8] + mov rbp, [rsp + gpr_save_mem_offset + 3*8] + mov r12, [rsp + gpr_save_mem_offset + 4*8] + mov r13, [rsp + gpr_save_mem_offset + 5*8] + mov r14, [rsp + gpr_save_mem_offset + 6*8] + mov r15, [rsp + gpr_save_mem_offset + 7*8] + +%ifndef ALIGN_STACK + add rsp, stack_size +%else + mov rsp, rbp + pop rbp +%endif + ret + +align 16 +.compare_loop: + MOVD xhash, tmp6 %+ d + PINSRD xhash, tmp2 %+ d, 1 + PAND xhash, xhash, xmask + lea tmp2, [tmp1 + dist - 1] + + mov len2, [rsp + f_end_i_mem_offset] + sub len2, f_i + add len2, [rsp + inbuf_slop_offset] + add len2, 1 + mov tmp3, MAX_EMIT_SIZE + cmp len2, tmp3 + cmovg len2, tmp3 + + mov len, 8 + compare_large tmp1, tmp2, len, len2, tmp3, ytmp0, ytmp1 + + cmp len, 258 + jle .len_dist_huffman + cmp len, LARGE_MATCH_MIN + jge .do_emit + mov len, 258 + jmp .len_dist_huffman + +align 16 +.compare_loop2: + lea tmp2, [tmp1 + dist2] + add tmp1, 1 + + mov len, [rsp + f_end_i_mem_offset] + sub len, f_i + add len, [rsp + inbuf_slop_offset] + mov tmp3, MAX_EMIT_SIZE + cmp len, tmp3 + cmovg len, tmp3 + + mov len2, 8 + compare_large tmp1, tmp2, len2, len, tmp3, ytmp0, ytmp1 + + and curr_data, 0xff + 
get_lit_code curr_data, code3, code_len3, hufftables + cmp len2, 258 + jle .len_dist_lit_huffman + cmp len2, LARGE_MATCH_MIN + jge .do_emit2 + mov len2, 258 + jmp .len_dist_lit_huffman + +align 16 +.do_emit2: + neg dist2 + + ; get_dist_code(dist2, &code2, &code_len2); + get_dist_code dist2, code2, code_len2, hufftables + + ; get_len_code(len, &code, &code_len); + get_len_code 258, code, rcx, hufftables ;; rcx is code_len + + ; code2 <<= code_len + ; code2 |= code + ; code_len2 += code_len + SHLX code4, code2, rcx + or code4, code + add code_len2, rcx + mov tmp5, rcx + + mov rcx, code_len3 + SHLX tmp8, code4, rcx + or code3, tmp8 + add rcx, code_len2 + mov code_len3, rcx + + write_bits m_bits, m_bit_count, code3, code_len3, m_out_buf + + lea tmp3, [f_i + 2] ; tmp3 <= k + MOVD tmp2 %+ d, xhash + mov [stream + _internal_state_head + 2 * tmp2], tmp3 %+ w + add tmp3,1 + PEXTRD tmp2 %+ d, xhash, 1 + mov [stream + _internal_state_head + 2 * tmp2], tmp3 %+ w + + add f_i, 258 + lea len, [len2 - 258] + + jmp .emit_loop + +.do_emit: + dec f_i + neg dist + + ; get_dist_code(dist, &code2, &code_len2); +%ifndef LONGER_HUFFTABLE + mov tmp3, dist ; since code2 and dist are rbx + get_dist_code tmp3, code2, code_len2, hufftables ;; clobbers dist, rcx +%else + get_dist_code dist, code2, code_len2, hufftables +%endif + ; get_len_code(len, &code, &code_len); + get_len_code 258, code, rcx, hufftables ;; rcx is code_len + + ; code2 <<= code_len + ; code2 |= code + ; code_len2 += code_len + SHLX code4, code2, rcx + or code4, code + add code_len2, rcx + + lea tmp3, [f_i + 2] ; tmp3 <= k + MOVD tmp6 %+ d, xhash + PEXTRD tmp5 %+ d, xhash, 1 + mov [stream + _internal_state_head + 2 * tmp6], tmp3 %+ w + add tmp3,1 + mov [stream + _internal_state_head + 2 * tmp5], tmp3 %+ w + mov tmp5, rcx + +.emit: + add f_i, 258 + sub len, 258 + mov code3, code4 + + write_bits m_bits, m_bit_count, code3, code_len2, m_out_buf + +.emit_loop: + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja 
.output_end + cmp len, LARGE_MATCH_MIN + jge .emit + + mov len2, 258 + cmp len, len2 + cmovg len, len2 + + add f_i, len + + sub code_len2, tmp5 + get_len_code len, code, rcx, hufftables + SHLX code4, code2, rcx + or code4, code + add code_len2, rcx + + write_bits m_bits, m_bit_count, code4, code_len2, m_out_buf + + cmp f_i, [rsp + f_end_i_mem_offset] + jge .input_end + + lea tmp7, [f_i - 4 * LARGE_MATCH_HASH_REP] + MOVD hmask1 %+ d, xmask +%rep LARGE_MATCH_HASH_REP + mov curr_data %+ d, dword [file_start + tmp7] + mov curr_data2 %+ d, dword [file_start + tmp7 + 1] + + compute_hash hash, curr_data + compute_hash hash2, curr_data2 + + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d + + mov [stream + _internal_state_head + 2 * hash], tmp7 %+ w + add tmp7, 1 + mov [stream + _internal_state_head + 2 * hash2], tmp7 %+ w + add tmp7, 1 + + mov curr_data %+ d, dword [file_start + tmp7] + mov curr_data2 %+ d, dword [file_start + tmp7 + 1] + + compute_hash hash, curr_data + compute_hash hash2, curr_data2 + + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d + + mov [stream + _internal_state_head + 2 * hash], tmp7 %+ w + add tmp7, 1 + mov [stream + _internal_state_head + 2 * hash2], tmp7 %+ w +%if (LARGE_MATCH_HASH_REP > 1) + add tmp7, 1 +%endif +%endrep + + MOVDQU xdata, [file_start + f_i] + mov curr_data, [file_start + f_i] + compute_hash hash, curr_data + + + mov curr_data2, curr_data + shr curr_data2, 8 + compute_hash hash2, curr_data2 + + ; hash = compute_hash(state->file_start + f_i) & hash_mask; + and hash %+ d, hmask1 %+ d + and hash2 %+ d, hmask1 %+ d + + ; continue + jmp .loop2 + +.write_first_byte: + cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] + ja .output_end + + mov byte [stream + _internal_state_has_hist], IGZIP_HIST + + mov [stream + _internal_state_head + 2 * hash], f_i %+ w + + mov hash, hash2 + shr tmp6, 16 + compute_hash hash2, tmp6 + + MOVD xhash, hash %+ d + PINSRD xhash, hash2 %+ d, 1 + PAND xhash, xhash, xmask + + and 
curr_data, 0xff + get_lit_code curr_data, code2, code_len2, hufftables + jmp .write_lit_bits + +%ifdef USE_HSWNI +%undef USE_HSWNI +%endif + +;; Shift defines over in order to iterate over all versions +%undef ARCH +%xdefine ARCH ARCH1 +%undef ARCH1 +%xdefine ARCH1 ARCH2 + +%ifdef COMPARE_TYPE_NOT_DEF +%undef COMPARE_TYPE +%xdefine COMPARE_TYPE COMPARE_TYPE1 +%undef COMPARE_TYPE1 +%xdefine COMPARE_TYPE1 COMPARE_TYPE2 +%endif +%endrep diff --git a/src/spdk/isa-l/igzip/igzip_build_hash_table_perf.c b/src/spdk/isa-l/igzip/igzip_build_hash_table_perf.c new file mode 100644 index 000000000..c402c91b2 --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_build_hash_table_perf.c @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +#define DICT_LEN 32*1024 + +extern void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, int dict_len); + +void create_rand_data(uint8_t * data, uint32_t size) +{ + int i; + for (i = 0; i < size; i++) { + data[i] = rand() % 256; + } +} + +int main(int argc, char *argv[]) +{ + int time = BENCHMARK_TIME; + struct isal_zstream stream; + uint8_t dict[DICT_LEN]; + uint32_t dict_len = DICT_LEN; + + stream.level = 0; + create_rand_data(dict, dict_len); + + struct perf start; + BENCHMARK(&start, time, isal_deflate_hash(&stream, dict, dict_len)); + + printf("igzip_build_hash_table_perf:\n"); + printf(" in_size=%u ", dict_len); + perf_print(start, (long long)dict_len); + + return 0; +} diff --git a/src/spdk/isa-l/igzip/igzip_checksums.h b/src/spdk/isa-l/igzip/igzip_checksums.h new file mode 100644 index 000000000..e09a1f161 --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_checksums.h @@ -0,0 +1,12 @@ +#ifndef IGZIP_CHECKSUMS_H +#define IGZIP_CHECKSUMS_H + +#include + +#define MAX_ADLER_BUF (1 << 28) +#define ADLER_MOD 65521 + +uint32_t isal_adler32(uint32_t init_crc, const unsigned char *buf, uint64_t len); +uint32_t isal_adler32_bam1(uint32_t init_crc, const unsigned char *buf, uint64_t 
len); + +#endif diff --git a/src/spdk/isa-l/igzip/igzip_compare_types.asm b/src/spdk/isa-l/igzip/igzip_compare_types.asm new file mode 100644 index 000000000..c5ab3169f --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_compare_types.asm @@ -0,0 +1,452 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "stdmac.asm"
+
+%ifndef UTILS_ASM
+%define UTILS_ASM
+; Comparison macros: each one measures how many leading bytes of two buffers are equal
+
+;; sttni2 is faster, but it can't be debugged
+;; so following code is based on "mine5"
+
+;; compare_r src1, src2, result, result_max, tmp
+;; Compares [src1 + result ...] with [src2 + result ...] 8 bytes at a time using xor. On exit result is the offset of the first differing byte, or the entry value of result_max if none differ. Modifies result_max and tmp.
+;; Assumes the input buffer has size at least 8: the final step re-reads the 8 bytes ending at result_max
+%macro compare_r 5
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3	; in: offset to start comparing at; out: match length
+%define %%result_max %4	; in: offset one past the last byte to compare; modified
+%define %%tmp %5	; scratch register holding the xor of 8 bytes from each source
+%define %%tmp16 %5w ; tmp as a 16-bit register (not referenced in this macro)
+
+	sub %%result_max, 16	; result_max - 16: last offset at which 16 bytes remain
+	cmp %%result, %%result_max
+	jg %%_by_8
+
+%%loop1:
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]	; non-zero iff any of the 8 bytes differ
+	jnz %%miscompare_reg
+	add %%result, 8
+
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare_reg
+	add %%result, 8
+	cmp %%result, %%result_max
+	jle %%loop1
+
+%%_by_8:
+	add %%result_max, 8	; now entry result_max - 8
+	cmp %%result, %%result_max
+	jg %%_cmp_last
+
+	; compare one more full 8-byte word
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare_reg
+	add %%result, 8
+
+%%_cmp_last:
+	add %%result_max, 8	; back to the entry value of result_max
+	cmp %%result, %%result_max
+	je %%end
+
+	lea %%result, [%%result_max - 8]	; step back so the last 8-byte load ends exactly at result_max
+
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare_reg
+	add %%result, 8
+	jmp %%end
+
+%%miscompare_reg:
+	bsf %%tmp, %%tmp	; index of the lowest differing bit
+	shr %%tmp, 3	; bit index -> byte index
+	add %%result, %%tmp
+%%end:
+%endm
+
+;; compare_x src1, src2, result, result_max, tmp, xtmp1, xtmp2
+;; Same contract as compare_r, but compares 16 bytes at a time using pcmpeqb/pmovmskb, then finishes with 8-byte xor steps. Modifies result_max, tmp and both xmm temporaries.
+;; Assumes the input buffer has size at least 8: the final step re-reads the 8 bytes ending at result_max
+%macro compare_x 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4	; in: offset one past the last byte to compare; modified
+%define %%tmp %5	; scratch general-purpose register
+%define %%tmp16 %5w ; tmp as a 16-bit register (not referenced in this macro)
+%define %%tmp32 %5d ; tmp as a 32-bit register
+%define %%xtmp %6
+%define %%xtmp2 %7
+
+	sub %%result_max, 32	; result_max - 32: last offset at which 32 bytes remain
+	cmp %%result, %%result_max
+	jg %%_by_16
+
+%%loop1:
+	MOVDQU %%xtmp, [%%src1 + %%result]
+	MOVDQU %%xtmp2, [%%src2 + %%result]
+	PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+	PMOVMSKB %%tmp32, %%xtmp	; one mask bit per byte, set where the bytes are equal
+	xor %%tmp, 0xFFFF	; invert the 16 mask bits: set where bytes differ, zero if all match
+	jnz %%miscompare_vect
+	add %%result, 16
+
+	MOVDQU %%xtmp, [%%src1 + %%result]
+	MOVDQU %%xtmp2, [%%src2 + %%result]
+	PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+	PMOVMSKB %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare_vect
+	add %%result, 16
+
+	cmp %%result, %%result_max
+	jle %%loop1
+
+%%_by_16:
+	add %%result_max, 16	; now entry result_max - 16
+	cmp %%result, %%result_max
+	jg %%_by_8
+
+	MOVDQU %%xtmp, [%%src1 + %%result]
+	MOVDQU %%xtmp2, [%%src2 + %%result]
+	PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+	PMOVMSKB %%tmp32, %%xtmp
+	xor %%tmp, 0xFFFF
+	jnz %%miscompare_vect
+	add %%result, 16
+
+%%_by_8:
+	add %%result_max, 8	; now entry result_max - 8
+	cmp %%result, %%result_max
+	jg %%_cmp_last
+
+	; compare one more full 8-byte word
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare_reg
+	add %%result, 8
+
+%%_cmp_last:
+	add %%result_max, 8	; back to the entry value of result_max
+	cmp %%result, %%result_max
+	je %%end
+
+	lea %%result, [%%result_max - 8]	; step back so the last 8-byte load ends exactly at result_max
+
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare_reg
+	add %%result, 8
+	jmp %%end
+
+%%miscompare_reg:
+	bsf %%tmp, %%tmp	; lowest differing bit of the 8-byte xor
+	shr %%tmp, 3	; bit index -> byte index
+	add %%result, %%tmp
+	jmp %%end
+
+%%miscompare_vect:
+	bsf %%tmp, %%tmp	; mask bit index is already a byte index
+	add %%result, %%tmp
+%%end:
+%endm
+
+;; compare_y src1, src2, result, result_max, tmp, ytmp1, ytmp2
+;; Same contract as compare_r, but compares 32 bytes at a time using vpcmpeqb/vpmovmskb, then one 16-byte step, then 8-byte xor steps. Modifies result_max, tmp and both ymm temporaries.
+;; Assumes the input buffer has size at least 8: the final step re-reads the 8 bytes ending at result_max
+%macro compare_y 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4	; in: offset one past the last byte to compare; modified
+%define %%tmp %5	; scratch general-purpose register
+%define %%tmp16 %5w ; tmp as a 16-bit register (not referenced in this macro)
+%define %%tmp32 %5d ; tmp as a 32-bit register
+%define %%ytmp %6
+%define %%ytmp2 %7
+
+	sub %%result_max, 64	; result_max - 64: last offset at which 64 bytes remain
+	cmp %%result, %%result_max
+	jg %%_by_32
+
+%%loop1:
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp	; one mask bit per byte, set where the bytes are equal
+	xor %%tmp32, 0xFFFFFFFF	; invert the 32 mask bits: set where bytes differ, zero if all match
+	jnz %%miscompare_vect
+	add %%result, 32
+
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare_vect
+	add %%result, 32
+
+	cmp %%result, %%result_max
+	jle %%loop1
+
+%%_by_32:
+	add %%result_max, 32	; now entry result_max - 32
+	cmp %%result, %%result_max
+	jg %%_by_16
+
+	vmovdqu %%ytmp, [%%src1 + %%result]
+	vmovdqu %%ytmp2, [%%src2 + %%result]
+	vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+	vpmovmskb %%tmp, %%ytmp
+	xor %%tmp32, 0xFFFFFFFF
+	jnz %%miscompare_vect
+	add %%result, 32
+
+%%_by_16:
+	add %%result_max, 16	; now entry result_max - 16
+	cmp %%result, %%result_max
+	jg %%_by_8
+
+	vmovdqu %%ytmp %+ x, [%%src1 + %%result]
+	vmovdqu %%ytmp2 %+ x, [%%src2 + %%result]
+	vpcmpeqb %%ytmp %+ x, %%ytmp %+ x, %%ytmp2 %+ x
+	vpmovmskb %%tmp, %%ytmp %+ x
+	xor %%tmp32, 0xFFFF	; only 16 mask bits are meaningful for the 16-byte step
+	jnz %%miscompare_vect
+	add %%result, 16
+
+%%_by_8:
+	add %%result_max, 8	; now entry result_max - 8
+	cmp %%result, %%result_max
+	jg %%_cmp_last
+
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare_reg
+	add %%result, 8
+
+%%_cmp_last:
+	add %%result_max, 8	; back to the entry value of result_max
+	cmp %%result, %%result_max
+	je %%end
+
+	lea %%result, [%%result_max - 8]
+
+	; compare the final 8 bytes, ending exactly at result_max (may overlap bytes already checked)
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare_reg
+	add %%result, 8
+	jmp %%end
+
+%%miscompare_reg:
+	bsf %%tmp, %%tmp	; lowest differing bit of the 8-byte xor
+	shr %%tmp, 3	; bit index -> byte index
+	add %%result, %%tmp
+	jmp %%end
+
+%%miscompare_vect:
+	tzcnt %%tmp, %%tmp	; mask bit index is already a byte index
+	add %%result, %%tmp
+%%end:
+%endm
+
+;; compares 64 bytes at a time using masked byte compares (vpcmpb into a k register); the tail of fewer than 64 bytes is loaded with a zeroing byte mask
+;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
+;; Clobbers result_max
+%macro compare_z 8
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4
+%define %%tmp %5 ; scratch general-purpose register (used with bzhi/kmovq/tzcnt)
+%define %%ktmp %6
+%define %%ztmp %7
+%define %%ztmp2 %8
+
+	sub %%result_max, 128	; result_max - 128: last offset at which 128 bytes remain
+	cmp %%result, %%result_max
+	jg %%_by_64
+
+%%loop1:
+	vmovdqu8 %%ztmp, [%%src1 + %%result]
+	vmovdqu8 %%ztmp2, [%%src2 + %%result]
+	vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ	; mask bit set for every differing byte
+	ktestq %%ktmp, %%ktmp
+	jnz %%miscompare
+	add %%result, 64
+
+	vmovdqu8 %%ztmp, [%%src1 + %%result]
+	vmovdqu8 %%ztmp2, [%%src2 + %%result]
+	vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+	ktestq %%ktmp, %%ktmp
+	jnz %%miscompare
+	add %%result, 64
+
+	cmp %%result, %%result_max
+	jle %%loop1
+
+%%_by_64:
+	add %%result_max, 64	; now entry result_max - 64
+	cmp %%result, %%result_max
+	jg %%_less_than_64
+
+	vmovdqu8 %%ztmp, [%%src1 + %%result]
+	vmovdqu8 %%ztmp2, [%%src2 + %%result]
+	vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+	ktestq %%ktmp, %%ktmp
+	jnz %%miscompare
+	add %%result, 64
+
+%%_less_than_64:
+	add %%result_max, 64	; back to the entry value of result_max
+	sub %%result_max, %%result	; result_max = number of bytes still to compare
+	jle %%end
+
+	mov %%tmp, -1
+	bzhi %%tmp, %%tmp, %%result_max	; keep one set bit per remaining byte
+	kmovq %%ktmp, %%tmp
+
+	vmovdqu8 %%ztmp {%%ktmp}{z}, [%%src1 + %%result]
+	vmovdqu8 %%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
+	vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+	ktestq %%ktmp, %%ktmp
+	jnz %%miscompare
+	add %%result, %%result_max
+
+	jmp %%end
+%%miscompare:
+	kmovq %%tmp, %%ktmp
+	tzcnt %%tmp, %%tmp	; index of the first differing byte in this chunk
+	add %%result, %%tmp
+%%end:
+%endm
+
+%macro compare250 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4	; clamped to at most 250 before the compare
+%define %%tmp %5
+%define %%xtmp0 %6x
+%define %%xtmp1 %7x
+%define %%ytmp0 %6
+%define %%ytmp1 %7
+
+	mov %%tmp, 250
+	cmp %%result_max, 250
+	cmovg %%result_max, %%tmp	; result_max = min(result_max, 250)
+
+%if (COMPARE_TYPE == 1)
+	compare_r %%src1, %%src2, %%result, %%result_max, %%tmp
+%elif (COMPARE_TYPE == 2)
+	compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
+%elif (COMPARE_TYPE == 3)
+	compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
+%else
+%error Unknown Compare type COMPARE_TYPE
+ % error
+%endif
+%endmacro
+
+; Assumes the buffer has at least 8 bytes
+; Accumulates match length onto result; dispatches to compare_r/compare_x/compare_y according to COMPARE_TYPE, without clamping result_max
+%macro compare_large 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4
+%define %%tmp %5
+%define %%xtmp0 %6x
+%define %%xtmp1 %7x
+%define %%ytmp0 %6
+%define %%ytmp1 %7
+
+%if (COMPARE_TYPE == 1)
+	compare_r %%src1, %%src2, %%result, %%result_max, %%tmp
+%elif (COMPARE_TYPE == 2)
+	compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
+%elif (COMPARE_TYPE == 3)
+	compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
+%else
+%error Unknown Compare type COMPARE_TYPE
+ % error
+%endif
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; compare size, src1, src2, result, tmp: sets result to the number of leading bytes (at most size) that are equal in src1 and src2; never reads past size bytes; modifies size and tmp
+%macro compare 5
+%define %%size %1
+%define %%src1 %2
+%define %%src2 %3
+%define %%result %4
+%define %%tmp %5
+%define %%tmp8 %5b ; tmp as a 8-bit register
+
+	xor %%result, %%result	; result always starts at 0 here (not an accumulator)
+	sub %%size, 7	; size - 7 > 0 iff at least 8 bytes remain
+	jle %%lab2
+%%loop1:
+	mov %%tmp, [%%src1 + %%result]
+	xor %%tmp, [%%src2 + %%result]
+	jnz %%miscompare
+	add %%result, 8
+	sub %%size, 8
+	jg %%loop1
+%%lab2:
+	;; if we fall through from above, we have found no mismatches,
+	;; %%size+7 is the number of bytes left to look at, and %%result is the
+	;; number of bytes that have matched
+	add %%size, 7
+	jle %%end
+%%loop3:
+	mov %%tmp8, [%%src1 + %%result]	; finish the last (fewer than 8) bytes one at a time
+	cmp %%tmp8, [%%src2 + %%result]
+	jne %%end
+	inc %%result
+	dec %%size
+	jg %%loop3
+	jmp %%end
+%%miscompare:
+	bsf %%tmp, %%tmp	; lowest differing bit of the 8-byte xor
+	shr %%tmp, 3	; bit index -> byte index
+	add %%result, %%tmp
+%%end:
+%endm
+
+%endif ;UTILS_ASM
diff --git a/src/spdk/isa-l/igzip/igzip_decode_block_stateless.asm b/src/spdk/isa-l/igzip/igzip_decode_block_stateless.asm
new file mode 100644
index 000000000..f5e35cd68
--- /dev/null
+++ b/src/spdk/isa-l/igzip/igzip_decode_block_stateless.asm
@@ -0,0 +1,795 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+default rel
+
+%include "reg_sizes.asm"
+
+%define DECOMP_OK 0
+%define END_INPUT 1
+%define OUT_OVERFLOW 2
+%define INVALID_BLOCK -1
+%define INVALID_SYMBOL -2
+%define INVALID_LOOKBACK -3
+
+%define ISAL_DECODE_LONG_BITS 12	; index width passed as lookup_size for the lit/len table in this file
+%define ISAL_DECODE_SHORT_BITS 10
+
+%define COPY_SIZE 16
+%define COPY_LEN_MAX 258	; longest match length a deflate length code can encode
+
+%define IN_BUFFER_SLOP 8	; subtracted from end_in at function entry
+%define OUT_BUFFER_SLOP COPY_SIZE + COPY_LEN_MAX ; NOTE(review): expands unparenthesised; safe only where a bare sum is valid
+
+%include "inflate_data_structs.asm"
+%include "stdmac.asm"
+
+extern rfc1951_lookup_table
+
+
+
+%define LARGE_SHORT_SYM_LEN 25	; LARGE_*: field layout of lit/len lookup entries, used by decode_next_lit_len
+%define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
+%define LARGE_LONG_SYM_LEN 10
+%define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
+%define LARGE_SHORT_CODE_LEN_OFFSET 28
+%define LARGE_LONG_CODE_LEN_OFFSET 10
+%define LARGE_FLAG_BIT_OFFSET 25
+%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
+%define LARGE_SYM_COUNT_OFFSET 26
+%define LARGE_SYM_COUNT_LEN 2
+%define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
+%define LARGE_SHORT_MAX_LEN_OFFSET 26
+
+%define SMALL_SHORT_SYM_LEN 9	; SMALL_*: field layout of distance lookup entries, used by decode_next_dist
+%define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
+%define SMALL_LONG_SYM_LEN 9
+%define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
+%define SMALL_SHORT_CODE_LEN_OFFSET 11
+%define SMALL_LONG_CODE_LEN_OFFSET 10
+%define SMALL_FLAG_BIT_OFFSET 10
+%define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
+
+%define DIST_SYM_OFFSET 0	; DIST_*: split of a decoded distance entry into symbol and extra-bit fields
+%define DIST_SYM_LEN 5
+%define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
+%define DIST_SYM_EXTRA_OFFSET 5
+%define DIST_SYM_EXTRA_LEN 4
+%define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
+
+;; rax (the aliases below share one physical register, so only one can be live at a time)
+%define tmp3 rax
+%define read_in_2 rax
+%define look_back_dist rax
+
+;; rcx
+;; rdx arg3
+%define next_sym2 rdx
+%define copy_start rdx
+%define tmp4 rdx
+
+;; rdi arg1
+%define tmp1 rdi
+%define look_back_dist2 rdi
+%define next_bits2 rdi
+%define next_sym3 rdi
+
+;; rsi arg2
+%define tmp2 rsi
+%define next_sym_num rsi
+%define next_bits rsi + +;; rbx ; Saved +%define next_in rbx + +;; rbp ; Saved +%define end_in rbp + +;; r8 +%define repeat_length r8 + +;; r9 +%define read_in r9 + +;; r10 +%define read_in_length r10 + +;; r11 +%define state r11 + +;; r12 ; Saved +%define next_out r12 + +;; r13 ; Saved +%define end_out r13 + +;; r14 ; Saved +%define next_sym r14 + +;; r15 ; Saved +%define rfc_lookup r15 + +start_out_mem_offset equ 0 +read_in_mem_offset equ 8 +read_in_length_mem_offset equ 16 +next_out_mem_offset equ 24 +gpr_save_mem_offset equ 32 +stack_size equ 4 * 8 + 8 * 8 + +%define _dist_extra_bit_count 264 +%define _dist_start _dist_extra_bit_count + 1*32 +%define _len_extra_bit_count _dist_start + 4*32 +%define _len_start _len_extra_bit_count + 1*32 + +%ifidn __OUTPUT_FORMAT__, elf64 +%define arg0 rdi +%define arg1 rsi + +%macro FUNC_SAVE 0 +%ifdef ALIGN_STACK + push rbp + mov rbp, rsp + sub rsp, stack_size + and rsp, ~15 +%else + sub rsp, stack_size +%endif + + mov [rsp + gpr_save_mem_offset + 0*8], rbx + mov [rsp + gpr_save_mem_offset + 1*8], rbp + mov [rsp + gpr_save_mem_offset + 2*8], r12 + mov [rsp + gpr_save_mem_offset + 3*8], r13 + mov [rsp + gpr_save_mem_offset + 4*8], r14 + mov [rsp + gpr_save_mem_offset + 5*8], r15 +%endm + +%macro FUNC_RESTORE 0 + mov rbx, [rsp + gpr_save_mem_offset + 0*8] + mov rbp, [rsp + gpr_save_mem_offset + 1*8] + mov r12, [rsp + gpr_save_mem_offset + 2*8] + mov r13, [rsp + gpr_save_mem_offset + 3*8] + mov r14, [rsp + gpr_save_mem_offset + 4*8] + mov r15, [rsp + gpr_save_mem_offset + 5*8] + +%ifndef ALIGN_STACK + add rsp, stack_size +%else + mov rsp, rbp + pop rbp +%endif +%endm +%endif + +%ifidn __OUTPUT_FORMAT__, win64 +%define arg0 rcx +%define arg1 rdx + +%macro FUNC_SAVE 0 +%ifdef ALIGN_STACK + push rbp + mov rbp, rsp + sub rsp, stack_size + and rsp, ~15 +%else + sub rsp, stack_size +%endif + + mov [rsp + gpr_save_mem_offset + 0*8], rbx + mov [rsp + gpr_save_mem_offset + 1*8], rsi + mov [rsp + gpr_save_mem_offset + 2*8], rdi + mov [rsp 
+ gpr_save_mem_offset + 3*8], rbp + mov [rsp + gpr_save_mem_offset + 4*8], r12 + mov [rsp + gpr_save_mem_offset + 5*8], r13 + mov [rsp + gpr_save_mem_offset + 6*8], r14 + mov [rsp + gpr_save_mem_offset + 7*8], r15 +%endm + +%macro FUNC_RESTORE 0 + mov rbx, [rsp + gpr_save_mem_offset + 0*8] + mov rsi, [rsp + gpr_save_mem_offset + 1*8] + mov rdi, [rsp + gpr_save_mem_offset + 2*8] + mov rbp, [rsp + gpr_save_mem_offset + 3*8] + mov r12, [rsp + gpr_save_mem_offset + 4*8] + mov r13, [rsp + gpr_save_mem_offset + 5*8] + mov r14, [rsp + gpr_save_mem_offset + 6*8] + mov r15, [rsp + gpr_save_mem_offset + 7*8] + +%ifndef ALIGN_STACK + add rsp, stack_size +%else + mov rsp, rbp + pop rbp +%endif +%endm +%endif + +;; Load read_in and updated in_buffer accordingly +;; when there are at least 8 bytes in the in buffer +;; Clobbers rcx, unless rcx is %%read_in_length +%macro inflate_in_load 6 +%define %%next_in %1 +%define %%end_in %2 +%define %%read_in %3 +%define %%read_in_length %4 +%define %%tmp1 %5 ; Tmp registers +%define %%tmp2 %6 + + SHLX %%tmp1, [%%next_in], %%read_in_length + or %%read_in, %%tmp1 + + mov %%tmp1, 64 + sub %%tmp1, %%read_in_length + shr %%tmp1, 3 + + add %%next_in, %%tmp1 + lea %%read_in_length, [%%read_in_length + 8 * %%tmp1] +%%end: +%endm + +;; Load read_in and updated in_buffer accordingly +;; Clobbers rcx, unless rcx is %%read_in_length +%macro inflate_in_small_load 6 +%define %%next_in %1 +%define %%end_in %2 +%define %%read_in %3 +%define %%read_in_length %4 +%define %%avail_in %5 ; Tmp registers +%define %%tmp1 %5 +%define %%loop_count %6 + + mov %%avail_in, %%end_in + sub %%avail_in, %%next_in + +%ifnidn %%read_in_length, rcx + mov rcx, %%read_in_length +%endif + + mov %%loop_count, 64 + sub %%loop_count, %%read_in_length + shr %%loop_count, 3 + + cmp %%loop_count, %%avail_in + cmovg %%loop_count, %%avail_in + cmp %%loop_count, 0 + je %%end + +%%load_byte: + xor %%tmp1, %%tmp1 + mov %%tmp1 %+ b, byte [%%next_in] + SHLX %%tmp1, %%tmp1, rcx + or 
%%read_in, %%tmp1 + add rcx, 8 + add %%next_in, 1 + sub %%loop_count, 1 + jg %%load_byte +%ifnidn %%read_in_length, rcx + mov %%read_in_length, rcx +%endif +%%end: +%endm + +;; Clears all bits at index %%bit_count and above in %%next_bits +;; May clobber rcx and %%bit_count +%macro CLEAR_HIGH_BITS 3 +%define %%next_bits %1 +%define %%bit_count %2 +%define %%lookup_size %3 + + sub %%bit_count, 0x40 + %%lookup_size +;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits. +%ifdef USE_HSWNI + and %%bit_count, 0x1F + bzhi %%next_bits, %%next_bits, %%bit_count +%else +%ifnidn %%bit_count, rcx + mov rcx, %%bit_count +%endif + neg rcx + shl %%next_bits, cl + shr %%next_bits, cl +%endif + +%endm + +;; Decode next symbol +;; Clobber rcx +%macro decode_next_lit_len 8 +%define %%state %1 ; State structure associated with compressed stream +%define %%lookup_size %2 ; Number of bits used for small lookup +%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST +%define %%read_in %4 ; Bits read in from compressed stream +%define %%read_in_length %5 ; Number of valid bits in read_in +%define %%next_sym %6 ; Returned symbols +%define %%next_sym_num %7 ; Returned symbols count +%define %%next_bits %8 + + mov %%next_sym_num, %%next_sym + mov rcx, %%next_sym + shr rcx, LARGE_SHORT_CODE_LEN_OFFSET + jz invalid_symbol + + and %%next_sym_num, LARGE_SYM_COUNT_MASK << LARGE_SYM_COUNT_OFFSET + shr %%next_sym_num, LARGE_SYM_COUNT_OFFSET + + ;; Check if symbol or hint was looked up + and %%next_sym, LARGE_FLAG_BIT | LARGE_SHORT_SYM_MASK + test %%next_sym, LARGE_FLAG_BIT + jz %%end + + shl rcx, LARGE_SYM_COUNT_LEN + or rcx, %%next_sym_num + + ;; Save length associated with symbol + mov %%next_bits, %%read_in + shr %%next_bits, %%lookup_size + + ;; Extract the bits beyond the first %%lookup_size bits. 
+ CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size + + and %%next_sym, LARGE_SHORT_SYM_MASK + add %%next_sym, %%next_bits + + ;; Lookup actual next symbol + movzx %%next_sym, word [%%state + LARGE_LONG_CODE_SIZE * %%next_sym + %%state_offset + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size)] + mov %%next_sym_num, 1 + + ;; Save length associated with symbol + mov rcx, %%next_sym + shr rcx, LARGE_LONG_CODE_LEN_OFFSET + jz invalid_symbol + and %%next_sym, LARGE_LONG_SYM_MASK + +%%end: +;; Updated read_in to reflect the bits which were decoded + SHRX %%read_in, %%read_in, rcx + sub %%read_in_length, rcx +%endm + +;; Decode next symbol +;; Clobber rcx +%macro decode_next_lit_len_with_load 8 +%define %%state %1 ; State structure associated with compressed stream +%define %%lookup_size %2 ; Number of bits used for small lookup +%define %%state_offset %3 +%define %%read_in %4 ; Bits read in from compressed stream +%define %%read_in_length %5 ; Number of valid bits in read_in +%define %%next_sym %6 ; Returned symbols +%define %%next_sym_num %7 ; Returned symbols count +%define %%next_bits %8 + + ;; Lookup possible next symbol + mov %%next_bits, %%read_in + and %%next_bits, (1 << %%lookup_size) - 1 + mov %%next_sym %+ d, dword [%%state + %%state_offset + LARGE_SHORT_CODE_SIZE * %%next_bits] + + decode_next_lit_len %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_sym_num, %%next_bits +%endm + +;; Decode next symbol +;; Clobber rcx +%macro decode_next_dist 8 +%define %%state %1 ; State structure associated with compressed stream +%define %%lookup_size %2 ; Number of bits used for small lookup +%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST +%define %%read_in %4 ; Bits read in from compressed stream +%define %%read_in_length %5 ; Number of valid bits in read_in +%define %%next_sym %6 ; Returned symobl +%define %%next_extra_bits %7 +%define %%next_bits %8 + + mov rcx, %%next_sym + shr rcx, 
SMALL_SHORT_CODE_LEN_OFFSET + jz invalid_dist_symbol_ %+ %%next_sym + + ;; Check if symbol or hint was looked up + and %%next_sym, SMALL_FLAG_BIT | SMALL_SHORT_SYM_MASK + test %%next_sym, SMALL_FLAG_BIT + jz %%end + + ;; Save length associated with symbol + mov %%next_bits, %%read_in + shr %%next_bits, %%lookup_size + + ;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits. + lea %%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym] + + CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size + + ;; Lookup actual next symbol + movzx %%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT] + + ;; Save length associated with symbol + mov rcx, %%next_sym + shr rcx, SMALL_LONG_CODE_LEN_OFFSET + jz invalid_dist_symbol_ %+ %%next_sym + and %%next_sym, SMALL_SHORT_SYM_MASK + +%%end: + ;; Update read_in to reflect the bits which were decoded + SHRX %%read_in, %%read_in, rcx + sub %%read_in_length, rcx + mov rcx, %%next_sym + shr rcx, DIST_SYM_EXTRA_OFFSET + and %%next_sym, DIST_SYM_MASK +%endm + +;; Load the short lookup entry indexed by the low %%lookup_size bits of %%read_in, then decode it with decode_next_dist +;; Clobber rcx +%macro decode_next_dist_with_load 8 +%define %%state %1 ; State structure associated with compressed stream +%define %%lookup_size %2 ; Number of bits used for small lookup +%define %%state_offset %3 ; Offset of the huffman lookup table within %%state +%define %%read_in %4 ; Bits read in from compressed stream +%define %%read_in_length %5 ; Number of valid bits in read_in +%define %%next_sym %6 ; Returned symbol +%define %%next_extra_bits %7 ; Forwarded unchanged to decode_next_dist +%define %%next_bits %8 ; Scratch register (clobbered); receives the masked lookup index + + ;; Lookup possible next symbol + mov %%next_bits, %%read_in + and %%next_bits, (1 << %%lookup_size) - 1 + movzx %%next_sym, word [%%state + %%state_offset + SMALL_SHORT_CODE_SIZE * %%next_bits] + + decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits +%endm + +global decode_huffman_code_block_stateless_ %+ ARCH
+decode_huffman_code_block_stateless_ %+ ARCH %+ : + + FUNC_SAVE + + mov state, arg0 + mov [rsp + start_out_mem_offset], arg1 + lea rfc_lookup, [rfc1951_lookup_table] + + mov read_in,[state + _read_in] + mov read_in_length %+ d, dword [state + _read_in_length] + mov next_out, [state + _next_out] + mov end_out %+ d, dword [state + _avail_out] + add end_out, next_out + mov next_in, [state + _next_in] + mov end_in %+ d, dword [state + _avail_in] + add end_in, next_in + + mov dword [state + _copy_overflow_len], 0 + mov dword [state + _copy_overflow_dist], 0 + + sub end_out, OUT_BUFFER_SLOP + sub end_in, IN_BUFFER_SLOP + + cmp next_in, end_in + jg end_loop_block_pre + + cmp read_in_length, 64 + je skip_load + + inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2 + +skip_load: + mov tmp3, read_in + and tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1 + mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3] + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Main Loop +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +loop_block: + ;; Check if near end of in buffer or out buffer + cmp next_in, end_in + jg end_loop_block_pre + cmp next_out, end_out + jg end_loop_block_pre + + ;; Decode next symbol and reload the read_in buffer + decode_next_lit_len state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1 + + ;; Speculatively write next_sym if it is a literal + mov [next_out], next_sym + add next_out, next_sym_num + lea next_sym2, [8 * next_sym_num - 8] + SHRX next_sym2, next_sym, next_sym2 + + ;; Find index to speculatively preload next_sym from + mov tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1 + and tmp3, read_in + + ;; Start reloading read_in + mov tmp1, [next_in] + SHLX tmp1, tmp1, read_in_length + or read_in, tmp1 + + ;; Speculatively load data associated with length symbol + lea repeat_length, [next_sym2 - 254] + + ;; Test for end of
block symbol + cmp next_sym2, 256 + je end_symbol_pre + + ;; Speculatively load next_sym for next loop if a literal was decoded + mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3] + + ;; Finish updating read_in_length for read_in + mov tmp1, 64 + sub tmp1, read_in_length + shr tmp1, 3 + add next_in, tmp1 + lea read_in_length, [read_in_length + 8 * tmp1] + + ;; Speculatively load next dist code + mov next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1 + and next_bits2, read_in + movzx next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2] + + ;; Check if next_sym2 is a literal, length, or end of block symbol + cmp next_sym2, 256 + jl loop_block + +decode_len_dist: + ;; Determine next_out after the copy is finished + lea next_out, [next_out + repeat_length - 1] + + ;; Decode distance code + decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym3, rcx, tmp2 + + mov look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3] + + ;; Load distance code extra bits + mov next_bits, read_in + + ;; Calculate the look back distance + BZHI next_bits, next_bits, rcx, tmp4 + SHRX read_in, read_in, rcx + + ;; Set up next_sym, read_in, and read_in_length for next loop + mov read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1 + and read_in_2, read_in + mov next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2] + sub read_in_length, rcx + + ;; Copy distance in len/dist pair + add look_back_dist2, next_bits + + ;; Find beginning of copy + mov copy_start, next_out + sub copy_start, repeat_length + sub copy_start, look_back_dist2 + + ;; Check if a valid look back distance was decoded + cmp copy_start, [rsp + start_out_mem_offset] + jl invalid_look_back_distance + MOVDQU xmm1, [copy_start] + + ;; Set tmp2 to be the minimum of COPY_SIZE and repeat_length + ;; This is to decrease use of small_byte_copy branch + mov tmp2, COPY_SIZE + cmp tmp2, repeat_length + cmovg tmp2,
repeat_length + + ;; Check for overlapping memory in the copy + cmp look_back_dist2, tmp2 + jl small_byte_copy_pre + +large_byte_copy: + ;; Copy length distance pair when memory overlap is not an issue + MOVDQU [copy_start + look_back_dist2], xmm1 + + sub repeat_length, COPY_SIZE + jle loop_block + + add copy_start, COPY_SIZE + MOVDQU xmm1, [copy_start] + jmp large_byte_copy + +small_byte_copy_pre: + ;; Copy length distance pair when source and destination overlap + add repeat_length, look_back_dist2 +small_byte_copy: + MOVDQU [copy_start + look_back_dist2], xmm1 + + shl look_back_dist2, 1 + MOVDQU xmm1, [copy_start] + cmp look_back_dist2, COPY_SIZE + jl small_byte_copy + + sub repeat_length, look_back_dist2 + jge large_byte_copy + jmp loop_block + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Finish Main Loop +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +end_loop_block_pre: + ;; Fix up in buffer and out buffer to reflect the actual buffer end + add end_out, OUT_BUFFER_SLOP + add end_in, IN_BUFFER_SLOP + +end_loop_block: + ;; Load read in buffer and decode next lit/len symbol + inflate_in_small_load next_in, end_in, read_in, read_in_length, tmp1, tmp2 + mov [rsp + read_in_mem_offset], read_in + mov [rsp + read_in_length_mem_offset], read_in_length + mov [rsp + next_out_mem_offset], next_out + + decode_next_lit_len_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1 + + ;; Check that enough input was available to decode symbol + cmp read_in_length, 0 + jl end_of_input + +multi_symbol_start: + cmp next_sym_num, 1 + jg decode_literal + + cmp next_sym, 256 + jl decode_literal + je end_symbol + +decode_len_dist_2: + lea repeat_length, [next_sym - 254] + ;; Decode distance code + decode_next_dist_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, rcx, tmp1 + + ;; Load distance code extra bits + mov
next_bits, read_in + mov look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym] + + ;; Calculate the look back distance and check for enough input + BZHI next_bits, next_bits, rcx, tmp1 + SHRX read_in, read_in, rcx + add look_back_dist, next_bits + sub read_in_length, rcx + jl end_of_input + + ;; Set up code for byte copy using rep movsb + mov rsi, next_out + mov rdi, rsi + mov rcx, repeat_length + sub rsi, look_back_dist + + ;; Check if a valid look back distance was decoded + cmp rsi, [rsp + start_out_mem_offset] + jl invalid_look_back_distance + + ;; Check for out buffer overflow + add repeat_length, next_out + cmp repeat_length, end_out + jg out_buffer_overflow_repeat + + mov next_out, repeat_length + + rep movsb + jmp end_loop_block + +decode_literal: + ;; Store literal decoded from the input stream + cmp next_out, end_out + jge out_buffer_overflow_lit + add next_out, 1 + mov byte [next_out - 1], next_sym %+ b + sub next_sym_num, 1 + jz end_loop_block + shr next_sym, 8 + jmp multi_symbol_start + +;; Set exit codes (rax: 0 at end of block, END_INPUT, OUT_OVERFLOW, INVALID_LOOKBACK or INVALID_SYMBOL) +end_of_input: + mov read_in, [rsp + read_in_mem_offset] + mov read_in_length, [rsp + read_in_length_mem_offset] + mov next_out, [rsp + next_out_mem_offset] + xor tmp1, tmp1 + mov dword [state + _write_overflow_lits], tmp1 %+ d + mov dword [state + _write_overflow_len], tmp1 %+ d + mov rax, END_INPUT + jmp end + +out_buffer_overflow_repeat: + mov rcx, end_out + sub rcx, next_out + sub repeat_length, rcx + sub repeat_length, next_out + rep movsb + + mov [state + _copy_overflow_len], repeat_length %+ d + mov [state + _copy_overflow_dist], look_back_dist %+ d + + mov next_out, end_out + + mov rax, OUT_OVERFLOW + jmp end + +out_buffer_overflow_lit: + mov dword [state + _write_overflow_lits], next_sym %+ d + mov dword [state + _write_overflow_len], next_sym_num %+ d + sub next_sym_num, 1 + shl next_sym_num, 3 + SHRX next_sym, next_sym, next_sym_num + mov rax, OUT_OVERFLOW + shr next_sym_num, 3 + cmp next_sym, 256 + jl end + mov dword [state +
_write_overflow_len], next_sym_num %+ d + jg decode_len_dist_2 + jmp end_state + +invalid_look_back_distance: + mov rax, INVALID_LOOKBACK + jmp end + +invalid_dist_symbol_ %+ next_sym: + cmp read_in_length, next_sym + jl end_of_input + jmp invalid_symbol +invalid_dist_symbol_ %+ next_sym3: + cmp read_in_length, next_sym3 + jl end_of_input +invalid_symbol: + mov rax, INVALID_SYMBOL + jmp end + +end_symbol_pre: + ;; Fix up in buffer and out buffer to reflect the actual buffer end + sub next_out, 1 + add end_out, OUT_BUFFER_SLOP + add end_in, IN_BUFFER_SLOP +end_symbol: + xor rax, rax +end_state: + ;; Set flag identifying a new block is required + mov byte [state + _block_state], ISAL_BLOCK_NEW_HDR + cmp dword [state + _bfinal], 0 + je end + mov byte [state + _block_state], ISAL_BLOCK_INPUT_DONE + +end: + ;; Save current buffer states + mov [state + _read_in], read_in + mov [state + _read_in_length], read_in_length %+ d + + ;; Set avail_out + sub end_out, next_out + mov dword [state + _avail_out], end_out %+ d + + ;; Set total_out + mov tmp1, next_out + sub tmp1, [state + _next_out] + add [state + _total_out], tmp1 %+ d + + ;; Set next_out + mov [state + _next_out], next_out + + ;; Set next_in + mov [state + _next_in], next_in + + ;; Set avail_in + sub end_in, next_in + mov [state + _avail_in], end_in %+ d + + FUNC_RESTORE + + ret diff --git a/src/spdk/isa-l/igzip/igzip_decode_block_stateless_01.asm b/src/spdk/isa-l/igzip/igzip_decode_block_stateless_01.asm new file mode 100644 index 000000000..4aa39fe1c --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_decode_block_stateless_01.asm @@ -0,0 +1,3 @@ +%define ARCH 01 + +%include "igzip_decode_block_stateless.asm" diff --git a/src/spdk/isa-l/igzip/igzip_decode_block_stateless_04.asm b/src/spdk/isa-l/igzip/igzip_decode_block_stateless_04.asm new file mode 100644 index 000000000..769fca22d --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_decode_block_stateless_04.asm @@ -0,0 +1,4 @@ +%define ARCH 04 +%define USE_HSWNI + +%include
"igzip_decode_block_stateless.asm" diff --git a/src/spdk/isa-l/igzip/igzip_deflate_hash.asm b/src/spdk/isa-l/igzip/igzip_deflate_hash.asm new file mode 100644 index 000000000..b61c4be1e --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_deflate_hash.asm @@ -0,0 +1,165 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%include "lz0a_const.asm" +%include "data_struct2.asm" +%include "huffman.asm" +%include "reg_sizes.asm" + +%define DICT_SLOP 8 +%define DICT_END_SLOP 4 + +%ifidn __OUTPUT_FORMAT__, win64 +%define arg1 rcx +%define arg2 rdx +%define arg3 r8 +%define arg4 r9 +%define arg5 rdi +%define swap1 rsi +%define stack_size 3 * 8 +%define PS 8 +%define arg(x) [rsp + stack_size + PS*x] +%else +%define arg1 rdi +%define arg2 rsi +%define arg3 rdx +%define arg4 rcx +%define arg5 r8 +%define swap1 r9 +%endif + +%define hash_table arg1 + +%define hash_mask arg2 + +%define f_i_end arg3 + +%define dict_offset arg4 + +%define dict_len arg5 +%define f_i arg5 + +%define f_i_tmp rax + +%define hash swap1 + +%define hash2 r10 + +%define hash3 r11 + +%define hash4 r12 + + +%macro FUNC_SAVE 0 +%ifidn __OUTPUT_FORMAT__, win64 + push rsi + push rdi + push r12 + mov arg5 %+ d, arg(5) +%else + push r12 +%endif +%endm + +%macro FUNC_RESTORE 0 +%ifidn __OUTPUT_FORMAT__, win64 + pop r12 + pop rdi + pop rsi +%else + pop r12 +%endif +%endm + +global isal_deflate_hash_crc_01 +isal_deflate_hash_crc_01: + FUNC_SAVE + + neg f_i + add f_i, f_i_end + + sub dict_offset, f_i + + sub f_i_end, DICT_SLOP + cmp f_i, f_i_end + jg end_main + +main_loop: + lea f_i_tmp, [f_i + 2] + + xor hash, hash + crc32 hash %+ d, dword [f_i + dict_offset] + + xor hash2, hash2 + crc32 hash2 %+ d, dword [f_i + dict_offset + 1] + + xor hash3, hash3 + crc32 hash3 %+ d, dword [f_i_tmp + dict_offset] + + xor hash4, hash4 + crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1] + + and hash, hash_mask + and hash2, hash_mask + and hash3, hash_mask + and hash4, hash_mask + + mov [hash_table + 2 * hash], f_i %+ w + add f_i, 1 + + mov [hash_table + 2 * hash2], f_i %+ w + add f_i, 3 + + mov [hash_table + 2 * hash3], f_i_tmp %+ w + add f_i_tmp, 1 + + mov [hash_table + 2 * hash4], f_i_tmp %+ w + + cmp f_i, f_i_end + jle main_loop + +end_main: + 
add f_i_end, DICT_SLOP - DICT_END_SLOP + cmp f_i, f_i_end + jg end + +end_loop: + xor hash, hash + crc32 hash %+ d, dword [f_i + dict_offset] + + and hash, hash_mask + mov [hash_table + 2 * hash], f_i %+ w + + add f_i, 1 + cmp f_i, f_i_end + jle end_loop +end: + FUNC_RESTORE + ret diff --git a/src/spdk/isa-l/igzip/igzip_example.c b/src/spdk/isa-l/igzip/igzip_example.c new file mode 100644 index 000000000..5930c717f --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_example.c @@ -0,0 +1,101 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "igzip_lib.h" + +#define BUF_SIZE 8192 +#ifndef LEVEL +# define LEVEL 0 +#else +# define LEVEL 1 +#endif + +struct isal_zstream stream; + +int main(int argc, char *argv[]) +{ + uint8_t inbuf[BUF_SIZE], outbuf[BUF_SIZE]; + FILE *in, *out; + + if (argc != 3) { + fprintf(stderr, "Usage: igzip_example infile outfile\n"); + exit(0); + } + in = fopen(argv[1], "rb"); + if (!in) { + fprintf(stderr, "Can't open %s for reading\n", argv[1]); + exit(0); + } + out = fopen(argv[2], "wb"); + if (!out) { + fprintf(stderr, "Can't open %s for writing\n", argv[2]); + exit(0); + } + + printf("igzip_example\nWindow Size: %d K\n", IGZIP_HIST_SIZE / 1024); + fflush(0); + + isal_deflate_init(&stream); + stream.end_of_stream = 0; + stream.flush = NO_FLUSH; + + if (LEVEL == 1) { + stream.level = 1; + stream.level_buf = malloc(ISAL_DEF_LVL1_DEFAULT); + stream.level_buf_size = ISAL_DEF_LVL1_DEFAULT; + if (stream.level_buf == 0) { + printf("Failed to allocate level compression buffer\n"); + exit(0); + } + } + + do { + stream.avail_in = (uint32_t) fread(inbuf, 1, BUF_SIZE, in); + stream.end_of_stream = feof(in) ?
1 : 0; + stream.next_in = inbuf; + do { + stream.avail_out = BUF_SIZE; + stream.next_out = outbuf; + + isal_deflate(&stream); + + fwrite(outbuf, 1, BUF_SIZE - stream.avail_out, out); + } while (stream.avail_out == 0); + + assert(stream.avail_in == 0); + } while (stream.internal_state.state != ZSTATE_END); + + fclose(out); + fclose(in); + + printf("End of igzip_example\n\n"); + return 0; +} diff --git a/src/spdk/isa-l/igzip/igzip_file_perf.c b/src/spdk/isa-l/igzip/igzip_file_perf.c new file mode 100644 index 000000000..c04ed24a8 --- /dev/null +++ b/src/spdk/isa-l/igzip/igzip_file_perf.c @@ -0,0 +1,334 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#define _FILE_OFFSET_BITS 64 +#include +#include +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +#define BUF_SIZE 1024 + +int level_size_buf[10] = { +#ifdef ISAL_DEF_LVL0_DEFAULT + ISAL_DEF_LVL0_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL1_DEFAULT + ISAL_DEF_LVL1_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL2_DEFAULT + ISAL_DEF_LVL2_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL3_DEFAULT + ISAL_DEF_LVL3_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL4_DEFAULT + ISAL_DEF_LVL4_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL5_DEFAULT + ISAL_DEF_LVL5_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL6_DEFAULT + ISAL_DEF_LVL6_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL7_DEFAULT + ISAL_DEF_LVL7_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL8_DEFAULT + ISAL_DEF_LVL8_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL9_DEFAULT + ISAL_DEF_LVL9_DEFAULT, +#else + 0, +#endif +}; + +int usage(void) +{ + fprintf(stderr, + "Usage: igzip_file_perf [options] \n" + " -h help\n" + " -X use compression level X with 0 <= X <= 1\n" + " -b input buffer size, 0 buffers all the input\n" + " -i